void bli_her_int( conj_t conjh,
                  obj_t* alpha,
                  obj_t* x,
                  obj_t* c,
                  her_t* cntl )
{
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;
    obj_t     x_local;
    obj_t     c_local;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_her_int_check( conjh, alpha, x, c, cntl );

    // If C or x has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( *c ) ) return;
    if ( bli_obj_has_zero_dim( *x ) ) return;

    // Alias the operands in case we need to apply conjugations.
    bli_obj_alias_to( *x, x_local );
    bli_obj_alias_to( *c, c_local );

    // If matrix C is marked for conjugation, we interpret this as a request
    // to apply a conjugation to the other operands.
    if ( bli_obj_has_conj( c_local ) )
    {
        bli_obj_toggle_conj( c_local );

        // Notice that we don't need to conjugate alpha since it is guaranteed
        // to be real.

        bli_obj_toggle_conj( x_local );
    }

    // Extract the variant number and implementation type.
    n = cntl_var_num( cntl );
    i = cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( conjh,
       alpha,
       &x_local,
       &c_local,
       cntl );
}
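
// NOTE (editorial sketch, not part of the original source): the FUNCPTR_T
// type and the vars array referenced by bli_her_int() are defined near the
// top of the same file. Based only on how f is declared and invoked above,
// the definitions would presumably resemble the sketch below. The variant
// names and the array dimensions are illustrative assumptions; consult the
// actual file for the real table.
/*
typedef void (*FUNCPTR_T)( conj_t conjh,
                           obj_t* alpha,
                           obj_t* x,
                           obj_t* c,
                           her_t* cntl );

// Rows index the variant number (n); columns index the implementation
// type (i), e.g. blocked, optimized unblocked, unblocked.
static FUNCPTR_T vars[2][3] =
{
    // blocked           opt unblocked  unblocked
    { bli_her_blk_var1,  NULL,          bli_her_unb_var1 },
    { bli_her_blk_var2,  NULL,          bli_her_unb_var2 },
};
*/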
void bli_scalv_int( obj_t*   beta,
                    obj_t*   x,
                    scalv_t* cntl )
{
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_scalv_int_check( beta, x, cntl );

    // First check if we are to skip this operation.
    if ( cntl_is_noop( cntl ) ) return;

    // Return early if one of the matrix operands has a zero dimension.
    if ( bli_obj_has_zero_dim( *x ) ) return;

    // Return early if the beta scalar equals one.
    if ( bli_obj_equals( beta, &BLIS_ONE ) ) return;

    // Extract the variant number and implementation type.
    n = cntl_var_num( cntl );
    i = cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( beta, x );
}
void bli_scalm_int( obj_t*   beta,
                    obj_t*   x,
                    scalm_t* cntl )
{
    obj_t     x_local;
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_scalm_int_check( beta, x, cntl );

    // First check if we are to skip this operation.
    if ( cntl_is_noop( cntl ) ) return;

    // Return early if one of the matrix operands has a zero dimension.
    if ( bli_obj_has_zero_dim( *x ) ) return;

    // Return early if both beta and the scalar attached to x are unit.
    if ( bli_obj_equals( beta, &BLIS_ONE ) &&
         bli_obj_scalar_equals( x, &BLIS_ONE ) ) return;

    // Alias x to x_local so we can apply beta if it is non-unit.
    bli_obj_alias_to( *x, x_local );

    // If beta is non-unit, apply it to the scalar attached to x.
    if ( !bli_obj_equals( beta, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( beta, &x_local );
    }

    // Extract the variant number and implementation type.
    n = cntl_var_num( cntl );
    i = cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( &x_local );
}
void bli_unpackv_int( obj_t*     p,
                      obj_t*     a,
                      cntx_t*    cntx,
                      unpackv_t* cntl )
{
    // The unpackv operation consists of an optional casting post-process.
    // (This post-process is analogous to the cast pre-process in packv.)
    // unpackv can execute in one of the following ways:
    //  1. unpack and cast: Unpack to a temporary vector c and then cast
    //     c to a.
    //  2. unpack only: Unpack directly to vector a since typecasting is
    //     not needed.
    //  3. cast only: Not yet supported / not used.
    //  4. no-op: The control tree directs us to skip the unpack operation
    //     entirely. No action is taken.

    obj_t     c;

    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_unpackv_check( p, a, cntx );

    // Sanity check; A should never have a zero dimension. If we must support
    // that case, it should be folded into the alias-and-early-exit logic
    // below.
    if ( bli_obj_has_zero_dim( *a ) ) bli_abort();

    // First check if we are to skip this operation because the control tree
    // is NULL, and if so, simply return.
    if ( cntl_is_noop( cntl ) )
    {
        return;
    }

    // If p was aliased to a during the pack stage (because it was already
    // in an acceptable packed/contiguous format), then no unpack is actually
    // necessary, so we return.
    if ( bli_obj_is_alias_of( *p, *a ) )
    {
        return;
    }

    // Now, if we are not skipping the unpack operation, then the only
    // question left is whether we are to typecast vector a after unpacking.
    if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
        bli_abort();
/*
    if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
    {
        // Initialize an object c for the intermediate typecast vector.
        bli_unpackv_init_cast( p, a, &c );
    }
    else
*/
    {
        // If no cast is needed, then aliasing object c to the original
        // vector serves as a minor optimization. This causes the unpackv
        // implementation to unpack directly into vector a.
        bli_obj_alias_to( *a, c );
    }

    // Now we are ready to proceed with the unpacking.

    // Extract the variant number and implementation type.
    n = cntl_var_num( cntl );
    i = cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( p,
       &c,
       cntx,
       cntl );

    // Now, if necessary, we cast the contents of c to vector a. If casting
    // was not necessary, then we are done because the call to the unpackv
    // implementation would have unpacked directly to vector a.
/*
    if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
    {
        // Copy/typecast vector c to vector a.
        // NOTE: Here, we use copynzv instead of copym because, in the cases
        // where we are unpacking/typecasting a real vector c to a complex
        // vector a, we want to touch only the real components of a, rather
        // than also set the imaginary components to zero. This comes about
        // because of the fact that, if we are unpacking real-to-complex,
        // then it is because all of the computation occurred in the real
        // domain, and so we would want to leave whatever imaginary values
        // there are in vector a untouched. Notice that for unpackings that
        // entail complex-to-complex data movements, the copynzv operation
        // behaves exactly as copym, so no use cases are lost (at least none
        // that I can think of).
        bli_copynzv( &c, a );

        // NOTE: The above code/comment is outdated. What should happen is
        // as follows:
        // - If dt(a) is complex and dt(p) is real, then create an alias of
        //   a and then tweak it so that it looks like a real domain object.
        //   This will involve:
        //   - projecting the datatype to real domain
        //   - scaling both the row and column strides by 2
        //   ALL OF THIS should be done in the front-end, NOT here, as
        //   unpackv() won't even be needed in that case.
    }
*/
}
void bli_trmm_int( obj_t*  alpha,
                   obj_t*  a,
                   obj_t*  b,
                   obj_t*  beta,
                   obj_t*  c,
                   trmm_t* cntl )
{
    obj_t     a_local;
    obj_t     b_local;
    obj_t     c_local;
    bool_t    side, uplo;
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_trmm_int_check( alpha, a, b, beta, c, cntl );

    // If C has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( *c ) ) return;

    // If A or B has a zero dimension, scale C by beta and return early.
    if ( bli_obj_has_zero_dim( *a ) ||
         bli_obj_has_zero_dim( *b ) )
    {
        bli_scalm( beta, c );
        return;
    }

    // Alias A and B in case we need to update attached scalars.
    bli_obj_alias_to( *a, a_local );
    bli_obj_alias_to( *b, b_local );

    // Alias C in case we need to induce a transposition.
    bli_obj_alias_to( *c, c_local );

    // If we are about to call a leaf-level implementation, and matrix C
    // still needs a transposition, then we must induce one by swapping the
    // strides and dimensions. Note that this transposition would normally
    // be handled explicitly in the packing of C, but if C is not being
    // packed, this is our last chance to handle the transposition.
    if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) )
    {
        bli_obj_induce_trans( c_local );
        bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
    }

    // If alpha is non-unit, typecast and apply it to the scalar attached
    // to B.
    if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( alpha, &b_local );
    }

    // If beta is non-unit, typecast and apply it to the scalar attached
    // to C.
    if ( !bli_obj_equals( beta, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( beta, &c_local );
    }

    // Set two bools: one based on the implied side parameter (the structure
    // of the root object) and one based on the uplo field of the triangular
    // matrix's root object (whether that is matrix A or matrix B).
    if ( bli_obj_root_is_triangular( *a ) )
    {
        side = 0;

        // Set a bool based on the uplo field of A's root object.
        if ( bli_obj_root_is_lower( *a ) ) uplo = 0;
        else                               uplo = 1;
    }
    else // if ( bli_obj_root_is_triangular( *b ) )
    {
        side = 1;

        // Set a bool based on the uplo field of B's root object.
        if ( bli_obj_root_is_lower( *b ) ) uplo = 0;
        else                               uplo = 1;
    }

    // Extract the variant number and implementation type.
    n = cntl_var_num( cntl );
    i = cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[side][uplo][n][i];

    // Invoke the variant.
    f( &a_local,
       &b_local,
       &c_local,
       cntl );
}
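
// NOTE (editorial sketch, not part of the original source): unlike the
// simpler vars[n][i] tables used by the routines above, bli_trmm_int()
// indexes its function-pointer table by four values: side, uplo, variant
// number, and implementation type. A plausible shape for that table is
// sketched below; the exact leading dimensions and the variant functions
// that populate it are assumptions for illustration only.
/*
typedef void (*FUNCPTR_T)( obj_t*  a,
                           obj_t*  b,
                           obj_t*  c,
                           trmm_t* cntl );

// vars[side][uplo][n][i]:
//   side: 0 = triangular matrix on the left, 1 = on the right
//   uplo: 0 = lower triangular root, 1 = upper triangular root
//   n:    variant number
//   i:    implementation type (blocked, opt unblocked, unblocked)
static FUNCPTR_T vars[2][2][3][3] = { ... };
*/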
void bli_trsv_int( obj_t*  alpha,
                   obj_t*  a,
                   obj_t*  x,
                   cntx_t* cntx,
                   trsv_t* cntl )
{
    varnum_t  n;
    impl_t    i;
    bool_t    uplo;
    FUNCPTR_T f;
    obj_t     a_local;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_trsv_check( alpha, a, x );

    // If A or x has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( a ) ) return;
    if ( bli_obj_has_zero_dim( x ) ) return;

    // Alias A in case we need to induce a transformation (ie: transposition).
    bli_obj_alias_to( a, &a_local );

    // NOTE: to support cases where B is complex and A is real, we will
    // need to have the default side case be BLIS_RIGHT and then express
    // the left case in terms of it, rather than the other way around.

    // Determine uplo (for indexing to the correct function pointer).
    if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
    else                                uplo = 1;

    // We do not explicitly implement the cases where A is transposed.
    // However, we can still handle them. Specifically, if A is marked as
    // needing a transposition, we simply toggle the uplo value to cause the
    // correct algorithm to be induced. When that algorithm partitions into
    // A, it will grab the correct subpartitions, which will inherit A's
    // transposition bit and thus downstream subproblems will do the right
    // thing. Alternatively, we could accomplish the same end goal by
    // inducing a transposition, via bli_obj_induce_trans(), in the code
    // block below. That macro function swaps dimensions, strides, and
    // offsets. As an example, given a lower triangular, column-major matrix
    // that needs a transpose, we would induce that transposition by recasting
    // the object as an upper triangular, row-major matrix (with no transpose
    // needed). Note that how we choose to handle transposition here does NOT
    // affect the optimal choice of kernel (ie: a column-major column panel
    // matrix with transpose times a vector would use the same kernel as a
    // row-major row panel matrix with no transpose times a vector).
    if ( bli_obj_has_trans( &a_local ) )
    {
        //bli_obj_induce_trans( &a_local );
        //bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );

        if ( uplo == 1 ) uplo = 0;
        else             uplo = 1;
    }

    // Extract the variant number and implementation type.
    n = bli_cntl_var_num( cntl );
    i = bli_cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[uplo][n][i];

    // Invoke the variant.
    f( alpha,
       &a_local,
       x,
       cntx,
       cntl );
}
void bli_her2_int( conj_t  conjh,
                   obj_t*  alpha,
                   obj_t*  alpha_conj,
                   obj_t*  x,
                   obj_t*  y,
                   obj_t*  c,
                   cntx_t* cntx,
                   her2_t* cntl )
{
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;
    obj_t     alpha_local;
    obj_t     alpha_conj_local;
    obj_t     x_local;
    obj_t     y_local;
    obj_t     c_local;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
    {
        if ( bli_is_conj( conjh ) ) bli_her2_check( alpha, x, y, c );
        else                        bli_syr2_check( alpha, x, y, c );
    }

    // If C, x, or y has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( c ) ) return;
    if ( bli_obj_has_zero_dim( x ) ) return;
    if ( bli_obj_has_zero_dim( y ) ) return;

    // Alias the operands in case we need to apply conjugations.
    bli_obj_alias_to( x, &x_local );
    bli_obj_alias_to( y, &y_local );
    bli_obj_alias_to( c, &c_local );

    // If matrix C is marked for conjugation, we interpret this as a request
    // to apply a conjugation to the other operands.
    if ( bli_obj_has_conj( &c_local ) )
    {
        bli_obj_toggle_conj( &c_local );

        bli_obj_toggle_conj( &x_local );
        bli_obj_toggle_conj( &y_local );

        bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
                                              BLIS_CONJUGATE,
                                              alpha,
                                              &alpha_local );
        bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ),
                                              BLIS_CONJUGATE,
                                              alpha_conj,
                                              &alpha_conj_local );
    }
    else
    {
        bli_obj_alias_to( alpha, &alpha_local );
        bli_obj_alias_to( alpha_conj, &alpha_conj_local );
    }

    // Extract the variant number and implementation type.
    n = bli_cntl_var_num( cntl );
    i = bli_cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( conjh,
       &alpha_local,
       &alpha_conj_local,
       &x_local,
       &y_local,
       &c_local,
       cntx,
       cntl );
}
void bli_gemm_int
     (
       obj_t*     alpha,
       obj_t*     a,
       obj_t*     b,
       obj_t*     beta,
       obj_t*     c,
       cntx_t*    cntx,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
    obj_t     a_local;
    obj_t     b_local;
    obj_t     c_local;
    gemm_voft f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_gemm_basic_check( alpha, a, b, beta, c, cntx );

    // If C has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( *c ) ) return;

    // If A or B has a zero dimension, scale C by beta and return early.
    if ( bli_obj_has_zero_dim( *a ) ||
         bli_obj_has_zero_dim( *b ) )
    {
        if ( bli_thread_am_ochief( thread ) )
            bli_scalm( beta, c );
        bli_thread_obarrier( thread );
        return;
    }

    // If A or B is marked as being filled with zeros, scale C by beta and
    // return early.
    if ( bli_obj_is_zeros( *a ) ||
         bli_obj_is_zeros( *b ) )
    {
        // This should never execute.
        bli_abort();

        if ( bli_thread_am_ochief( thread ) )
            bli_scalm( beta, c );
        bli_thread_obarrier( thread );
        return;
    }

    // Alias A, B, and C in case we need to update attached scalars.
    bli_obj_alias_to( *a, a_local );
    bli_obj_alias_to( *b, b_local );
    bli_obj_alias_to( *c, c_local );

    // If alpha is non-unit, typecast and apply it to the scalar attached
    // to B.
    if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( alpha, &b_local );
    }

    // If beta is non-unit, typecast and apply it to the scalar attached
    // to C.
    if ( !bli_obj_equals( beta, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( beta, &c_local );
    }

    // Create the next node in the thrinfo_t structure.
    bli_thrinfo_grow( cntx, cntl, thread );

    // Extract the function pointer from the current control tree node.
    f = bli_cntl_var_func( cntl );

    // Somewhat hackish support for 3m3, 3m2, and 4m1b method implementations.
    {
        ind_t im = bli_cntx_get_ind_method( cntx );

        if ( im != BLIS_NAT )
        {
            if      ( im == BLIS_3M3  && f == bli_gemm_packa    ) f = bli_gemm3m3_packa;
            else if ( im == BLIS_3M2  && f == bli_gemm_ker_var2 ) f = bli_gemm3m2_ker_var2;
            else if ( im == BLIS_4M1B && f == bli_gemm_ker_var2 ) f = bli_gemm4mb_ker_var2;
        }
    }

    // Invoke the variant.
    f
    (
      &a_local,
      &b_local,
      &c_local,
      cntx,
      cntl,
      thread
    );
}
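
// NOTE (editorial sketch, not part of the original source): gemm_voft, used
// above in place of the older FUNCPTR_T/vars[n][i] mechanism, is a
// function-pointer type for control-tree variant functions. Judging only
// from how f is invoked in this version of bli_gemm_int(), it presumably
// corresponds to a signature along these lines; treat this as an assumption,
// since the real typedef lives in the BLIS framework headers.
/*
typedef void (*gemm_voft)
     (
       obj_t*     a,
       obj_t*     b,
       obj_t*     c,
       cntx_t*    cntx,
       cntl_t*    cntl,
       thrinfo_t* thread
     );
*/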
void bli_gemm_int( obj_t*          alpha,
                   obj_t*          a,
                   obj_t*          b,
                   obj_t*          beta,
                   obj_t*          c,
                   gemm_t*         cntl,
                   gemm_thrinfo_t* thread )
{
    obj_t     a_local;
    obj_t     b_local;
    obj_t     c_local;
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_gemm_int_check( alpha, a, b, beta, c, cntl );

    // If C has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( *c ) ) return;

    // If A or B has a zero dimension, scale C by beta and return early.
    if ( bli_obj_has_zero_dim( *a ) ||
         bli_obj_has_zero_dim( *b ) )
    {
        if ( thread_am_ochief( thread ) )
            bli_scalm( beta, c );
        thread_obarrier( thread );
        return;
    }

    // If A or B is marked as being filled with zeros, scale C by beta and
    // return early.
    if ( bli_obj_is_zeros( *a ) ||
         bli_obj_is_zeros( *b ) )
    {
        if ( thread_am_ochief( thread ) )
            bli_scalm( beta, c );
        thread_obarrier( thread );
        return;
    }

    // Alias A and B in case we need to update attached scalars.
    bli_obj_alias_to( *a, a_local );
    bli_obj_alias_to( *b, b_local );

    // Alias C in case we need to induce a transposition.
    bli_obj_alias_to( *c, c_local );

    // If we are about to call a leaf-level implementation, and matrix C
    // still needs a transposition, then we must induce one by swapping the
    // strides and dimensions. Note that this transposition would normally
    // be handled explicitly in the packing of C, but if C is not being
    // packed, this is our last chance to handle the transposition.
    if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) )
    {
        //if ( thread_am_ochief( thread ) ) {
        bli_obj_induce_trans( c_local );
        bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
        //}
    }

    // If alpha is non-unit, typecast and apply it to the scalar attached
    // to B.
    if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( alpha, &b_local );
    }

    // If beta is non-unit, typecast and apply it to the scalar attached
    // to C.
    if ( !bli_obj_equals( beta, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( beta, &c_local );
    }

    // Extract the variant number and implementation type.
    n = cntl_var_num( cntl );
    i = cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( &a_local,
       &b_local,
       &c_local,
       cntl,
       thread );
}
void bli_trsm_int
     (
       obj_t*     alpha,
       obj_t*     a,
       obj_t*     b,
       obj_t*     beta,
       obj_t*     c,
       cntx_t*    cntx,
       rntm_t*    rntm,
       cntl_t*    cntl,
       thrinfo_t* thread
     )
{
    obj_t        a_local;
    obj_t        b_local;
    obj_t        c_local;
    trsm_var_oft f;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_gemm_basic_check( alpha, a, b, beta, c, cntx );

    // If C has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( c ) ) return;

    // If A or B has a zero dimension, scale C by beta and return early.
    if ( bli_obj_has_zero_dim( a ) ||
         bli_obj_has_zero_dim( b ) )
    {
        if ( bli_thread_am_ochief( thread ) )
            bli_scalm( beta, c );
        bli_thread_obarrier( thread );
        return;
    }

    // Alias A and B in case we need to update attached scalars.
    bli_obj_alias_to( a, &a_local );
    bli_obj_alias_to( b, &b_local );

    // Alias C in case we need to induce a transposition.
    bli_obj_alias_to( c, &c_local );

    // If we are about to call a leaf-level implementation, and matrix C
    // still needs a transposition, then we must induce one by swapping the
    // strides and dimensions. Note that this transposition would normally
    // be handled explicitly in the packing of C, but if C is not being
    // packed, this is our last chance to handle the transposition.
    if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( c ) )
    {
        bli_obj_induce_trans( &c_local );
        bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &c_local );
    }

    // If beta is non-unit, apply it to the scalar attached to C.
    if ( !bli_obj_equals( beta, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( beta, &c_local );
    }

    // Apply alpha to the scalar attached to whichever matrix (A or B) is NOT
    // the triangular operand. Which one that is follows from the implied side
    // parameter (ie: the structure of the root objects).
    if ( bli_obj_root_is_triangular( a ) )
    {
        // If alpha is non-unit, typecast and apply it to the scalar
        // attached to B (the non-triangular matrix).
        if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
        {
            bli_obj_scalar_apply_scalar( alpha, &b_local );
        }
    }
    else // if ( bli_obj_root_is_triangular( b ) )
    {
        // If alpha is non-unit, typecast and apply it to the scalar
        // attached to A (the non-triangular matrix).
        if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
        {
            bli_obj_scalar_apply_scalar( alpha, &a_local );
        }
    }

    // FGVZ->TMS: Is this barrier still needed?
    bli_thread_obarrier( thread );

    // Create the next node in the thrinfo_t structure.
    bli_thrinfo_grow( rntm, cntl, thread );

    // Extract the function pointer from the current control tree node.
    f = bli_cntl_var_func( cntl );

    // Invoke the variant.
    f
    (
      &a_local,
      &b_local,
      &c_local,
      cntx,
      rntm,
      cntl,
      thread
    );
}
void bli_hemv_int( conj_t  conjh,
                   obj_t*  alpha,
                   obj_t*  a,
                   obj_t*  x,
                   obj_t*  beta,
                   obj_t*  y,
                   cntx_t* cntx,
                   hemv_t* cntl )
{
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;
    obj_t     a_local;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
    {
        if ( bli_is_conj( conjh ) ) bli_hemv_check( alpha, a, x, beta, y );
        else                        bli_symv_check( alpha, a, x, beta, y );
    }

    // If y has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( *y ) ) return;

    // If x has a zero dimension, scale y by beta and return early.
    if ( bli_obj_has_zero_dim( *x ) )
    {
        bli_scalm( beta, y );
        return;
    }

    // Alias A in case we need to induce the upper triangular case.
    bli_obj_alias_to( *a, a_local );

/*
    // Our blocked algorithms only [explicitly] implement the lower triangular
    // case, so if matrix A is stored as upper triangular, we must toggle the
    // transposition (and conjugation) bits so that the diagonal partitioning
    // routines grab the correct partitions corresponding to the upper
    // triangular case. But we only need to do this for blocked algorithms,
    // since unblocked algorithms are responsible for handling the upper case
    // explicitly (and they should not be inspecting the transposition bit
    // anyway).
    if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( *a ) )
    {
        bli_obj_toggle_conj( a_local );
        bli_obj_toggle_trans( a_local );
    }
*/

    // Extract the variant number and implementation type.
    n = bli_cntl_var_num( cntl );
    i = bli_cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( conjh,
       alpha,
       &a_local,
       x,
       beta,
       y,
       cntx,
       cntl );
}
void bli_ger_int( conj_t  conjx,
                  conj_t  conjy,
                  obj_t*  alpha,
                  obj_t*  x,
                  obj_t*  y,
                  obj_t*  a,
                  cntx_t* cntx,
                  ger_t*  cntl )
{
    varnum_t  n;
    impl_t    i;
    FUNCPTR_T f;
    obj_t     alpha_local;
    obj_t     x_local;
    obj_t     y_local;
    obj_t     a_local;

    // Check parameters.
    if ( bli_error_checking_is_enabled() )
        bli_ger_check( alpha, x, y, a );

    // If A has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( a ) ) return;

    // If x or y has a zero dimension, return early.
    if ( bli_obj_has_zero_dim( x ) ||
         bli_obj_has_zero_dim( y ) ) return;

    // Alias the objects, applying conjx and conjy to x and y, respectively.
    bli_obj_alias_with_conj( conjx, x, &x_local );
    bli_obj_alias_with_conj( conjy, y, &y_local );
    bli_obj_alias_to( a, &a_local );

    // If matrix A is marked for conjugation, we interpret this as a request
    // to apply a conjugation to the other operands.
    if ( bli_obj_has_conj( &a_local ) )
    {
        bli_obj_toggle_conj( &a_local );

        bli_obj_toggle_conj( &x_local );
        bli_obj_toggle_conj( &y_local );

        bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
                                              BLIS_CONJUGATE,
                                              alpha,
                                              &alpha_local );
    }
    else
    {
        bli_obj_alias_to( alpha, &alpha_local );
    }

    // If we are about to call a leaf-level implementation, and matrix A
    // still needs a transposition, then we must induce one by swapping the
    // strides and dimensions.
    if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
    {
        bli_obj_induce_trans( &a_local );
        bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
    }

    // Extract the variant number and implementation type.
    n = bli_cntl_var_num( cntl );
    i = bli_cntl_impl_type( cntl );

    // Index into the variant array to extract the correct function pointer.
    f = vars[n][i];

    // Invoke the variant.
    f( &alpha_local,
       &x_local,
       &y_local,
       &a_local,
       cntx,
       cntl );
}