/*
 * bli_scalv_int
 *
 * Internal entry point for scalv: selects a variant from the `vars` table
 * (declared outside this excerpt) according to the control tree and invokes
 * it on ( beta, x ).
 *
 *   beta  scalar operand passed through to the variant.
 *   x     vector operand passed through to the variant.
 *   cntl  control tree node supplying the variant number and the
 *         implementation type; may indicate a no-op.
 */
void bli_scalv_int( obj_t* beta, obj_t* x, scalv_t* cntl )
{
    // Run the parameter checks only when error checking is switched on.
    if ( bli_error_checking_is_enabled() )
        bli_scalv_int_check( beta, x, cntl );

    // There is nothing to do when any of the following holds (tested in
    // this order, with short-circuiting):
    //   - the control tree marks this operation as a no-op;
    //   - x has a zero dimension;
    //   - beta is equal to one.
    if ( cntl_is_noop( cntl ) ||
         bli_obj_has_zero_dim( *x ) ||
         bli_obj_equals( beta, &BLIS_ONE ) )
    {
        return;
    }

    // Look up the variant selected by the control tree and call it.
    const varnum_t  variant = cntl_var_num( cntl );
    const impl_t    impl    = cntl_impl_type( cntl );
    const FUNCPTR_T kernel  = vars[variant][impl];

    kernel( beta, x );
}
/*
 * bli_her_int
 *
 * Internal entry point for her: normalizes conjugation on local aliases of
 * x and c, then dispatches to the variant chosen by the control tree.
 *
 *   conjh  conjugation parameter forwarded unchanged to the variant.
 *   alpha  scalar operand forwarded unchanged to the variant.
 *   x      vector operand (aliased locally; the caller's object is not
 *          passed to the variant).
 *   c      matrix operand (aliased locally, as above).
 *   cntl   control tree node supplying the variant number and the
 *          implementation type; also forwarded to the variant.
 */
void bli_her_int( conj_t conjh, obj_t* alpha, obj_t* x, obj_t* c, her_t* cntl )
{
    obj_t x_alias;
    obj_t c_alias;

    // Run the parameter checks only when error checking is switched on.
    if ( bli_error_checking_is_enabled() )
        bli_her_int_check( conjh, alpha, x, c, cntl );

    // An empty C or an empty x leaves nothing to update.
    if ( bli_obj_has_zero_dim( *c ) || bli_obj_has_zero_dim( *x ) )
        return;

    // Work on aliases so that conjugation bits can be flipped without
    // touching the objects the caller handed us.
    bli_obj_alias_to( *x, x_alias );
    bli_obj_alias_to( *c, c_alias );

    // A conjugation marked on C is treated as a request to conjugate the
    // other operands instead: clear it on C and flip it on x. Alpha is left
    // alone because, per the original author's note, it is guaranteed to
    // be real.
    if ( bli_obj_has_conj( c_alias ) )
    {
        bli_obj_toggle_conj( c_alias );
        bli_obj_toggle_conj( x_alias );
    }

    // Look up the variant selected by the control tree and call it.
    const varnum_t  variant = cntl_var_num( cntl );
    const impl_t    impl    = cntl_impl_type( cntl );
    const FUNCPTR_T kernel  = vars[variant][impl];

    kernel( conjh, alpha, &x_alias, &c_alias, cntl );
}
/*
 * bli_scalm_int
 *
 * Internal entry point for scalm: folds beta into the scalar attached to a
 * local alias of x and dispatches to the variant chosen by the control tree.
 * The variant receives only the alias; beta reaches it through the attached
 * scalar.
 *
 *   beta  scalar to apply.
 *   x     matrix operand (aliased locally before its attached scalar is
 *         updated).
 *   cntl  control tree node supplying the variant number and the
 *         implementation type; may indicate a no-op.
 */
void bli_scalm_int( obj_t* beta, obj_t* x, scalm_t* cntl )
{
    obj_t x_alias;

    // Run the parameter checks only when error checking is switched on.
    if ( bli_error_checking_is_enabled() )
        bli_scalm_int_check( beta, x, cntl );

    // Skip entirely if the control tree says so, or if x is empty.
    if ( cntl_is_noop( cntl ) ) return;
    if ( bli_obj_has_zero_dim( *x ) ) return;

    // Scaling is an identity when beta is one AND the scalar already
    // attached to x is one; in that case there is no work to do.
    if ( bli_obj_equals( beta, &BLIS_ONE ) &&
         bli_obj_scalar_equals( x, &BLIS_ONE ) )
    {
        return;
    }

    // Take an alias of x so the attached scalar can be modified locally.
    bli_obj_alias_to( *x, x_alias );

    // Only a non-unit beta needs to be applied to the attached scalar.
    if ( !bli_obj_equals( beta, &BLIS_ONE ) )
    {
        bli_obj_scalar_apply_scalar( beta, &x_alias );
    }

    // Look up the variant selected by the control tree and call it.
    const varnum_t  variant = cntl_var_num( cntl );
    const impl_t    impl    = cntl_impl_type( cntl );
    const FUNCPTR_T kernel  = vars[variant][impl];

    kernel( &x_alias );
}
/*
 * bli_unpackv_int
 *
 * Internal entry point for unpackv: unpacks p into a by dispatching to the
 * variant chosen by the control tree.
 *
 * Possible outcomes:
 *   - no-op:       the control tree directs us to skip the operation.
 *   - alias:       p is an alias of a (per the original note, because a was
 *                  already in an acceptable packed/contiguous format at pack
 *                  time), so there is nothing to unpack.
 *   - unpack only: p is unpacked directly into a.
 *
 * An "unpack and cast" path (unpack to a temporary, then typecast into a)
 * was sketched by the original author but is disabled; a datatype mismatch
 * between p and a currently aborts. The author's note on that disabled path
 * says that real-to-complex handling should be done in the front-end rather
 * than here.
 *
 *   p     packed source vector.
 *   a     destination vector; must not have a zero dimension.
 *   cntx  context, forwarded to the check routine and to the variant.
 *   cntl  control tree node supplying the variant number and the
 *         implementation type; also forwarded to the variant.
 */
void bli_unpackv_int( obj_t* p, obj_t* a, cntx_t* cntx, unpackv_t* cntl )
{
    obj_t dest;

    // Run the parameter checks only when error checking is switched on.
    if ( bli_error_checking_is_enabled() )
        bli_unpackv_check( p, a, cntx );

    // Sanity check: a is never expected to have a zero dimension here.
    if ( bli_obj_has_zero_dim( *a ) )
        bli_abort();

    // Skip the operation if the control tree says so.
    if ( cntl_is_noop( cntl ) )
        return;

    // If p is merely an alias of a, no data needs to move.
    if ( bli_obj_is_alias_of( *p, *a ) )
        return;

    // Typecasting during unpack is not supported: the datatypes of p and a
    // must agree.
    if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
        bli_abort();

    // With no cast required, the destination handed to the variant is just
    // an alias of a, so the variant unpacks straight into a.
    bli_obj_alias_to( *a, dest );

    // Look up the variant selected by the control tree and call it.
    const varnum_t  variant = cntl_var_num( cntl );
    const impl_t    impl    = cntl_impl_type( cntl );
    const FUNCPTR_T kernel  = vars[variant][impl];

    kernel( p, &dest, cntx, cntl );
}
void bli_trmm_int( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, trmm_t* cntl ) { obj_t a_local; obj_t b_local; obj_t c_local; bool_t side, uplo; varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trmm_int_check( alpha, a, b, beta, c, cntl ); // If C has a zero dimension, return early. if ( bli_obj_has_zero_dim( *c ) ) return; // If A or B has a zero dimension, scale C by beta and return early. if ( bli_obj_has_zero_dim( *a ) || bli_obj_has_zero_dim( *b ) ) { bli_scalm( beta, c ); return; } // Alias A and B in case we need to update attached scalars. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); // If we are about to call a leaf-level implementation, and matrix C // still needs a transposition, then we must induce one by swapping the // strides and dimensions. Note that this transposition would normally // be handled explicitly in the packing of C, but if C is not being // packed, this is our last chance to handle the transposition. if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) ) { bli_obj_induce_trans( c_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } // If alpha is non-unit, typecast and apply it to the scalar attached // to B. if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &b_local ); } // If beta is non-unit, typecast and apply it to the scalar attached // to C. if ( !bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( beta, &c_local ); } // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). 
if ( bli_obj_root_is_triangular( *a ) ) { side = 0; if ( bli_obj_root_is_lower( *a ) ) uplo = 0; else uplo = 1; } else // if ( bli_obj_root_is_triangular( *b ) ) { side = 1; // Set a bool based on the uplo field of A's root object. if ( bli_obj_root_is_lower( *b ) ) uplo = 0; else uplo = 1; } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[side][uplo][n][i]; // Invoke the variant. f( &a_local, &b_local, &c_local, cntl ); }
/*
 * bli_packv_int
 *
 * Internal entry point for packv: packs vector a into p by dispatching to
 * the variant chosen by the control tree, unless packing can be skipped.
 *
 * Packing is skipped (plain return) when:
 *   - the control tree marks the operation as a no-op;
 *   - a's pack schema is BLIS_PACKED_UNSPEC; or
 *   - a's pack schema already equals the schema requested by the control
 *     tree.
 * According to the original notes, in those cases p was already aliased to
 * a in packv_init(), so no further action is required here.
 *
 * A "cast and pack" mode (typecast a into a temporary, then pack) is
 * described by the original author but is not implemented in this routine.
 * A zero-dimension sanity check on a is likewise disabled in the original
 * source.
 *
 *   a     source vector.
 *   p     packed destination vector.
 *   cntx  context, forwarded to the check routine and to the variant.
 *   cntl  control tree node supplying the pack schema, the variant number
 *         and the implementation type; also forwarded to the variant.
 */
void bli_packv_int( obj_t* a, obj_t* p, cntx_t* cntx, packv_t* cntl )
{
    // Run the parameter checks only when error checking is switched on.
    if ( bli_error_checking_is_enabled() )
        bli_packv_check( a, p, cntx );

    // Skip the operation if the control tree says so.
    if ( cntl_is_noop( cntl ) )
        return;

    // BLIS_PACKED_UNSPEC is used (per the original note) when the exact
    // format is unimportant as long as the data is contiguous by rows or by
    // columns -- e.g. matrix operands of level-2 operations. The control
    // tree need not be consulted in that case.
    if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
        return;

    // Already packed to exactly the schema the control tree asks for.
    // (Most objects arrive unpacked, so this is the less common exit; not
    // every combination of current and desired schema is valid.)
    if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
        return;

    // Look up the variant selected by the control tree and call it.
    const varnum_t  variant = cntl_var_num( cntl );
    const impl_t    impl    = cntl_impl_type( cntl );
    const FUNCPTR_T kernel  = vars[variant][impl];

    kernel( a, p, cntx, cntl );
}
void bli_gemm_int( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, gemm_t* cntl, gemm_thrinfo_t* thread ) { obj_t a_local; obj_t b_local; obj_t c_local; varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_int_check( alpha, a, b, beta, c, cntl ); // If C has a zero dimension, return early. if ( bli_obj_has_zero_dim( *c ) ) return; // If A or B has a zero dimension, scale C by beta and return early. if ( bli_obj_has_zero_dim( *a ) || bli_obj_has_zero_dim( *b ) ) { if( thread_am_ochief( thread ) ) bli_scalm( beta, c ); thread_obarrier( thread ); return; } // If A or B is marked as being filled with zeros, scale C by beta and // return early. if ( bli_obj_is_zeros( *a ) || bli_obj_is_zeros( *b ) ) { if( thread_am_ochief( thread ) ) bli_scalm( beta, c ); thread_obarrier( thread ); return; } // Alias A and B in case we need to update attached scalars. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); // If we are about to call a leaf-level implementation, and matrix C // still needs a transposition, then we must induce one by swapping the // strides and dimensions. Note that this transposition would normally // be handled explicitly in the packing of C, but if C is not being // packed, this is our last chance to handle the transposition. if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) ) { //if( thread_am_ochief( thread ) ) { bli_obj_induce_trans( c_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); // } } // If alpha is non-unit, typecast and apply it to the scalar attached // to B. if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( alpha, &b_local ); } // If beta is non-unit, typecast and apply it to the scalar attached // to C. 
if ( !bli_obj_equals( beta, &BLIS_ONE ) ) { bli_obj_scalar_apply_scalar( beta, &c_local ); } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( &a_local, &b_local, &c_local, cntl, thread ); }
/*
 * bli_packm_int
 *
 * Internal entry point for packm: packs matrix a into p by dispatching to
 * the variant chosen by the control tree, unless packing can be skipped.
 *
 * Packing is skipped (plain return) when:
 *   - the control tree marks the operation as a no-op;
 *   - a's pack status is BLIS_PACKED_UNSPEC; or
 *   - a's pack status already equals the schema requested by the control
 *     tree.
 * According to the original notes, in those cases p was already aliased to
 * a in packm_init(), so no further action is required here.
 *
 * Disabled in the original source (and therefore not performed here):
 *   - a zero-dimension sanity check on a;
 *   - a draft that would detach the scalar attached to a when it has a
 *     nonzero imaginary component and apply it during packing (as "kappa"),
 *     using BLIS_ONE otherwise. The variant is invoked without any kappa
 *     argument.
 *
 *   a     source matrix.
 *   p     packed destination matrix.
 *   cntl  control tree node supplying the pack schema, the variant number
 *         and the implementation type.
 */
void bli_packm_int( obj_t* a, obj_t* p, packm_t* cntl )
{
    // Run the parameter checks only when error checking is switched on.
    if ( bli_error_checking_is_enabled() )
        bli_packm_int_check( a, p, cntl );

    // Skip the operation if the control tree says so.
    if ( cntl_is_noop( cntl ) )
        return;

    // BLIS_PACKED_UNSPEC is used (per the original note) when the exact
    // format is unimportant as long as the data is contiguous by rows or by
    // columns -- e.g. matrix operands of level-2 operations. The control
    // tree need not be consulted in that case.
    if ( bli_obj_pack_status( *a ) == BLIS_PACKED_UNSPEC )
        return;

    // Already packed to exactly the schema the control tree asks for.
    // (Most objects arrive unpacked, so this is the less common exit; not
    // every combination of current status and desired schema is valid.)
    if ( bli_obj_pack_status( *a ) == cntl_pack_schema( cntl ) )
        return;

    // Look up the variant selected by the control tree and call it.
    const varnum_t  variant = cntl_var_num( cntl );
    const impl_t    impl    = cntl_impl_type( cntl );
    const FUNCPTR_T kernel  = vars[variant][impl];

    kernel( a, p );
}
void bli_ger_int( conj_t conjx, conj_t conjy, obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, ger_t* cntl ) { varnum_t n; impl_t i; FUNCPTR_T f; obj_t alpha_local; obj_t x_local; obj_t y_local; obj_t a_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_ger_int_check( alpha, x, y, a, cntl ); // If A has a zero dimension, return early. if ( bli_obj_has_zero_dim( *a ) ) return; // If x or y has a zero dimension, return early. if ( bli_obj_has_zero_dim( *x ) || bli_obj_has_zero_dim( *y ) ) return; // Alias the objects, applying conjx and conjy to x and y, respectively. bli_obj_alias_with_conj( conjx, *x, x_local ); bli_obj_alias_with_conj( conjy, *y, y_local ); bli_obj_alias_to( *a, a_local ); // If matrix A is marked for conjugation, we interpret this as a request // to apply a conjugation to the other operands. if ( bli_obj_has_conj( a_local ) ) { bli_obj_toggle_conj( a_local ); bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( y_local ); bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); } else { bli_obj_alias_to( *alpha, alpha_local ); } // If we are about the call a leaf-level implementation, and matrix A // still needs a transposition, then we must induce one by swapping the // strides and dimensions. if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( a_local ) ) { bli_obj_induce_trans( a_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local ); } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); // Index into the variant array to extract the correct function pointer. f = vars[n][i]; // Invoke the variant. f( &alpha_local, &x_local, &y_local, &a_local, cntl ); }