void bli_gemm_front ( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { #ifdef BLIS_SMALL_MATRIX_ENABLE #ifndef BLIS_ENABLE_MULTITHREADING gint_t status = bli_gemm_small_matrix(alpha, a, b, beta, c, cntx, cntl); if(BLIS_SUCCESS != status) #endif #endif { obj_t a_local; obj_t b_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Reinitialize the memory allocator to accommodate the blocksizes // in the current context. bli_memsys_reinit( cntx ); // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); bli_obj_alias_to( *c, c_local ); // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( a_local, b_local ); bli_obj_induce_trans( a_local ); bli_obj_induce_trans( b_local ); bli_obj_induce_trans( c_local ); } // Set the operation family id in the context. bli_cntx_set_family( BLIS_GEMM, cntx ); // Record the threading for each level within the context. bli_cntx_set_thrloop_from_env( BLIS_GEMM, BLIS_LEFT, cntx, bli_obj_length( c_local ), bli_obj_width( c_local ), bli_obj_width( a_local ) ); // Invoke the internal back-end via the thread handler. bli_l3_thread_decorator ( bli_gemm_int, alpha, &a_local, &b_local, beta, &c_local, cntx, cntl ); } }
void bli_symm_front ( side_t side, obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c, cntx_t* cntx, cntl_t* cntl ) { obj_t a_local; obj_t b_local; obj_t c_local; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_symm_check( side, alpha, a, b, beta, c, cntx ); // If alpha is zero, scale by beta and return. if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; } // Reinitialize the memory allocator to accommodate the blocksizes // in the current context. bli_memsys_reinit( cntx ); // Alias A, B, and C in case we need to apply transformations. bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( *b, b_local ); bli_obj_alias_to( *c, c_local ); // An optimization: If C is stored by rows and the micro-kernel prefers // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( side ); bli_obj_induce_trans( b_local ); bli_obj_induce_trans( c_local ); } // Swap A and B if multiplying A from the right so that "B" contains // the symmetric matrix. if ( bli_is_right( side ) ) { bli_obj_swap( a_local, b_local ); } // Set the operation family id in the context. bli_cntx_set_family( BLIS_GEMM, cntx ); // Record the threading for each level within the context. bli_cntx_set_thrloop_from_env( BLIS_SYMM, BLIS_LEFT, cntx ); // Invoke the internal back-end. bli_l3_thread_decorator ( bli_gemm_int, alpha, &a_local, &b_local, beta, &c_local, cntx, cntl ); }