void bli_ger_blk_var1( obj_t* alpha, obj_t* x, obj_t* y, obj_t* a, ger_t* cntl ) { obj_t a1, a1_pack; obj_t x1, x1_pack; dim_t i; dim_t b_alg; dim_t m_trans; // Initialize objects for packing. bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension in partitioning direction. m_trans = bli_obj_length_after_trans( *a ); // Partition along the m dimension. for ( i = 0; i < m_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, m_trans, a, cntl_blocksize( cntl ) ); // Acquire partitions for A1 and x1. bli_acquire_mpart_t2b( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, b_alg, x, &x1 ); // Initialize objects for packing A1 and x1 (if needed). bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_init( &x1, &x1_pack, cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ), &BLIS_PACKM_SINGLE_THREADED ); bli_packv_int( &x1, &x1_pack, cntl_sub_packv_x( cntl ) ); // A1 = A1 + alpha * x1 * y; bli_ger_int( BLIS_NO_CONJUGATE, BLIS_NO_CONJUGATE, alpha, &x1_pack, y, &a1_pack, cntl_sub_ger( cntl ) ); // Copy/unpack A1 (if A1 was packed). bli_unpackm_int( &a1_pack, &a1, cntl_sub_unpackm_a( cntl ), &BLIS_PACKM_SINGLE_THREADED ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_packm_release( &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_release( &x1_pack, cntl_sub_packv_x( cntl ) ); }
void bli_gemv_blk_var2( obj_t* alpha, obj_t* a, obj_t* x, obj_t* beta, obj_t* y, gemv_t* cntl ) { obj_t a1, a1_pack; obj_t x1, x1_pack; dim_t n_trans; dim_t i; dim_t b_alg; // Initialize objects for packing. bli_obj_init_pack( &a1_pack ); bli_obj_init_pack( &x1_pack ); // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *a ); // y = beta * y; bli_scalv_int( beta, y, cntl_sub_scalv( cntl ) ); // Partition along the "k" dimension (n dimension of A). for ( i = 0; i < n_trans; i += b_alg ) { // Determine the current algorithmic blocksize. b_alg = bli_determine_blocksize_f( i, n_trans, a, cntl_blocksize( cntl ) ); // Acquire partitions for A1 and x1. bli_acquire_mpart_l2r( BLIS_SUBPART1, i, b_alg, a, &a1 ); bli_acquire_vpart_f2b( BLIS_SUBPART1, i, b_alg, x, &x1 ); // Initialize objects for packing A1 and x1 (if needed). bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_init( &x1, &x1_pack, cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &x1, &x1_pack, cntl_sub_packv_x( cntl ) ); // y = y + alpha * A1 * x1; bli_gemv_int( BLIS_NO_TRANSPOSE, BLIS_NO_CONJUGATE, alpha, &a1_pack, &x1_pack, &BLIS_ONE, y, cntl_sub_gemv( cntl ) ); } // If any packing buffers were acquired within packm, release them back // to the memory manager. bli_obj_release_pack( &a1_pack ); bli_obj_release_pack( &x1_pack ); }