/*
 * FLA_Copy_blk_var1
 *
 * Blocked copy driver, variant 1.  A and B are partitioned identically
 * into a top part and a bottom part; the top parts start with 0 rows and
 * the loop runs until the top part of A has as many rows as A itself.
 *
 * On every pass a block A1 (and the matching B1) is split off the bottom
 * part, handed to FLA_Copy_internal() together with the sub-control tree
 * of cntl, and then folded into the top part.
 *
 * NOTE(review): the exact slicing performed by the FLA_Part / FLA_Repart /
 * FLA_Cont_with helpers is defined elsewhere; the description above
 * follows their names and argument order -- confirm against their
 * definitions.
 *
 * Parameters:
 *   A    - operand passed as the first argument of every sub-copy.
 *   B    - operand passed as the second argument of every sub-copy.
 *   cntl - control tree supplying the blocksize and the sub-copy node.
 *
 * Returns FLA_SUCCESS unconditionally; the value returned by
 * FLA_Copy_internal() for the individual blocks is not inspected.
 */
FLA_Error FLA_Copy_blk_var1( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl )
{
  /* Two-way (top / bottom) views of each operand. */
  FLA_Obj AT, AB;
  FLA_Obj BT, BB;

  /* Three-way views exposing the current block (A1 / B1). */
  FLA_Obj A0, A1, A2;
  FLA_Obj B0, B1, B2;

  FLA_Part_2x1( A, &AT,
                   &AB, 0, FLA_TOP );
  FLA_Part_2x1( B, &BT,
                   &BB, 0, FLA_TOP );

  while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) )
  {
    /* Block size for this pass, determined from what is left in AB. */
    const dim_t blk = FLA_Determine_blocksize( AB, FLA_BOTTOM,
                                               FLA_Cntl_blocksize( cntl ) );

    /* Expose the next block from the bottom partitions. */
    FLA_Repart_2x1_to_3x1( AT, &A0,
                               &A1,
                           AB, &A2, blk, FLA_BOTTOM );
    FLA_Repart_2x1_to_3x1( BT, &B0,
                               &B1,
                           BB, &B2, blk, FLA_BOTTOM );

    /* Copy the exposed block via the sub-control tree. */
    FLA_Copy_internal( A1, B1, FLA_Cntl_sub_copy( cntl ) );

    /* Attach the processed block to the top partitions. */
    FLA_Cont_with_3x1_to_2x1( &AT, A0,
                                   A1,
                              &AB, A2, FLA_TOP );
    FLA_Cont_with_3x1_to_2x1( &BT, B0,
                                   B1,
                              &BB, B2, FLA_TOP );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Copy
 *
 * Front end for the copy operation on A and B.
 *
 * When FLA_ENABLE_BLAS1_FRONT_END_CNTL_TREES is defined, the parameters
 * are checked with FLA_Copy_check() (only if the current error-checking
 * level is at least FLA_MIN_ERROR_CHECKING) and the work is routed
 * through FLA_Copy_internal() with the flat control tree
 * fla_copy_cntl_blas, which simply calls the external wrapper.
 *
 * Otherwise the call is forwarded straight to FLA_Copy_external(); no
 * check is issued from this function in that configuration.
 *
 * Returns whatever the selected back end returns.
 */
FLA_Error FLA_Copy( FLA_Obj A, FLA_Obj B )
{
#ifdef FLA_ENABLE_BLAS1_FRONT_END_CNTL_TREES
  /* Validate the operands when error checking is switched on. */
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Copy_check( A, B );
  }

  /* Flat control tree: ends up in the external wrapper. */
  return FLA_Copy_internal( A, B, fla_copy_cntl_blas );
#else
  return FLA_Copy_external( A, B );
#endif
}
/*
 * FLASH_Copy
 *
 * Front end for the copy operation that runs FLA_Copy_internal() with the
 * flash_copy_cntl control tree, bracketed by FLASH_Queue_begin() and
 * FLASH_Queue_end().
 *
 * The parameters are checked with FLA_Copy_check() first, but only if the
 * current error-checking level is at least FLA_MIN_ERROR_CHECKING.
 *
 * Returns the value produced by FLA_Copy_internal().  FLASH_Queue_end()
 * is called before returning regardless of that value.
 */
FLA_Error FLASH_Copy( FLA_Obj A, FLA_Obj B )
{
  FLA_Error status;

  /* Validate the operands when error checking is switched on. */
  if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING )
  {
    FLA_Copy_check( A, B );
  }

  /* Open the parallel region. */
  FLASH_Queue_begin();

  /* Execute tasks. */
  status = FLA_Copy_internal( A, B, flash_copy_cntl );

  /* Close the parallel region. */
  FLASH_Queue_end();

  return status;
}
/*
 * FLA_Copy_blk_var4
 *
 * Blocked copy driver, variant 4.  A and B are partitioned identically
 * into a left part and a right part; the right parts start with 0
 * columns and the loop runs until the right part of A is as wide as A
 * itself.
 *
 * On every pass a block A1 (and the matching B1) is split off the left
 * part, handed to FLA_Copy_internal() together with the sub-control tree
 * of cntl, and then folded into the right part.
 *
 * NOTE(review): the exact slicing performed by the FLA_Part / FLA_Repart /
 * FLA_Cont_with helpers is defined elsewhere; the description above
 * follows their names and argument order -- confirm against their
 * definitions.
 *
 * Parameters:
 *   A    - operand passed as the first argument of every sub-copy.
 *   B    - operand passed as the second argument of every sub-copy.
 *   cntl - control tree supplying the blocksize and the sub-copy node.
 *
 * Returns FLA_SUCCESS unconditionally; the value returned by
 * FLA_Copy_internal() for the individual blocks is not inspected.
 */
FLA_Error FLA_Copy_blk_var4( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl )
{
  /* Two-way (left / right) views of each operand. */
  FLA_Obj AL, AR;
  FLA_Obj BL, BR;

  /* Three-way views exposing the current block (A1 / B1). */
  FLA_Obj A0, A1, A2;
  FLA_Obj B0, B1, B2;

  FLA_Part_1x2( A, &AL, &AR, 0, FLA_RIGHT );
  FLA_Part_1x2( B, &BL, &BR, 0, FLA_RIGHT );

  while ( FLA_Obj_width( AR ) < FLA_Obj_width( A ) )
  {
    /* Block size for this pass, determined from what is left in AL. */
    const dim_t blk = FLA_Determine_blocksize( AL, FLA_LEFT,
                                               FLA_Cntl_blocksize( cntl ) );

    /* Expose the next block from the left partitions. */
    FLA_Repart_1x2_to_1x3( AL, AR,
                           &A0, &A1, &A2,
                           blk, FLA_LEFT );
    FLA_Repart_1x2_to_1x3( BL, BR,
                           &B0, &B1, &B2,
                           blk, FLA_LEFT );

    /* Copy the exposed block via the sub-control tree. */
    FLA_Copy_internal( A1, B1, FLA_Cntl_sub_copy( cntl ) );

    /* Attach the processed block to the right partitions. */
    FLA_Cont_with_1x3_to_1x2( &AL, &AR,
                              A0, A1, A2,
                              FLA_RIGHT );
    FLA_Cont_with_1x3_to_1x2( &BL, &BR,
                              B0, B1, B2,
                              FLA_RIGHT );
  }

  return FLA_SUCCESS;
}
/*
 * FLA_Copy_internal
 *
 * Control-tree driven dispatcher for the copy operation on A and B.
 *
 * If the error-checking level is exactly FLA_FULL_ERROR_CHECKING, the
 * arguments are first passed to FLA_Copy_internal_check().
 *
 * Routing, in the order the cases are tested:
 *   1. cntl is FLA_HIER, A has element type FLA_MATRIX and the variant is
 *      FLA_SUBPROBLEM: recurse on the objects obtained through
 *      FLASH_OBJ_PTR_AT() for A and B, using flash_copy_cntl.
 *   2. cntl is FLA_HIER, A has element type FLA_SCALAR and
 *      FLASH_Queue_get_enabled() is true: enqueue the operation with
 *      ENQUEUE_FLASH_Copy() and return FLA_SUCCESS (the initial r_val).
 *   3. Same as 2 but with the queue disabled: cntl is replaced by
 *      flash_copy_cntl_blas and processing continues with step 4.
 *   4. Dispatch on the variant stored in cntl: FLA_SUBPROBLEM goes to
 *      FLA_Copy_task(), FLA_BLOCKED_VARIANT1 / 3 to the matching
 *      FLA_Copy_blk_var*() routine.  FLA_BLOCKED_VARIANT2 / 4 are only
 *      dispatched when FLA_ENABLE_NON_CRITICAL_CODE is defined.  Anything
 *      else yields FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED ).
 *
 * Only the element type of A is examined; B is not inspected here.
 *
 * Returns the value of whichever routine the call was routed to.
 */
FLA_Error FLA_Copy_internal( FLA_Obj A, FLA_Obj B, fla_copy_t* cntl )
{
  FLA_Error r_val = FLA_SUCCESS;

  if ( FLA_Check_error_level() == FLA_FULL_ERROR_CHECKING )
  {
    FLA_Copy_internal_check( A, B, cntl );
  }

  /* Case 1: recurse. */
  if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
       FLA_Obj_elemtype( A ) == FLA_MATRIX &&
       FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
  {
    return FLA_Copy_internal( *FLASH_OBJ_PTR_AT( A ),
                              *FLASH_OBJ_PTR_AT( B ),
                              flash_copy_cntl );
  }

  /* Case 2: enqueue. */
  if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
       FLA_Obj_elemtype( A ) == FLA_SCALAR &&
       FLASH_Queue_get_enabled( ) )
  {
    ENQUEUE_FLASH_Copy( A, B, cntl );
    return r_val;
  }

  /* Case 3: execute the leaf with the BLAS control tree. */
  if ( FLA_Cntl_matrix_type( cntl ) == FLA_HIER &&
       FLA_Obj_elemtype( A ) == FLA_SCALAR &&
       !FLASH_Queue_get_enabled( ) )
  {
    cntl = flash_copy_cntl_blas;
  }

  /* Case 4: parameter combinations -- first matching variant wins. */
  if ( FLA_Cntl_variant( cntl ) == FLA_SUBPROBLEM )
  {
    return FLA_Copy_task( A, B, cntl );
  }

  if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT1 )
  {
    return FLA_Copy_blk_var1( A, B, cntl );
  }

#ifdef FLA_ENABLE_NON_CRITICAL_CODE
  if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT2 )
  {
    return FLA_Copy_blk_var2( A, B, cntl );
  }
#endif

  if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT3 )
  {
    return FLA_Copy_blk_var3( A, B, cntl );
  }

#ifdef FLA_ENABLE_NON_CRITICAL_CODE
  if ( FLA_Cntl_variant( cntl ) == FLA_BLOCKED_VARIANT4 )
  {
    return FLA_Copy_blk_var4( A, B, cntl );
  }
#endif

  /* No implementation is wired up for this variant. */
  return FLA_Check_error_code( FLA_NOT_YET_IMPLEMENTED );
}
/*
 * FLA_Copyr_u_blk_var4
 *
 * Blocked driver for the upper-triangular copy, variant 4.  A and B are
 * partitioned identically into 2x2 quadrants whose bottom-right quadrant
 * starts out 0 x 0; the loop runs while the smaller dimension of the
 * top-left quadrant of A is non-zero.
 *
 * On every pass a 3x3 repartitioning exposes a block A11 taken from the
 * top-left quadrant together with the block A01 that sits in the same
 * block column of the 3x3 view, and two sub-operations are issued:
 *   B11 = triu( A11 )  via FLA_Copyr_internal() with FLA_UPPER_TRIANGULAR
 *   B01 = A01          via FLA_Copy_internal()
 * The exposed blocks are then folded into the bottom-right side.
 *
 * NOTE(review): the exact slicing performed by the FLA_Part / FLA_Repart /
 * FLA_Cont_with helpers is defined elsewhere; the description above
 * follows their names and argument order -- confirm against their
 * definitions.
 *
 * Parameters:
 *   A    - operand passed as the source of both sub-operations.
 *   B    - operand passed as the destination of both sub-operations.
 *   cntl - control tree supplying the blocksize and the sub-copyr and
 *          sub-copy nodes.
 *
 * Returns FLA_SUCCESS unconditionally; the values returned by the two
 * sub-operations are not inspected.
 */
FLA_Error FLA_Copyr_u_blk_var4( FLA_Obj A, FLA_Obj B, fla_copyr_t* cntl )
{
  /* 2x2 quadrant views. */
  FLA_Obj ATL, ATR,
          ABL, ABR;
  FLA_Obj BTL, BTR,
          BBL, BBR;

  /* 3x3 views exposing the current diagonal block (A11 / B11). */
  FLA_Obj A00, A01, A02,
          A10, A11, A12,
          A20, A21, A22;
  FLA_Obj B00, B01, B02,
          B10, B11, B12,
          B20, B21, B22;

  FLA_Part_2x2( A, &ATL, &ATR,
                   &ABL, &ABR, 0, 0, FLA_BR );
  FLA_Part_2x2( B, &BTL, &BTR,
                   &BBL, &BBR, 0, 0, FLA_BR );

  while ( FLA_Obj_min_dim( ATL ) > 0 )
  {
    /* Block size for this pass, determined from what is left in ATL. */
    const dim_t blk = FLA_Determine_blocksize( ATL, FLA_TL,
                                               FLA_Cntl_blocksize( cntl ) );

    /* Expose the next diagonal block from the top-left quadrants. */
    FLA_Repart_2x2_to_3x3( ATL, ATR,
                           &A00, &A01, &A02,
                           &A10, &A11, &A12,
                           ABL, ABR,
                           &A20, &A21, &A22,
                           blk, blk, FLA_TL );
    FLA_Repart_2x2_to_3x3( BTL, BTR,
                           &B00, &B01, &B02,
                           &B10, &B11, &B12,
                           BBL, BBR,
                           &B20, &B21, &B22,
                           blk, blk, FLA_TL );

    /* B11 = triu( A11 ); */
    FLA_Copyr_internal( FLA_UPPER_TRIANGULAR, A11, B11,
                        FLA_Cntl_sub_copyr( cntl ) );

    /* B01 = A01; */
    FLA_Copy_internal( A01, B01,
                       FLA_Cntl_sub_copy( cntl ) );

    /* Attach the processed blocks to the bottom-right side. */
    FLA_Cont_with_3x3_to_2x2( &ATL, &ATR,
                              A00, A01, A02,
                              A10, A11, A12,
                              &ABL, &ABR,
                              A20, A21, A22,
                              FLA_BR );
    FLA_Cont_with_3x3_to_2x2( &BTL, &BTR,
                              B00, B01, B02,
                              B10, B11, B12,
                              &BBL, &BBR,
                              B20, B21, B22,
                              FLA_BR );
  }

  return FLA_SUCCESS;
}