FLA_Error FLA_Check_identical_object_datatype( FLA_Obj A, FLA_Obj B ) { FLA_Error e_val = FLA_SUCCESS; if ( FLA_Obj_datatype( A ) != FLA_Obj_datatype( B ) ) e_val = FLA_OBJECT_DATATYPES_NOT_EQUAL; return e_val; }
FLA_Error FLA_Check_consistent_datatype( FLA_Datatype datatype, FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; if ( FLA_Obj_datatype( A ) != FLA_CONSTANT && datatype != FLA_CONSTANT ) if ( FLA_Obj_datatype( A ) != datatype ) e_val = FLA_INCONSISTENT_DATATYPES; return e_val; }
FLA_Error FLA_Check_consistent_object_datatype( FLA_Obj A, FLA_Obj B ) { FLA_Error e_val = FLA_SUCCESS; if ( FLA_Obj_datatype( A ) != FLA_CONSTANT && FLA_Obj_datatype( B ) != FLA_CONSTANT ) if ( FLA_Obj_datatype( A ) != FLA_Obj_datatype( B ) ) e_val = FLA_INCONSISTENT_DATATYPES; return e_val; }
FLA_Error REF_Syrk_ln( FLA_Obj A, FLA_Obj C ) { FLA_Datatype datatype; int k, m, ldim_A, ldim_C; datatype = FLA_Obj_datatype( A ); ldim_A = FLA_Obj_ldim( A ); ldim_C = FLA_Obj_ldim( C ); k = FLA_Obj_width( A ); m = FLA_Obj_length( A ); switch( datatype ){ case FLA_DOUBLE: { double *buff_A, *buff_C, d_one=1.0; buff_A = ( double * ) FLA_Obj_buffer_at_view( A ); buff_C = ( double * ) FLA_Obj_buffer_at_view( C ); dsyrk_( "L", "N", &m, &k, &d_one, buff_A, &ldim_A, &d_one, buff_C, &ldim_C ); } break; } return 0; }
FLA_Error REF_Gemm_nn( FLA_Obj A, FLA_Obj B, FLA_Obj C ) { FLA_Datatype datatype; int m, k, n, ldim_A, ldim_B, ldim_C; datatype = FLA_Obj_datatype( A ); ldim_A = FLA_Obj_ldim( A ); ldim_B = FLA_Obj_ldim( B ); ldim_C = FLA_Obj_ldim( C ); m = FLA_Obj_length( A ); k = FLA_Obj_width( A ); n = FLA_Obj_width( B ); switch( datatype ){ case FLA_DOUBLE: { double *buff_A, *buff_B, *buff_C, d_one=1.0; buff_A = ( double * ) FLA_Obj_buffer_at_view( A ); buff_B = ( double * ) FLA_Obj_buffer_at_view( B ); buff_C = ( double * ) FLA_Obj_buffer_at_view( C ); FLA_C2F( dgemm )( "N", "N", &m, &n, &k, &d_one, buff_A, &ldim_A, buff_B, &ldim_B, &d_one, buff_C, &ldim_C ); } break; } return 0; }
FLA_Error FLA_Chol_u_opt_var3( FLA_Obj A ) { FLA_Error r_val = FLA_SUCCESS; FLA_Datatype datatype; int mn_A; int rs_A, cs_A; datatype = FLA_Obj_datatype( A ); mn_A = FLA_Obj_length( A ); rs_A = FLA_Obj_row_stride( A ); cs_A = FLA_Obj_col_stride( A ); switch ( datatype ) { case FLA_FLOAT: { float* buff_A = FLA_FLOAT_PTR( A ); r_val = FLA_Chol_u_ops_var3( mn_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE: { double* buff_A = FLA_DOUBLE_PTR( A ); r_val = FLA_Chol_u_opd_var3( mn_A, buff_A, rs_A, cs_A ); break; } case FLA_COMPLEX: { scomplex* buff_A = FLA_COMPLEX_PTR( A ); r_val = FLA_Chol_u_opc_var3( mn_A, buff_A, rs_A, cs_A ); break; } case FLA_DOUBLE_COMPLEX: { dcomplex* buff_A = FLA_DOUBLE_COMPLEX_PTR( A ); r_val = FLA_Chol_u_opz_var3( mn_A, buff_A, rs_A, cs_A ); break; } } return r_val; }
FLA_Error FLA_Tevd_eigval_v_opt_var1( FLA_Obj G, FLA_Obj d, FLA_Obj e, FLA_Obj k ) { FLA_Datatype datatype; int m_A, n_G; int rs_G, cs_G; int inc_d; int inc_e; datatype = FLA_Obj_datatype( d ); m_A = FLA_Obj_vector_dim( d ); n_G = FLA_Obj_width( G ); rs_G = FLA_Obj_row_stride( G ); cs_G = FLA_Obj_col_stride( G ); inc_d = FLA_Obj_vector_inc( d ); inc_e = FLA_Obj_vector_inc( e ); switch ( datatype ) { case FLA_FLOAT: { scomplex* buff_G = FLA_COMPLEX_PTR( G ); float* buff_d = FLA_FLOAT_PTR( d ); float* buff_e = FLA_FLOAT_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Tevd_eigval_v_ops_var1( m_A, n_G, buff_G, rs_G, cs_G, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } case FLA_DOUBLE: { dcomplex* buff_G = FLA_DOUBLE_COMPLEX_PTR( G ); double* buff_d = FLA_DOUBLE_PTR( d ); double* buff_e = FLA_DOUBLE_PTR( e ); int* buff_k = FLA_INT_PTR( k ); FLA_Tevd_eigval_v_opd_var1( m_A, n_G, buff_G, rs_G, cs_G, buff_d, inc_d, buff_e, inc_e, buff_k ); break; } } return FLA_SUCCESS; }
FLA_Error FLA_Norm_inf( FLA_Obj A, FLA_Obj norm ) { FLA_Obj AT, A0, AB, a1t, A2; FLA_Obj bT, b0, bB, beta1, b2; FLA_Obj b; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Norm_inf_check( A, norm ); FLA_Obj_create( FLA_Obj_datatype( A ), FLA_Obj_length( A ), 1, 0, 0, &b ); FLA_Part_2x1( A, &AT, &AB, 0, FLA_TOP ); FLA_Part_2x1( b, &bT, &bB, 0, FLA_TOP ); while ( FLA_Obj_length( AT ) < FLA_Obj_length( A ) ){ FLA_Repart_2x1_to_3x1( AT, &A0, /* ** */ /* *** */ &a1t, AB, &A2, 1, FLA_BOTTOM ); FLA_Repart_2x1_to_3x1( bT, &b0, /* ** */ /* ***** */ &beta1, bB, &b2, 1, FLA_BOTTOM ); /*------------------------------------------------------------*/ FLA_Asum( a1t, beta1 ); /*------------------------------------------------------------*/ FLA_Cont_with_3x1_to_2x1( &AT, A0, a1t, /* ** */ /* *** */ &AB, A2, FLA_TOP ); FLA_Cont_with_3x1_to_2x1( &bT, b0, beta1, /* ** */ /* ***** */ &bB, b2, FLA_TOP ); } FLA_Max_abs_value( b, norm ); FLA_Obj_free( &b ); return FLA_SUCCESS; }
FLA_Error FLA_Obj_create_conf_to( FLA_Trans trans, FLA_Obj obj_cur, FLA_Obj *obj_new ) { FLA_Datatype datatype; FLA_Elemtype elemtype; dim_t m, n; dim_t rs, cs; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_create_conf_to_check( trans, obj_cur, obj_new ); datatype = FLA_Obj_datatype( obj_cur ); elemtype = FLA_Obj_elemtype( obj_cur ); // Query the dimensions of the existing object. if ( trans == FLA_NO_TRANSPOSE || trans == FLA_CONJ_NO_TRANSPOSE ) { m = FLA_Obj_length( obj_cur ); n = FLA_Obj_width( obj_cur ); } else // if ( trans == FLA_TRANSPOSE || trans == FLA_CONJ_TRANSPOSE ) { m = FLA_Obj_width( obj_cur ); n = FLA_Obj_length( obj_cur ); } // Query the row and column strides of the existing object. We don't care // about the actual leading dimension of the existing object, only whether // it is in row- or column-major format. rs = FLA_Obj_row_stride( obj_cur ); cs = FLA_Obj_col_stride( obj_cur ); if ( ( rs == 1 && cs == 1 ) ) { // Do nothing. This special case will be handled by FLA_adjust_strides(). ; } else if ( rs == 1 ) { // For column-major storage, use the m dimension as the column stride. // Row stride is already set to 1. cs = m; } else if ( cs == 1 ) { // For row-major storage, use the n dimension as the row stride. // Column stride is already set to 1. rs = n; } // Handle empty views. if ( m == 0 ) cs = 1; if ( n == 0 ) rs = 1; FLA_Obj_create_ext( datatype, elemtype, m, n, m, n, rs, cs, obj_new ); return FLA_SUCCESS; }
FLA_Error FLA_Absolute_value( FLA_Obj alpha ) { FLA_Datatype datatype; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Absolute_value_check( alpha ); datatype = FLA_Obj_datatype( alpha ); switch ( datatype ){ case FLA_FLOAT: { float *buff_alpha = ( float * ) FLA_FLOAT_PTR( alpha ); *buff_alpha = ( float ) fabs( ( double ) *buff_alpha ); break; } case FLA_DOUBLE: { double *buff_alpha = ( double * ) FLA_DOUBLE_PTR( alpha ); *buff_alpha = fabs( *buff_alpha ); break; } case FLA_COMPLEX: { scomplex *buff_alpha = ( scomplex * ) FLA_COMPLEX_PTR( alpha ); buff_alpha->real = ( float ) sqrt( ( double ) buff_alpha->real * buff_alpha->real + buff_alpha->imag * buff_alpha->imag ); buff_alpha->imag = 0.0F; break; } case FLA_DOUBLE_COMPLEX: { dcomplex *buff_alpha = ( dcomplex * ) FLA_DOUBLE_COMPLEX_PTR( alpha ); buff_alpha->real = sqrt( buff_alpha->real * buff_alpha->real + buff_alpha->imag * buff_alpha->imag ); buff_alpha->imag = 0.0; break; } } return FLA_SUCCESS; }
FLA_Error FLA_Check_valid_object_datatype( FLA_Obj A ) { FLA_Error e_val; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); e_val = FLA_Check_valid_datatype( datatype ); return e_val; }
FLA_Error FLA_Check_complex_object( FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); if ( FLA_Check_complex_datatype( datatype ) != FLA_SUCCESS ) e_val = FLA_OBJECT_NOT_COMPLEX; return e_val; }
FLA_Error FLA_Check_real_object( FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); if ( FLA_Check_real_datatype( datatype ) != FLA_SUCCESS ) e_val = FLA_OBJECT_NOT_REAL; return e_val; }
FLA_Error FLA_Check_int_object( FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); if ( FLA_Check_int_datatype( datatype ) != FLA_SUCCESS ) e_val = FLA_OBJECT_NOT_INTEGER; return e_val; }
FLA_Error FLA_Check_floating_object( FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); if ( FLA_Check_floating_datatype( datatype ) != FLA_SUCCESS ) e_val = FLA_OBJECT_NOT_FLOATING_POINT; return e_val; }
FLA_Error FLA_Check_nonconstant_object( FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); if ( FLA_Check_nonconstant_datatype( datatype ) != FLA_SUCCESS ) e_val = FLA_OBJECT_NOT_NONCONSTANT; return e_val; }
FLA_Error FLA_Check_identical_object_precision( FLA_Obj A, FLA_Obj B ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype_A; FLA_Datatype datatype_B; dim_t precision_A; dim_t precision_B; datatype_A = FLA_Obj_datatype( A ); datatype_B = FLA_Obj_datatype( B ); if ( datatype_A == FLA_CONSTANT || datatype_B == FLA_CONSTANT ) { return FLA_SUCCESS; } if ( FLA_Check_floating_object( A ) != FLA_SUCCESS || FLA_Check_floating_object( B ) != FLA_SUCCESS ) { return FLA_OBJECT_NOT_FLOATING_POINT; } datatype_A = FLA_Obj_datatype( A ); datatype_B = FLA_Obj_datatype( B ); precision_A = FLA_Obj_datatype_size( datatype_A ); precision_B = FLA_Obj_datatype_size( datatype_B ); if ( FLA_Obj_is_complex( A ) ) precision_A = precision_A / 2; if ( FLA_Obj_is_complex( B ) ) precision_B = precision_B / 2; if ( precision_A != precision_B ) e_val = FLA_INCONSISTENT_OBJECT_PRECISION; return e_val; }
FLA_Error FLA_Check_comparable_object( FLA_Obj A ) { FLA_Error e_val = FLA_SUCCESS; FLA_Datatype datatype; datatype = FLA_Obj_datatype( A ); if ( FLA_Check_int_datatype( datatype ) != FLA_SUCCESS && FLA_Check_real_datatype( datatype ) != FLA_SUCCESS ) e_val = FLA_OBJECT_NOT_COMPARABLE; return e_val; }
FLA_Bool FLA_Obj_is_double_precision( FLA_Obj A ) { FLA_Datatype datatype; FLA_Bool r_val; datatype = FLA_Obj_datatype( A ); if ( datatype == FLA_CONSTANT || datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) r_val = TRUE; else r_val = FALSE; return r_val; }
FLA_Bool FLA_Obj_is_constant( FLA_Obj A ) { FLA_Datatype datatype; FLA_Bool r_val; datatype = FLA_Obj_datatype( A ); if ( datatype == FLA_CONSTANT ) r_val = TRUE; else r_val = FALSE; return r_val; }
FLA_Bool FLA_Obj_is_real( FLA_Obj A ) { FLA_Datatype datatype; FLA_Bool r_val; datatype = FLA_Obj_datatype( A ); if ( datatype == FLA_CONSTANT || datatype == FLA_FLOAT || datatype == FLA_DOUBLE ) r_val = TRUE; else r_val = FALSE; return r_val; }
FLA_Bool FLA_Obj_is_complex( FLA_Obj A ) { FLA_Datatype datatype; FLA_Bool r_val; datatype = FLA_Obj_datatype( A ); if ( datatype == FLA_CONSTANT || datatype == FLA_COMPLEX || datatype == FLA_DOUBLE_COMPLEX ) r_val = TRUE; else r_val = FALSE; return r_val; }
FLA_Error FLA_Check_householder_panel_dims( FLA_Obj A, FLA_Obj T ) { FLA_Error e_val = FLA_SUCCESS; dim_t nb_alg; nb_alg = FLA_Query_blocksize( FLA_Obj_datatype( A ), FLA_DIMENSION_MIN ); if ( FLA_Obj_length( T ) < nb_alg ) e_val = FLA_HOUSEH_PANEL_MATRIX_TOO_SMALL; if ( FLA_Obj_width( T ) < FLA_Obj_min_dim( A ) ) e_val = FLA_HOUSEH_PANEL_MATRIX_TOO_SMALL; return e_val; }
FLA_Bool FLA_Obj_is_floating_point( FLA_Obj A ) { FLA_Datatype datatype; FLA_Bool r_val; datatype = FLA_Obj_datatype( A ); if ( datatype == FLA_FLOAT || datatype == FLA_COMPLEX || datatype == FLA_DOUBLE || datatype == FLA_DOUBLE_COMPLEX ) r_val = TRUE; else r_val = FALSE; return r_val; }
dim_t FLA_Obj_elem_size( FLA_Obj obj ) { dim_t elem_size = 0; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Obj_elem_size_check( obj ); if ( FLA_Obj_elemtype( obj ) == FLA_MATRIX ) { elem_size = sizeof( FLA_Obj ); } else // if ( FLA_Obj_elemtype( obj ) == FLA_SCALAR ) { elem_size = FLA_Obj_datatype_size( FLA_Obj_datatype( obj ) ); } return elem_size; }
FLA_Error FLA_Bidiag_UT_create_T( FLA_Obj A, FLA_Obj* TU, FLA_Obj* TV ) { FLA_Datatype datatype; dim_t b_alg, k; dim_t rs_T, cs_T; // Query the datatype of A. datatype = FLA_Obj_datatype( A ); // Query the blocksize from the library. b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); // Scale the blocksize by a pre-set global constant. b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_BIDIAG_INNER_TO_OUTER_B_RATIO ); // Query the minimum dimension of A. k = FLA_Obj_min_dim( A ); b_alg = 5; // Adjust the blocksize with respect to the min-dim of A. b_alg = min( b_alg, k ); // Figure out whether TU and TV should be row-major or column-major. if ( FLA_Obj_row_stride( A ) == 1 ) { rs_T = 1; cs_T = b_alg; } else // if ( FLA_Obj_col_stride( A ) == 1 ) { rs_T = k; cs_T = 1; } // Create two b_alg x k matrices to hold the block Householder transforms // that will be accumulated within the bidiagonal reduction algorithm. // If the matrix dimension has a zero dimension, apply_q complains it. if ( TU != NULL ) FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TU ); if ( TV != NULL ) FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, TV ); return FLA_SUCCESS; }
// ============================================================================ void compute_case1( int m, int n, int k, int l, FLA_Obj cb_A, FLA_Obj cb_B, FLA_Obj C, int print_data ) { FLA_Obj slice_A, slice_B; int datatype, h; double * buff_cb_A, * buff_cb_B; // Some initializations. datatype = FLA_Obj_datatype( cb_A ); buff_cb_A = ( double * ) FLA_Obj_buffer_at_view( cb_A ); buff_cb_B = ( double * ) FLA_Obj_buffer_at_view( cb_B ); // Prepare temporal slices. FLA_Obj_create_without_buffer( datatype, m, k, & slice_A ); FLA_Obj_create_without_buffer( datatype, n, k, & slice_B ); // Initialize matrix C for the result. MyFLA_Obj_set_to_zero( C ); // Show data. if( print_data == 1 ) { FLA_Obj_show( " Ci = [ ", C, "%le", " ];" ); FLA_Obj_show( " cb_A = [ ", cb_A, "%le", " ];" ); FLA_Obj_show( " cb_B = [ ", cb_B, "%le", " ];" ); } // Perform computation. for( h = 0; h < l; h++ ) { FLA_Obj_attach_buffer( buff_cb_A + m * k * h, 1, m, & slice_A ); FLA_Obj_attach_buffer( buff_cb_B + n * k * h, 1, n, & slice_B ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, slice_A, slice_B, FLA_ONE, C ); } // Remove temporal slices. FLA_Obj_free_without_buffer( & slice_A ); FLA_Obj_free_without_buffer( & slice_B ); // Show data. if( print_data == 1 ) { FLA_Obj_show( " Cf = [ ", C, "%le", " ];" ); } }
FLA_Error FLA_Copy_object_to_buffer( FLA_Trans trans, dim_t i, dim_t j, FLA_Obj A, dim_t m, dim_t n, void* B_buffer, dim_t rs, dim_t cs ) { FLA_Obj B; FLA_Obj ATL, ATR, ABL, Aij; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Copy_object_to_buffer_check( trans, i, j, A, m, n, B_buffer, rs, cs ); FLA_Part_2x2( A, &ATL, &ATR, &ABL, &Aij, i, j, FLA_TL ); FLA_Obj_create_without_buffer( FLA_Obj_datatype( A ), m, n, &B ); FLA_Obj_attach_buffer( B_buffer, rs, cs, &B ); FLA_Copyt_external( trans, Aij, B ); FLA_Obj_free_without_buffer( &B ); return FLA_SUCCESS; }
FLA_Error FLA_Copy_buffer_to_object( FLA_Trans trans, dim_t m, dim_t n, void* A_buffer, dim_t rs, dim_t cs, dim_t i, dim_t j, FLA_Obj B ) { FLA_Obj A; FLA_Obj BTL, BTR, BBL, Bij; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Copy_buffer_to_object_check( trans, m, n, A_buffer, rs, cs, i, j, B ); FLA_Part_2x2( B, &BTL, &BTR, &BBL, &Bij, i, j, FLA_TL ); FLA_Obj_create_without_buffer( FLA_Obj_datatype( B ), m, n, &A ); FLA_Obj_attach_buffer( A_buffer, rs, cs, &A ); FLA_Copyt_external( trans, A, Bij ); FLA_Obj_free_without_buffer( &A ); return FLA_SUCCESS; }
FLA_Error FLA_LQ_UT_create_T( FLA_Obj A, FLA_Obj* T ) { FLA_Datatype datatype; dim_t b_alg, k; dim_t rs_T, cs_T; // Query the datatype of A. datatype = FLA_Obj_datatype( A ); // Query the blocksize from the library. b_alg = FLA_Query_blocksize( datatype, FLA_DIMENSION_MIN ); // Scale the blocksize by a pre-set global constant. b_alg = ( dim_t )( ( ( double ) b_alg ) * FLA_LQ_INNER_TO_OUTER_B_RATIO ); // Adjust the blocksize with respect to the min-dim of A. b_alg = min(b_alg, FLA_Obj_min_dim( A )); // Query the length of A. k = FLA_Obj_length( A ); // Figure out whether T should be row-major or column-major. if ( FLA_Obj_row_stride( A ) == 1 ) { rs_T = 1; cs_T = b_alg; } else // if ( FLA_Obj_col_stride( A ) == 1 ) { rs_T = k; cs_T = 1; } // Create a b_alg x k matrix to hold the block Householder transforms that // will be accumulated within the LQ factorization algorithm. FLA_Obj_create( datatype, b_alg, k, rs_T, cs_T, T ); return FLA_SUCCESS; }