int phonopy_pinv_libflame(double *matrix, double *eigvals, const int size, const double cutoff) { FLA_Obj A, B, l; /* FLA_Obj C; */ double *inv_eigvals; int i; inv_eigvals = (double*)malloc(sizeof(double) * size); FLA_Init(); FLA_Obj_create_without_buffer(FLA_DOUBLE, size, size, &A); FLA_Obj_attach_buffer(matrix, 0, 0, &A); FLA_Obj_create_without_buffer(FLA_DOUBLE, size, 1, &l); FLA_Obj_attach_buffer(eigvals, 0, 0, &l); /* Eigensolver */ FLA_Obj_create_copy_of(FLA_NO_TRANSPOSE, A, &B); FLA_Hevd(FLA_EVD_WITH_VECTORS, FLA_LOWER_TRIANGULAR, B, l); /* SVD */ /* FLA_Obj_create(FLA_DOUBLE, size, size, 0, 0, &B); */ /* use U */ /* FLA_Svd(FLA_SVD_VECTORS_ALL, FLA_SVD_VECTORS_NONE, A, l, B, C); */ /* use V */ /* FLA_Svd(FLA_SVD_VECTORS_NONE, FLA_SVD_VECTORS_ALL, A, l, C, B); */ FLA_Obj_free_without_buffer(&l); for (i = 0; i < size; i++) { if (eigvals[i] < cutoff) { inv_eigvals[i] = 0; } else { inv_eigvals[i] = 1.0 / sqrt(eigvals[i]); } } FLA_Obj_create_without_buffer(FLA_DOUBLE, size, 1, &l); FLA_Obj_attach_buffer(inv_eigvals, 0, 0, &l); FLA_Apply_diag_matrix(FLA_RIGHT, FLA_NO_CONJUGATE, l, B); FLA_Syrk(FLA_LOWER_TRIANGULAR, FLA_NO_TRANSPOSE, FLA_ONE, B, FLA_ZERO, A); FLA_Symmetrize(FLA_LOWER_TRIANGULAR, A); FLA_Obj_free_without_buffer(&A); FLA_Obj_free_without_buffer(&l); FLA_Obj_free(&B); FLA_Finalize(); free(inv_eigvals); return 0; }
// ============================================================================ void compute_case1( int m, int n, int k, int l, FLA_Obj cb_A, FLA_Obj cb_B, FLA_Obj C, int print_data ) { FLA_Obj slice_A, slice_B; int datatype, h; double * buff_cb_A, * buff_cb_B; // Some initializations. datatype = FLA_Obj_datatype( cb_A ); buff_cb_A = ( double * ) FLA_Obj_buffer_at_view( cb_A ); buff_cb_B = ( double * ) FLA_Obj_buffer_at_view( cb_B ); // Prepare temporal slices. FLA_Obj_create_without_buffer( datatype, m, k, & slice_A ); FLA_Obj_create_without_buffer( datatype, n, k, & slice_B ); // Initialize matrix C for the result. MyFLA_Obj_set_to_zero( C ); // Show data. if( print_data == 1 ) { FLA_Obj_show( " Ci = [ ", C, "%le", " ];" ); FLA_Obj_show( " cb_A = [ ", cb_A, "%le", " ];" ); FLA_Obj_show( " cb_B = [ ", cb_B, "%le", " ];" ); } // Perform computation. for( h = 0; h < l; h++ ) { FLA_Obj_attach_buffer( buff_cb_A + m * k * h, 1, m, & slice_A ); FLA_Obj_attach_buffer( buff_cb_B + n * k * h, 1, n, & slice_B ); FLA_Gemm( FLA_NO_TRANSPOSE, FLA_TRANSPOSE, FLA_ONE, slice_A, slice_B, FLA_ONE, C ); } // Remove temporal slices. FLA_Obj_free_without_buffer( & slice_A ); FLA_Obj_free_without_buffer( & slice_B ); // Show data. if( print_data == 1 ) { FLA_Obj_show( " Cf = [ ", C, "%le", " ];" ); } }
void FLA_Gemm_pack_andor_scale_A( FLA_Trans transA, FLA_Obj alpha, FLA_Obj A, FLA_Obj* packed_A ) { int m, n, ldim_A; double *buff_packed_A, *buff_packed_aligned_A, *buff_A; m = FLA_Obj_length( A ); n = FLA_Obj_width ( A ); ldim_A = FLA_Obj_ldim ( A ); buff_A = ( double* ) FLA_Obj_buffer_at_view( A ); dgemm_itcopy( m, n, buff_A, ldim_A, FLA_Work_buffer_aligned_A ); FLA_Obj_create_without_buffer( FLA_DOUBLE, m, n, packed_A ); // FLA_Obj_attach_buffer( buff_packed_A, n, packed_A ); }
void FLA_Gemm_pack_andor_scale_B( FLA_Trans transB, FLA_Obj alpha, FLA_Obj B, FLA_Obj* packed_B ) { int m, n, ldim_B; double *buff_packed_B, *buff_packed_aligned_B, *buff_B; m = FLA_Obj_length( B ); n = FLA_Obj_width ( B ); ldim_B = FLA_Obj_ldim ( B ); buff_B = ( double* ) FLA_Obj_buffer_at_view( B ); dgemm_oncopy( m, n, buff_B, ldim_B, FLA_Work_buffer_aligned_B ); FLA_Obj_create_without_buffer( FLA_DOUBLE, m, n, packed_B ); // FLA_Obj_attach_buffer( buff_packed_B, m, packed_B ); }
void F77_fla_obj_show( char* prefix, int* m, int* n, void* buffer, int* ldim ) { FLA_Error init_result; FLA_Datatype datatype; FLA_Obj A; switch( *prefix ) { case 'i': case 'I': datatype = FLA_INT; break; case 's': case 'S': datatype = FLA_FLOAT; break; case 'd': case 'D': datatype = FLA_DOUBLE; break; case 'c': case 'C': datatype = FLA_COMPLEX; break; case 'z': case 'Z': datatype = FLA_DOUBLE_COMPLEX; break; default: fprintf(stderr, "Invalid prefix %c, where i,s,d,c,z are allowed.\n", *prefix); FLA_Abort(); } FLA_Init_safe( &init_result ); FLA_Obj_create_without_buffer( datatype, *m, *n, &A ); FLA_Obj_attach_buffer( buffer, 1, *ldim, &A ); FLA_Obj_fshow( stdout, "= F77_FLA_OBJ_SHOW =", A, "% 6.4e", "=-=-=-=-=-=-=-=-=-=-\n"); FLA_Obj_free_without_buffer( &A ); FLA_Finalize_safe( init_result ); }
FLA_Error FLA_Copy_object_to_buffer( FLA_Trans trans, dim_t i, dim_t j, FLA_Obj A, dim_t m, dim_t n, void* B_buffer, dim_t rs, dim_t cs ) { FLA_Obj B; FLA_Obj ATL, ATR, ABL, Aij; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Copy_object_to_buffer_check( trans, i, j, A, m, n, B_buffer, rs, cs ); FLA_Part_2x2( A, &ATL, &ATR, &ABL, &Aij, i, j, FLA_TL ); FLA_Obj_create_without_buffer( FLA_Obj_datatype( A ), m, n, &B ); FLA_Obj_attach_buffer( B_buffer, rs, cs, &B ); FLA_Copyt_external( trans, Aij, B ); FLA_Obj_free_without_buffer( &B ); return FLA_SUCCESS; }
FLA_Error FLA_Copy_buffer_to_object( FLA_Trans trans, dim_t m, dim_t n, void* A_buffer, dim_t rs, dim_t cs, dim_t i, dim_t j, FLA_Obj B ) { FLA_Obj A; FLA_Obj BTL, BTR, BBL, Bij; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) FLA_Copy_buffer_to_object_check( trans, m, n, A_buffer, rs, cs, i, j, B ); FLA_Part_2x2( B, &BTL, &BTR, &BBL, &Bij, i, j, FLA_TL ); FLA_Obj_create_without_buffer( FLA_Obj_datatype( B ), m, n, &A ); FLA_Obj_attach_buffer( A_buffer, rs, cs, &A ); FLA_Copyt_external( trans, A, Bij ); FLA_Obj_free_without_buffer( &A ); return FLA_SUCCESS; }
FLA_Error FLASH_Axpy_hier_to_buffer( FLA_Obj alpha, dim_t i, dim_t j, FLA_Obj H, dim_t m, dim_t n, void* buffer, dim_t rs, dim_t cs ) { FLA_Obj flat_matrix; FLA_Datatype datatype; FLA_Error e_val; if ( FLA_Check_error_level() >= FLA_MIN_ERROR_CHECKING ) { e_val = FLA_Check_if_scalar( alpha ); FLA_Check_error_code( e_val ); e_val = FLA_Check_consistent_object_datatype( alpha, H ); FLA_Check_error_code( e_val ); e_val = FLA_Check_matrix_strides( m, n, rs, cs ); FLA_Check_error_code( e_val ); e_val = FLA_Check_submatrix_dims_and_offset( m, n, i, j, H ); FLA_Check_error_code( e_val ); } // Acquire the datatype from the hierarchical matrix object. datatype = FLASH_Obj_datatype( H ); // Create a temporary conventional matrix object of the requested datatype // and dimensions and attach the given buffer containing the incoming data. FLA_Obj_create_without_buffer( datatype, m, n, &flat_matrix ); FLA_Obj_attach_buffer( buffer, rs, cs, &flat_matrix ); // Recurse through H, adding in the corresponding elements of flat_matrix, // starting at the (i,j) element offset. FLASH_Axpy_hier_to_flat( alpha, i, j, H, flat_matrix ); // Free the object (but don't free the buffer!). FLA_Obj_free_without_buffer( &flat_matrix ); return FLA_SUCCESS; }
// ============================================================================ void compute_case4b( int size_a, int size_b, int size_c, int size_d, int size_i, int size_j, FLA_Obj cb_A, FLA_Obj cb_B, FLA_Obj cb_C, int print_data ) { FLA_Obj slice_C; int datatype, size_ab, size_abc, size_ia, size_iaj, size_jc, size_jci, iter_a, iter_b, iter_c, iter_d, iter_i, iter_j, ldim_slice_C; size_t idx_A, idx_B, idx_C; double * buff_cb_A, * buff_cb_B, * buff_cb_C, * buff_slice_C, d_one = 1.0; // Some initializations. datatype = FLA_Obj_datatype( cb_A ); buff_cb_A = ( double * ) FLA_Obj_buffer_at_view( cb_A ); buff_cb_B = ( double * ) FLA_Obj_buffer_at_view( cb_B ); buff_cb_C = ( double * ) FLA_Obj_buffer_at_view( cb_C ); size_ab = size_a * size_b; size_abc = size_a * size_b * size_c; size_ia = size_i * size_a; size_iaj = size_i * size_a * size_j; size_jc = size_j * size_c; size_jci = size_j * size_c * size_i; // Show data. if( print_data == 1 ) { FLA_Obj_show( " cb_A_i = [ ", cb_A, "%le", " ];" ); FLA_Obj_show( " cb_B_i = [ ", cb_B, "%le", " ];" ); FLA_Obj_show( " cb_C_i = [ ", cb_C, "%le", " ];" ); } // Prepare temporal slices without buffer. FLA_Obj_create_without_buffer( datatype, size_a, size_c, & slice_C ); #if 0 FLA_Obj_create_without_buffer( datatype, size_a, 1, & slice_A ); FLA_Obj_create_without_buffer( datatype, size_c, 1, & slice_B ); #endif // Perform computation. for( iter_b = 0; iter_b < size_b; iter_b++ ) { for( iter_d = 0; iter_d < size_d; iter_d++ ) { // Define slice_C. iter_a = 0; iter_c = 0; idx_C = ( ( size_t ) iter_a ) + ( ( size_t ) iter_b * size_a ) + ( ( size_t ) iter_c * size_ab ) + ( ( size_t ) iter_d * size_abc ); FLA_Obj_attach_buffer( & buff_cb_C[ idx_C ], 1, size_ab, & slice_C ); buff_slice_C = ( double * ) FLA_Obj_buffer_at_view( slice_C ); ldim_slice_C = FLA_Obj_col_stride( slice_C ); // Initialize slice_C. MyFLA_Obj_set_to_zero( slice_C ); for( iter_i = 0; iter_i < size_i; iter_i++ ) { for( iter_j = 0; iter_j < size_j; iter_j++ ) { #if 0 // Define slice_A. FLA_Obj_attach_buffer( & buff_cb_A[ iter_i + 0 * size_i + iter_j * size_ia + iter_b * size_iaj ], size_i, 1, & slice_A ); // Define slice_B. FLA_Obj_attach_buffer( & buff_cb_B[ iter_j + 0 * size_j + iter_i * size_jc + iter_d * size_jci ], size_j, 1, & slice_B ); // Compute DGER operation. FLA_Ger( FLA_ONE, slice_A, slice_B, slice_C ); #endif idx_A = ( ( size_t ) iter_i ) + ( ( size_t ) 0 * size_i ) + ( ( size_t ) iter_j * size_ia ) + ( ( size_t ) iter_b * size_iaj ); idx_B = ( ( size_t ) iter_j ) + ( ( size_t ) 0 * size_j ) + ( ( size_t ) iter_i * size_jc ) + ( ( size_t ) iter_d * size_jci ); dger_( & size_a, & size_c, & d_one, & buff_cb_A[ idx_A ], & size_i, & buff_cb_B[ idx_B ], & size_j, buff_slice_C, & ldim_slice_C ); } } } } // Show data. if( print_data == 1 ) { FLA_Obj_show( " cb_A_f = [ ", cb_A, "%le", " ];" ); FLA_Obj_show( " cb_B_f = [ ", cb_B, "%le", " ];" ); FLA_Obj_show( " cb_C_f = [ ", cb_C, "%le", " ];" ); } // Remove temporal slices. FLA_Obj_free_without_buffer( & slice_C ); #if 0 FLA_Obj_free_without_buffer( & slice_A ); FLA_Obj_free_without_buffer( & slice_B ); #endif }
// ============================================================================ void compute_case2b( int size_a, int size_b, int size_c, int size_d, int size_i, int size_j, FLA_Obj cb_A, FLA_Obj cb_B, FLA_Obj cb_C, int print_data ) { FLA_Obj slice_A, slice_B, slice_C; int datatype, size_ab, size_abc, size_ia, size_iaj, size_jc, size_jci, iter_a, iter_b, iter_c, iter_d, iter_i, iter_j, ii, jj, ldim_slice_B; size_t idx_A, idx_B, idx_C; double * buff_cb_A, * buff_cb_B, * buff_cb_C, * buff_slice_B; // Some initializations. datatype = FLA_Obj_datatype( cb_A ); buff_cb_A = ( double * ) FLA_Obj_buffer_at_view( cb_A ); buff_cb_B = ( double * ) FLA_Obj_buffer_at_view( cb_B ); buff_cb_C = ( double * ) FLA_Obj_buffer_at_view( cb_C ); size_ab = size_a * size_b; size_abc = size_a * size_b * size_c; size_ia = size_i * size_a; size_iaj = size_i * size_a * size_j; size_jc = size_j * size_c; size_jci = size_j * size_c * size_i; // Show data. if( print_data == 1 ) { FLA_Obj_show( " cb_A_i = [ ", cb_A, "%le", " ];" ); FLA_Obj_show( " cb_B_i = [ ", cb_B, "%le", " ];" ); FLA_Obj_show( " cb_C_i = [ ", cb_C, "%le", " ];" ); } // Prepare temporal slices without buffer. FLA_Obj_create_without_buffer( datatype, size_i, size_a, & slice_A ); FLA_Obj_create( datatype, size_i, size_d, 0, 0, & slice_B ); FLA_Obj_create_without_buffer( datatype, size_a, size_d, & slice_C ); // Perform computation. for( iter_b = 0; iter_b < size_b; iter_b++ ) { for( iter_c = 0; iter_c < size_c; iter_c++ ) { iter_a = 0; iter_d = 0; iter_i = 0; idx_C = ( ( size_t ) iter_a ) + ( ( size_t ) iter_b * size_a ) + ( ( size_t ) iter_c * size_ab ) + ( ( size_t ) iter_d * size_abc ), FLA_Obj_attach_buffer( & buff_cb_C[ idx_C ], 1, size_abc, & slice_C ); MyFLA_Obj_set_to_zero( slice_C ); for( iter_j = 0; iter_j < size_j; iter_j++ ) { // Define Ai. idx_A = ( ( size_t ) iter_i ) + ( ( size_t ) iter_a * size_i ) + ( ( size_t ) iter_j * size_ia ) + ( ( size_t ) iter_b * size_iaj ); FLA_Obj_attach_buffer( & buff_cb_A[ idx_A ], 1, size_i, & slice_A ); // Define Bi. buff_slice_B = ( double * ) FLA_Obj_buffer_at_view( slice_B ); ldim_slice_B = FLA_Obj_col_stride( slice_B ); for( jj = 0; jj < size_d; jj++ ) { for( ii = 0; ii < size_i; ii++ ) { idx_B = ( ( size_t ) iter_j ) + ( ( size_t ) iter_c * size_j ) + ( ( size_t ) ii * size_jc ) + ( ( size_t ) jj * size_jci ); buff_slice_B[ ii + jj * ldim_slice_B ] = buff_cb_B[ idx_B ]; } } // Compute Ai' * Bi. FLA_Gemm( FLA_TRANSPOSE, FLA_NO_TRANSPOSE, FLA_ONE, slice_A, slice_B, FLA_ONE, slice_C ); } } } // Show data. if( print_data == 1 ) { FLA_Obj_show( " cb_A_f = [ ", cb_A, "%le", " ];" ); FLA_Obj_show( " cb_B_f = [ ", cb_B, "%le", " ];" ); FLA_Obj_show( " cb_C_f = [ ", cb_C, "%le", " ];" ); } // Remove temporal slices. FLA_Obj_free_without_buffer( & slice_A ); FLA_Obj_free( & slice_B ); FLA_Obj_free_without_buffer( & slice_C ); }