int create_diag( PLA_Obj diag ) { int i, size; double d_i; PLA_Obj diag_1 = NULL, diag_B = NULL; PLA_Obj_view_all( diag, &diag_B ); i = 1; while ( TRUE ){ PLA_Obj_global_length( diag_B, &size ); if ( size == 0 ) break; PLA_Obj_horz_split_2( diag_B, 1, &diag_1, &diag_B ); d_i = ( double ) i; PLA_Obj_set( diag_1, MPI_DOUBLE, &d_i ); i++; } PLA_Obj_free( &diag_1 ); PLA_Obj_free( &diag_B ); return PLA_SUCCESS; }
int PLA_Symmetrize( int uplo, PLA_Obj A ) { int size; PLA_Obj A_BR = NULL, A_12 = NULL, A_21 = NULL; if ( uplo != PLA_LOWER_TRIANGULAR ) PLA_Abort( "only uplo == PLA_LOWER_TRIANGULAR implemented", __LINE__, __FILE__ ); PLA_Obj_view_all( A, &A_BR ); while ( TRUE ){ PLA_Obj_global_length( A_BR, &size ); if ( size == 0 ) break; PLA_Obj_split_4( A_BR, 1, 1, PLA_DUMMY, &A_12, &A_21, &A_BR ); PLA_Obj_set_orientation( A_12, PLA_PROJ_ONTO_ROW ); PLA_Copy( A_21, A_12 ); } PLA_Obj_free( &A_BR ); PLA_Obj_free( &A_12 ); PLA_Obj_free( &A_21 ); return PLA_SUCCESS; }
int PLA_Obj_set_to_identity( PLA_Obj A ) { int length, width; PLA_Obj A11 = NULL, ABR = NULL; PLA_Obj_global_length( A, &length ); PLA_Obj_global_width( A, &width ); if ( length != width ){ printf("PLA_Obj_set_to_identity() error: object not square\n"); exit ( 0 ); } PLA_Obj_set_to_zero( A ); PLA_Obj_view_all( A, &ABR ); while ( TRUE ){ PLA_Obj_global_length( ABR, &length ); if ( 0 == length ) break; PLA_Obj_split_4( ABR, 1, 1, &A11, PLA_DUMMY, PLA_DUMMY, &ABR ); PLA_Obj_set_to_one( A11 ); } PLA_Obj_free( &A11 ); PLA_Obj_free( &ABR ); return PLA_SUCCESS; }
int PLA_Matrix_infinity_norm_exit( PLA_Obj A, PLA_Obj alpha ) { int value = PLA_SUCCESS, size_malloced; double PLA_Local_abs_max(), PLA_Local_abs_diff(), diff; char routine_name[ 35 ]; PLA_Routine_stack_push( "PLA_Matrix_inf_norm_exit" ); if ( PLA_CHECK_AGAINST_SEQUENTIAL ){ PLA_Obj A_tmp = NULL, alpha_tmp = NULL, alpha_cpy = NULL; PLA_Mscalar_create_conf_to( A, PLA_ALL_ROWS, PLA_ALL_COLS, &A_tmp ); PLA_Copy( A, A_tmp ); PLA_Mscalar_create_conf_to( alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &alpha_tmp ); PLA_Copy( alpha, alpha_tmp ); PLA_Mscalar_create_conf_to( alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &alpha_cpy ); PLA_Local_matrix_infinity_norm( A_tmp, alpha_cpy ); diff = PLA_Local_abs_diff( alpha_tmp, alpha_cpy ); if ( diff > 0.000001 ){ PLA_Warning( "PLA_Matrix_infinity_norm: large absolute error encountered" ); value--; } PLA_Obj_free( &alpha_cpy ); PLA_Obj_free( &alpha_tmp ); PLA_Obj_free( &A_tmp ); } size_malloced = PLA_Total_size_malloced( ); if ( size_malloced != old_size_malloced ) PLA_Warning( "PLA_Matrix_infinity_norm: memory discrepency" ); PLA_Routine_stack_pop( routine_name ); PLA_Routine_stack_pop( routine_name ); return value; }
PLA_Copy_sym_tridiag_to_msc( int uplo, PLA_Obj A, PLA_Obj tridiag ) { int size; PLA_Obj A_BR = NULL, A_11 = NULL, A_21 = NULL, A_21_1 = NULL, temp = NULL, diag_1 = NULL, diag_B = NULL, subdiag_1 = NULL, subdiag_B = NULL; if ( uplo != PLA_LOWER_TRIANGULAR ) PLA_Abort( "uplo != PLA_LOWER_TRIANGULAR not yet implemented", __LINE__, __FILE__ ); PLA_Mscalar_create_conf_to( tridiag, PLA_ALL_ROWS, PLA_ALL_COLS, &temp ); PLA_Obj_set_to_zero( temp ); PLA_Obj_view_all( A, &A_BR ); PLA_Obj_vert_split_2( temp, 1, &diag_B, &subdiag_B ); while ( TRUE ) { PLA_Obj_global_length( A_BR, &size ); if ( size == 0 ) break; PLA_Obj_split_4( A_BR, 1, 1, &A_11, PLA_DUMMY, &A_21, &A_BR ); PLA_Obj_horz_split_2( diag_B, 1, &diag_1, &diag_B ); PLA_Obj_horz_split_2( subdiag_B, 1, &subdiag_1, &subdiag_B ); PLA_Local_copy( A_11, diag_1 ); if ( size > 1 ) { PLA_Obj_horz_split_2( A_21, 1, &A_21_1, PLA_DUMMY ); PLA_Local_copy( A_21_1, subdiag_1 ); } } PLA_Reduce( temp, MPI_SUM, tridiag ); PLA_Obj_free( &A_BR ); PLA_Obj_free( &A_11 ); PLA_Obj_free( &A_21 ); PLA_Obj_free( &A_21_1 ); PLA_Obj_free( &temp ); PLA_Obj_free( &diag_1 ); PLA_Obj_free( &diag_B ); PLA_Obj_free( &subdiag_1 ); PLA_Obj_free( &subdiag_B ); return PLA_SUCCESS; }
int PLA_Asum( PLA_Obj x, PLA_Obj alpha ) { int value = PLA_SUCCESS; PLA_Obj alpha_local = NULL; if ( PLA_ERROR_CHECKING ) value = PLA_Asum_enter( x, alpha ); PLA_Mscalar_create_conf_to ( alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &alpha_local ); PLA_Local_asum( x, alpha_local ); PLA_Reduce( alpha_local, MPI_SUM, alpha ); PLA_Obj_free( &alpha_local ); if ( PLA_ERROR_CHECKING ) value = PLA_Asum_exit( x, alpha ); return value; }
int PLA_Obj_set_diagonal( PLA_Obj A, PLA_Obj x ) /* PLA_Obj_set_diag Purpose: Create diagonal matrix A with diagonal values equal to the values in vector x. Input: x -- PLA_MVECTOR of width 1 specifies the values to be placed on the diagonal of A OUTPUT: A -- PLA_MATRIX */ { int size; PLA_Obj A_11 = NULL, A_BR = NULL, x_1 = NULL, x_B = NULL; PLA_Obj_view_all( A, &A_BR ); PLA_Obj_view_all( x, &x_B ); while ( TRUE ){ PLA_Obj_global_length( x_B, &size ); if ( size == 0 ) break; PLA_Obj_split_4( A_BR, 1, 1, &A_11, PLA_DUMMY, PLA_DUMMY, &A_BR ); PLA_Obj_horz_split_2( x_B, 1, &x_1, &x_B ); PLA_Local_copy( x_1, A_11 ); } PLA_Obj_free( &A_11 ); PLA_Obj_free( &A_BR ); PLA_Obj_free( &x_1 ); PLA_Obj_free( &x_B ); return PLA_SUCCESS; }
int PLA_Ger( PLA_Obj alpha, PLA_Obj x, PLA_Obj y, PLA_Obj A ) { int value = PLA_SUCCESS, owner_row, owner_col; PLA_Obj alpha_cpy = NULL, x_dup = NULL, y_dup = NULL; if ( PLA_ERROR_CHECKING ) /* Perform parameter and error checking */ value = PLA_Ger_enter( alpha, x, y, A ); /* If necessary, duplicate alpha and beta to all nodes */ PLA_Obj_owner_row( alpha, &owner_row ); PLA_Obj_owner_col( alpha, &owner_col ); if ( owner_row != PLA_ALL_ROWS || owner_col != PLA_ALL_COLS ){ PLA_Mscalar_create_conf_to( alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &alpha_cpy ); PLA_Copy( alpha, alpha_cpy ); } PLA_Pmvector_create_conf_to( A, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, 1, &y_dup ); PLA_Pmvector_create_conf_to( A, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, 1, &x_dup ); PLA_Copy( x, x_dup ); PLA_Copy( y, y_dup ); PLA_Local_ger( ( alpha_cpy == NULL ? alpha: alpha_cpy ), x_dup, y_dup, A ); PLA_Obj_free( &alpha_cpy ); PLA_Obj_free( &x_dup ); PLA_Obj_free( &y_dup ); if ( PLA_ERROR_CHECKING ) /* Perform exit error checking */ value = PLA_Ger_exit( alpha, x, y, A ); return value; }
int PLA_Herk_perform_local_part( int uplo, PLA_Obj alpha, PLA_Obj Xdpmv, PLA_Obj Xdpmv_conj_trans, PLA_Obj beta, PLA_Obj A ) { int length, local_size, length_1, count, me; PLA_Obj A11 = NULL, A21 = NULL, A22 = NULL, Xdpmv_1 = NULL, Xdpmv_2 = NULL, Xdpmv_conj_trans_1 = NULL, Xdpmv_conj_trans_2 = NULL; PLA_Obj_local_width( A, &local_size ); if ( 0 == local_size ) return; PLA_Obj_local_length( A, &local_size ); if ( 0 == local_size ) return; PLA_Obj_global_length( A, &length ); if ( length >= 256 ) { length_1 = (length/2); length_1 = length_1 - length_1 % 64 + 64; if ( length_1 >= length ) length = length/2; PLA_Obj_split_4 ( A, length_1, length_1, &A11, PLA_DUMMY, &A21, &A22 ); PLA_Obj_horz_split_2( Xdpmv, length_1, &Xdpmv_1, &Xdpmv_2 ); PLA_Obj_vert_split_2( Xdpmv_conj_trans, length_1, &Xdpmv_conj_trans_1, &Xdpmv_conj_trans_2 ); PLA_Local_gemm( PLA_NO_TRANS, PLA_NO_TRANS, alpha, Xdpmv_2, Xdpmv_conj_trans_1, beta, A21 ); PLA_Herk_perform_local_part( PLA_LOWER_TRIANGULAR, alpha, Xdpmv_1, Xdpmv_conj_trans_1, beta, A11 ); PLA_Herk_perform_local_part( PLA_LOWER_TRIANGULAR, alpha, Xdpmv_2, Xdpmv_conj_trans_2, beta, A22 ); } else { PLA_Herk_perform_local_part_by_panels( uplo, alpha, Xdpmv, Xdpmv_conj_trans, beta, A ); } PLA_Obj_free( &A11 ); PLA_Obj_free( &A21 ); PLA_Obj_free( &A22 ); PLA_Obj_free( &Xdpmv_1 ); PLA_Obj_free( &Xdpmv_2 ); PLA_Obj_free( &Xdpmv_conj_trans_1 ); PLA_Obj_free( &Xdpmv_conj_trans_2 ); return( PLA_SUCCESS ); }
int PLA_Local_add(PLA_Obj x, PLA_Obj y) { PLA_Obj one = NULL; PLA_Create_constants_conf_to( x, NULL, NULL, &one ); PLA_Local_axpy( one, x, y ); PLA_Obj_free( &one ); return PLA_SUCCESS; }
int PLA_Local_sym_tridiag_eig( PLA_Obj tridiag, PLA_Obj Z ) { int info, length, i_one = 1, local_length_Z, ldz; void *buffer_diag, *buffer_subdiag, *buffer_Z, *work; MPI_Datatype datatype; PLA_Obj diag = NULL, subdiag = NULL; PLA_Obj_vert_split_2( tridiag, 1, &diag, &subdiag ); PLA_Obj_datatype( diag, &datatype ); PLA_Obj_global_length( diag, &length ); PLA_Obj_local_buffer( diag, &buffer_diag ); PLA_Obj_local_buffer( subdiag, &buffer_subdiag ); if ( Z == NULL ) PLA_dsteqr( "N", &length, (double *) buffer_diag, (double *) buffer_subdiag, NULL, &i_one, work, &info ); else{ PLA_Obj_local_length( Z, &local_length_Z ); PLA_Obj_local_buffer( Z, &buffer_Z ); PLA_Obj_local_ldim( Z, &ldz ); work = ( double * ) PLA_malloc( 2 * length * sizeof( double ) ); PLA_dsteqr_x( "V", &local_length_Z, &length, (double *) buffer_diag, (double *) buffer_subdiag, (double *) buffer_Z, &ldz, work, &info ); PLA_free( work ); } PLA_Obj_free( &diag ); PLA_Obj_free( &subdiag ); return PLA_SUCCESS; }
int PLA_Obj_set_to_zero_below_first_subdiagonal( PLA_Obj A ) { PLA_Obj A21 = NULL; PLA_Obj_split_4( A, 1, -1, PLA_DUMMY, PLA_DUMMY, &A21, PLA_DUMMY ); PLA_Obj_set_to_zero_below_diagonal( A21 ); PLA_Obj_free( &A21 ); return PLA_SUCCESS; }
int PLA_Obj_set_to_zero_below_diagonal( PLA_Obj A ) { PLA_Obj A_BR = NULL, a_21 = NULL; int size; PLA_Obj_view_all( A, &A_BR ); while( TRUE ){ PLA_Obj_global_length( A_BR, &size ); if ( size == 0 ) break; PLA_Obj_split_4( A_BR, 1, 1, PLA_DUMMY, PLA_DUMMY, &a_21, &A_BR ); PLA_Obj_set_to_zero( a_21 ); } PLA_Obj_free( &A_BR ); PLA_Obj_free( &a_21 ); return PLA_SUCCESS; }
static int pmatdestroy(void*MM){ plapackM* ctx=(plapackM*)MM; int info; DSDPFunctionBegin; PPDSDPPrint("PDSDP: Compute M: %5.5e \n",ctx->thessian); PPDSDPPrint("PDSDP: Solve M %4.5e seconds\n ",ctx->tsolve); info=PLA_Obj_free(&ctx->AMat); DSDPCHKERR(info); info=PLA_Obj_free(&ctx->vVec); DSDPCHKERR(info); info=PLA_Obj_free(&ctx->wVec); DSDPCHKERR(info); info=PLA_Obj_free(&ctx->one); DSDPCHKERR(info); info=PLA_Obj_free(&ctx->zero); DSDPCHKERR(info); info=PLA_Obj_free(&ctx->dxerror); DSDPCHKERR(info); info=PLA_Temp_free(&ctx->templ); DSDPCHKERR(info); info=PLA_Finalize(); DSDPCHKERR(info); DSDPFREE(&ctx,&info);DSDPCHKERR(info); DSDPFunctionReturn(0); }
void PLA_Trmm_right_upper_trans( int diag, PLA_Obj alpha, PLA_Obj A, PLA_Obj B, int nb_alg ) { PLA_Obj ATL=NULL, ATR=NULL, A00=NULL, A01=NULL, A02=NULL, BL=NULL, B0=NULL, ABL=NULL, ABR=NULL, A10=NULL, A11=NULL, A12=NULL, BR=NULL, B1=NULL, A20=NULL, A21=NULL, A22=NULL, B2=NULL, A11_dup=NULL, B1_mv=NULL, A01_dpmv=NULL, B1_dpmv=NULL, ONE=NULL; int b, size_row; PLA_Create_constants_conf_to( A, NULL, NULL, &ONE ); PLA_Part_2x2( A, &ATL, /**/ &ATR, /* ************** */ &ABL, /**/ &ABR, 0, 0, /* submatrix */ PLA_TL ); PLA_Part_1x2( B, &BL, /**/ &BR, 0, /* width submatrix */ PLA_LEFT ); while ( TRUE ){ PLA_Obj_global_length( ABR, &size_row ); b = min( size_row, nb_alg ); if ( 0 == b ) break; PLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, /**/ &A01, &A02, /* ************* */ /* ********************* */ /**/ &A10, /**/ &A11, &A12, ABL, /**/ ABR, &A20, /**/ &A21, &A22, b, b, /* A11 from */ PLA_BR ); PLA_Repart_1x2_to_1x3( BL, /**/ BR, &B0, /**/ &B1, &B2, b, /* width B1 from */ PLA_RIGHT ); /* ********************************************************************* */ PLA_Pmvector_create_conf_to( B0, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, b, &B1_dpmv ); PLA_Pmvector_create_conf_to( B0, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, b, &A01_dpmv ); PLA_Copy( B1, B1_dpmv ); PLA_Copy( A01, A01_dpmv ); PLA_Local_gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE, alpha, B1_dpmv, A01_dpmv, ONE, B0 ); PLA_Mscalar_create_conf_to( A11, PLA_ALL_ROWS, PLA_ALL_COLS, &A11_dup ); PLA_Copy( A11, A11_dup ); PLA_Local_trmm( PLA_RIGHT, PLA_UPPER_TRIANGULAR, PLA_TRANSPOSE, diag, alpha, A11_dup, B1_dpmv ); PLA_Copy( B1_dpmv, B1 ); /* ********************************************************************* */ PLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, A01, /**/ A02, /**/ A10, A11, /**/ A12, /* ************** */ /* ****************** */ &ABL, /**/ &ABR, A20, A21, /**/ A22, /* A11 added to */ PLA_TL ); PLA_Cont_with_1x3_to_1x2( &BL, /**/ &BR, B0, B1, /**/ B2, /* B1 added to */ PLA_LEFT ); } PLA_Obj_free( &ATL ); PLA_Obj_free( &ATR ); PLA_Obj_free( &A00 ); PLA_Obj_free( &A01 ); PLA_Obj_free( &A01_dpmv ); PLA_Obj_free( &A02 ); PLA_Obj_free( &BL ); PLA_Obj_free( &B0 ); PLA_Obj_free( &ABL ); PLA_Obj_free( &ABR ); PLA_Obj_free( &A10 ); PLA_Obj_free( &A11 ); PLA_Obj_free( &A11_dup ); PLA_Obj_free( &A12 ); PLA_Obj_free( &BR ); PLA_Obj_free( &B1 ); PLA_Obj_free( &B1_mv ); PLA_Obj_free( &B1_dpmv ); PLA_Obj_free( &A20 ); PLA_Obj_free( &A21 ); PLA_Obj_free( &A22 ); PLA_Obj_free( &B2 ); PLA_Obj_free( &ONE ); }
int PLA_Tri_red_exit( int uplo, PLA_Obj A, PLA_Obj s, PLA_Obj Q ) { int value = PLA_SUCCESS, size_malloced; double max_A, diff, PLA_Local_abs_max(), PLA_Local_abs_diff(); char routine_name[ 35 ]; PLA_Routine_stack_push( "PLA_Tri_red_exit" ); if ( PLA_CHECK_AGAINST_SEQUENTIAL ){ PLA_Obj A_temp = NULL, Q_temp = NULL, B = NULL, C = NULL, minus_one = NULL, zero = NULL, one = NULL; MPI_Datatype datatype; double tol; PLA_Obj_datatype( A, &datatype ); if ( datatype == MPI_DOUBLE || datatype == MPI_DOUBLE_COMPLEX ) tol = 0.000001; else tol = 0.0001; PLA_Create_constants_conf_to( A, &minus_one, &zero, &one ); PLA_Obj_set_to_zero_below_first_subdiagonal( A ); PLA_Mscalar_create_conf_to( A, PLA_ALL_ROWS, PLA_ALL_COLS, &A_temp ); PLA_Mscalar_create_conf_to( Q, PLA_ALL_ROWS, PLA_ALL_COLS, &Q_temp ); PLA_Copy( Q, Q_temp ); PLA_Copy( A, A_temp ); max_A = PLA_Local_abs_max( A_cpy ); /* Form Q A Q^T */ PLA_Mscalar_create_conf_to( A_temp, PLA_ALL_ROWS, PLA_ALL_COLS, &B ); PLA_Mscalar_create_conf_to( A_temp, PLA_ALL_ROWS, PLA_ALL_COLS, &C ); /* B = Q A */ PLA_Local_symm( PLA_SIDE_RIGHT, PLA_LOWER_TRIANGULAR, one, A_temp, Q_temp, zero, B ); /* C = A - B Q^T*/ PLA_Local_copy( A_cpy, C ); PLA_Local_gemm( PLA_NO_TRANS, PLA_TRANS, minus_one, B, Q_temp, one, C ); PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, PLA_NONUNIT_DIAG, C ); diff = PLA_Local_abs_max( C ); if ( diff > tol * max_A ){ PLA_Warning( "PLA_Tri_red: large relative error encountered" ); value--; } PLA_Obj_free( &A_cpy ); PLA_Obj_free( &A_temp ); PLA_Obj_free( &Q_temp ); PLA_Obj_free( &B ); PLA_Obj_free( &C ); PLA_Obj_free( &minus_one ); PLA_Obj_free( &zero ); PLA_Obj_free( &one ); } size_malloced = PLA_Total_size_malloced( ); if ( size_malloced != old_size_malloced ) PLA_Warning( "PLA_Tri_red: memory discrepency" ); PLA_Routine_stack_pop( routine_name ); PLA_Routine_stack_pop( routine_name ); return value; }
int PLA_API_Poll_request ( ) /*-------------------------------------------------------------------------- Purpose : Poll to see if messages have come in ----------------------------------------------------------------------------*/ { int msgtype, m, n, lda, obj_index, source, align_row, align_col, typesize, local_ldim, temp; PLA_Obj obj_source = NULL; void *local_buf; MPI_Datatype datatype; if ( PLA_API_Accept_recv_request_buffer( ) ){ /* New recv put_buffer has come in */ while ( !PLA_API_End_of_recv_request_buffer ( ) ){ /* Process next message */ PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &msgtype ); switch ( msgtype ){ case PLA_API_TYPE_PUT: PLA_Abort( "API illegal type", __LINE__, __FILE__ ); case PLA_API_TYPE_SYNC: request_sync_counter ++; break; case PLA_API_TYPE_GET: PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &m ); PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &n ); PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &obj_index ); PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &align_row ); PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &align_col ); PLA_API_read_data_from_request_buffer( sizeof( void * ), ( char * ) &local_buf ); PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &lda ); PLA_API_read_data_from_request_buffer( sizeof( int ), ( char * ) &source ); /* Take view into appropriate object */ PLA_Obj_view( open_objects_list[ obj_index ], m, n, align_row, align_col, &obj_source ); /* Create room to send current message */ PLA_Obj_datatype( obj_source, &datatype ); MPI_Type_size( datatype, &typesize ); PLA_API_put_buffer_make_room( 5 * sizeof( int ) + sizeof( void * ) + m * n * typesize, source ); /* Enter put destination information in the buffer */ temp = PLA_API_TYPE_GET; PLA_API_add_data_to_put_buffer( sizeof( int ), ( char * ) &temp, source ); /* PLA_API_add_data_to_put_buffer( sizeof( int ), ( char * ) &datatype, source ); */ PLA_API_add_data_to_put_buffer( sizeof( int ), ( char * ) &obj_index, source ); PLA_API_add_data_to_put_buffer( sizeof( int ), ( char * ) &m, source ); PLA_API_add_data_to_put_buffer( sizeof( int ), ( char * ) &n, source ); PLA_API_add_data_to_put_buffer( sizeof( void * ), ( char * ) &local_buf, source ); PLA_API_add_data_to_put_buffer( sizeof( int ), ( char * ) &lda, source ); /* Add the contents of the object to the buffer */ PLA_API_add_contents_from_obj_to_put_buffer( obj_source, source ); break; default: PLA_Abort( "API illegal type", __LINE__, __FILE__ ); } } } PLA_Obj_free( &obj_source ); return PLA_SUCCESS; }
int PLA_Chol_simple( int nb_alg, PLA_Obj A ) /* Purpose : Parallel Cholesky Factorization. This particular version assumes only the lower triangular portion of A is stored. It only uses PLAPACK parallel BLAS calls, to illustrate how a simple implementation reflects the algorithm perfectly. IN nb_alg integer, algorithmic block size to be used IN/OUT A matrix to be factored Algorith used: ****************************************************************** Partition A = / A_TL || * \ | ===== ===== | \ A_BL || A_BR / where A_TL is 0 x 0 while A_BR is not 0 x 0 Determine block size b Partition A = / A_TL || * \ / A_00 || * | * \ | ===== ===== | = | ==== ==== ==== | \ A_BL || A_BR / | A_10 || A_11 | * | | ---- ---- ---- | \ A_20 || A_21 | A_22 / where A_00 = A_TL and A_11 is b x b Update A_11 <- L_11 = Chol. Fact.( A_11 ) Update A_21 <- L_21 = A_21 inv( L_11' ) Update A_22 <- A_22 - L_21 * L_21' Continue with A = / A_TL || * \ / A_00 | * || * \ | ===== ===== | = | ---- ---- ---- | \ A_BL || A_BR / | A_10 | A_11 || * | | ==== ==== ==== | \ A_20 | A_21 || A_22 / endwhile ****************************************************************** NOTE: For details on how to implement parallel Cholesky factorization see R. van de Geijn, Using PLAPACK, The MIT Press, 1997. G. Morrow and R. van de Geijn, "Zen and the Art of High-Performance Parallel Computing," http://www.cs.utexas.edu/users/plapack */ { int value = 0, size_top, size_left, owner_top, owner_left, size; PLA_Obj ABR = NULL, A11 = NULL, A21 = NULL, one = NULL, minus_one = NULL; /* Create usual duplicated scalar constants */ PLA_Create_constants_conf_to( A, &minus_one, NULL, &one ); /* View ABR = A */ PLA_Obj_view_all( A, &ABR ); while ( TRUE ) { /* Determine size of current panel. Notice that we limit the size so that A_11 resides on a single processor */ PLA_Obj_split_size( ABR, PLA_SIDE_TOP, &size_top, &owner_top ); PLA_Obj_split_size( ABR, PLA_SIDE_LEFT, &size_left, &owner_left ); if ( 0 == ( size = min( min( size_top, size_left), nb_alg ) ) ) break; /* Partition A_BR = / A_11 * \ \ A_21 A_22 / where A_11 is b x b. Notice that A_22 becomes A_BR in the next iteration, so we already view this as ABR. */ PLA_Obj_split_4( ABR, size, size, &A11, PLA_DUMMY, &A21, &ABR ); /* Update A_11 <- L_11 = Chol. Fact.( A_11 ) */ PLA_Local_chol( PLA_LOWER_TRIANGULAR, A11 ); /* Update A_21 <- L_21 = A_21 inv( L_11' ) */ PLA_Trsm( PLA_SIDE_RIGHT, PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, one, A11, A21 ); /* Update A_22 <- A_22 - L_21 * L_21' */ PLA_Syrk( PLA_LOWER_TRIANGULAR, minus_one, A21, one, ABR ); } /* free temporary objects and views */ PLA_Obj_free( &ABR ); PLA_Obj_free( &A11 ); PLA_Obj_free( &A21 ); PLA_Obj_free( &one ); PLA_Obj_free( &minus_one ); return value; }
int main(int argc, char *argv[]) { MPI_Comm comm = MPI_COMM_NULL; MPI_Datatype datatype; PLA_Template templ = NULL; PLA_Obj A = NULL, A_orig = NULL, residual = NULL, minus_one = NULL, one = NULL, diff = NULL; int n, nb_distr, nb_alg, error, parameters, sequential, me, nprocs, nprows, npcols, itype; double time, flops; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if (me==0) { printf("enter mesh size:\n"); scanf("%d%d", &nprows, &npcols ); printf("mesh size = %d x %d \n", nprows, npcols ); printf("enter distr. block size:\n"); scanf("%d", &nb_distr ); printf("nb_distr = %d\n", nb_distr ); printf("enter alg. block size:\n"); scanf("%d", &nb_alg ); printf("nb_alg = %d\n", nb_alg ); printf("turn on error checking? (0 = NO, 1 = YES):\n"); scanf("%d", &error ); printf("error checking = %d\n", error ); printf("turn on parameter checking? (0 = NO, 1 = YES):\n"); scanf("%d", ¶meters ); printf("parameter checking = %d\n", parameters ); printf("turn on sequential checking? (0 = NO, 1 = YES):\n"); scanf("%d", &sequential ); printf("sequential checking = %d\n", sequential ); } MPI_Bcast(&nprows, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&npcols, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_alg, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&error, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(¶meters, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&sequential, 1, MPI_INT, 0, MPI_COMM_WORLD); pla_Environ_set_nb_alg( PLA_OP_ALL_ALG, nb_alg ); PLA_Set_error_checking( error, parameters, sequential, FALSE ); /* PLA_Comm_1D_to_2D_ratio(MPI_COMM_WORLD, 1.0, &comm); */ PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); PLA_Init(comm); PLA_Temp_create( nb_distr, 0, &templ ); while ( TRUE ) { if (me==0) { printf("enter datatype:\n"); printf("-1 = quit\n"); printf(" 0 = float\n"); printf(" 1 = double\n"); printf(" 2 = complex\n"); printf(" 3 = double complex\n"); scanf("%d", &itype ); printf("itype = %d\n", itype ); } MPI_Bcast(&itype, 1, MPI_INT, 0, MPI_COMM_WORLD); if ( itype == -1 ) break; switch( itype ) { case 0: datatype = MPI_FLOAT; break; case 1: datatype = MPI_DOUBLE; break; case 2: datatype = MPI_COMPLEX; break; case 3: datatype = MPI_DOUBLE_COMPLEX; break; default: PLA_Abort( "unknown datatype", __LINE__, __FILE__ ); } if (me==0) { printf("enter n:\n"); scanf("%d", &n ); printf("n = %d\n", n ); } MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); PLA_Matrix_create( datatype, n, n, templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &A ); PLA_Matrix_create_conf_to( A, &A_orig ); PLA_Matrix_create_conf_to( A, &residual ); create_problem( A ); { double d_n; d_n = (double) n; PLA_Shift( A, MPI_DOUBLE, &d_n ); } PLA_Create_constants_conf_to( A, &minus_one, NULL, &one ); PLA_Local_copy( A, A_orig ); /* Use invert routine that uses factors */ MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime (); PLA_Triangular_invert( PLA_LOWER_TRIANGULAR, A ); MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime () - time; flops = 2.0/3.0 * n * n * n; if ( me == 0 ) printf("%d time = %f, MFLOPS/node = %10.4lf \n", n, time, flops / time * 1.0e-6 / nprocs ); PLA_Obj_set_to_identity( residual ); PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, PLA_NONUNIT_DIAG, A_orig ); PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, PLA_NONUNIT_DIAG, A ); PLA_Gemm( PLA_NO_TRANS, PLA_NO_TRANS, minus_one, A_orig, A, one, residual ); PLA_Mscalar_create( datatype, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ, &diff ); PLA_Matrix_one_norm( residual, diff ); } PLA_Obj_free( &A ); PLA_Obj_free( &A_orig ); PLA_Obj_free( &residual ); PLA_Obj_free( &minus_one ); PLA_Obj_free( &one ); PLA_Obj_free( &diff ); PLA_Temp_free(&templ); PLA_Finalize( ); MPI_Finalize( ); }
int PLA_Symm_exit( int side, int uplo, PLA_Obj alpha, PLA_Obj A, PLA_Obj B, PLA_Obj beta, PLA_Obj C ) { int value = PLA_SUCCESS, size_malloced; double max_A, max_B, max_C, max_all, diff, PLA_Local_abs_max(), PLA_Local_abs_diff(); PLA_Obj one = NULL; char routine_name[ 35 ]; PLA_Routine_stack_push( "PLA_Gemm_exit" ); if ( PLA_CHECK_AGAINST_SEQUENTIAL ){ PLA_Obj C_temp = NULL; PLA_Mscalar_create_conf_to( C, PLA_ALL_ROWS, PLA_ALL_COLS, &C_temp ); PLA_Copy( C, C_temp ); max_A = PLA_Local_abs_max( A_cpy ); max_B = PLA_Local_abs_max( B_cpy ); PLA_Local_scal( beta_cpy, C_cpy ); max_C = PLA_Local_abs_max( C_cpy ); max_all = max( max_A, max( max_B, max_C ) ); PLA_Create_constants_conf_to( A, NULL, NULL, &one ); PLA_Local_symm( side, uplo, alpha_cpy, A_cpy, B_cpy, one, C_cpy ); diff = PLA_Local_abs_diff( C_temp, C_cpy ); if ( diff > 0.000001 * max_all ){ PLA_Warning( "PLA_Symm: large relative error encountered" ); value--; } PLA_Obj_free( &alpha_cpy ); PLA_Obj_free( &A_cpy ); PLA_Obj_free( &B_cpy ); PLA_Obj_free( &beta_cpy ); PLA_Obj_free( &C_cpy ); PLA_Obj_free( &C_temp ); PLA_Obj_free( &one ); } size_malloced = PLA_Total_size_malloced( ); if ( size_malloced != old_size_malloced ) PLA_Warning( "PLA_Symm: memory discrepency" ); PLA_Routine_stack_pop( routine_name ); PLA_Routine_stack_pop( routine_name ); return value; }
int PLA_Matrix_one_norm( PLA_Obj A, PLA_Obj alpha ) { int size, proj_onto_A, value = PLA_SUCCESS; PLA_Obj A_R = NULL, a_1 = NULL, col_asums = NULL, local_col_asums = NULL, local_col_asums_R = NULL, alpha_1 = NULL, local_max = NULL; if ( PLA_ERROR_CHECKING ) /* Perform parameter and error checking */ value = PLA_Matrix_one_norm_enter( A, alpha ); /* Create a vector to hold the one norm of the columns of the matrix */ PLA_Obj_get_orientation( A, &proj_onto_A ); PLA_Obj_set_orientation( A, PLA_PROJ_ONTO_ROW ); PLA_Mvector_create_conf_to( A, 1, &col_asums ); PLA_Obj_set_orientation( A, proj_onto_A ); /* Create a vector projected onto rows to hold the one norm of the columns of the local matrices */ PLA_Pmvector_create_conf_to( A, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, 1, &local_col_asums ); PLA_Obj_view_all( A, &A_R ); PLA_Obj_view_all( local_col_asums, &local_col_asums_R ); while ( TRUE ){ PLA_Obj_global_width( A_R, &size ); if ( size == 0 ) break; PLA_Obj_vert_split_2( A_R, 1, &a_1, &A_R ); PLA_Obj_vert_split_2( local_col_asums_R, 1, &alpha_1, &local_col_asums_R ); PLA_Local_asum( a_1, alpha_1 ); } /* compute global 1-norms of the columns */ PLA_Reduce( local_col_asums, MPI_SUM, col_asums ); /* Create a multiscalar in which to hold the local asum */ PLA_Mscalar_create_conf_to( alpha, PLA_ALL_ROWS, PLA_ALL_COLS, &local_max ); PLA_Local_absolute_max( col_asums, local_max ); PLA_Reduce( local_max, MPI_MAX, alpha ); PLA_Obj_free ( &A_R ); PLA_Obj_free ( &a_1 ); PLA_Obj_free ( &col_asums ); PLA_Obj_free ( &local_col_asums ); PLA_Obj_free ( &local_col_asums_R ); PLA_Obj_free ( &local_max ); PLA_Obj_free ( &alpha_1 ); if ( PLA_ERROR_CHECKING ) /* Perform parameter and error checking */ value = PLA_Matrix_one_norm_exit( A, alpha ); }
int PLA_API_Poll_put ( ) /*-------------------------------------------------------------------------- Purpose : Poll to see if messages have come in ----------------------------------------------------------------------------*/ { int msgtype, m, n, lda, obj_index, align_row, align_col, local_ldim; PLA_Obj obj_dest = NULL; void *local_buf; MPI_Datatype datatype; if ( PLA_API_Accept_recv_put_buffer( ) ){ /* New recv put_buffer has come in */ while ( !PLA_API_End_of_recv_put_buffer ( ) ){ /* Process next message */ PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &msgtype ); switch ( msgtype ){ case PLA_API_TYPE_PUT: PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &m ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &n ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &obj_index ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &align_row ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &align_col ); /* Take view into appropriate object */ PLA_Obj_view( open_objects_list[ obj_index ], m, n, align_row, align_col, &obj_dest ); /* Add the contents of the message to the local contents of the view */ PLA_API_read_contents_from_buffer_to_obj( m, n, obj_dest ); break; case PLA_API_TYPE_SYNC: put_sync_counter ++; break; case PLA_API_TYPE_GET: /* Enter put destination information in the buffer */ /* PLA_API_read_data_from_put_buffer( sizeof( MPI_Datatype ), ( char *) &datatype ); */ PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &obj_index ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &m ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &n ); PLA_API_read_data_from_put_buffer( sizeof( void * ), ( char * ) &local_buf ); PLA_API_read_data_from_put_buffer( sizeof( int ), ( char * ) &lda ); PLA_Obj_datatype( open_objects_list[ obj_index ], &datatype ); /* Add the contents of the object to the buffer */ PLA_API_add_matrix_from_recv_buffer_to_local( datatype, m, n, local_buf, lda ); break; default: PLA_Abort( "API illegal type", __LINE__, __FILE__ ); } } } PLA_Obj_free( &obj_dest ); return PLA_SUCCESS; }
void PLA_Trmm_left_lower_notrans( int diag, PLA_Obj alpha, PLA_Obj A, PLA_Obj B, int nb_alg ) { PLA_Obj ATL=NULL, ATR=NULL, A00=NULL, A01=NULL, A02=NULL, BT=NULL, B0=NULL, ABL=NULL, ABR=NULL, A10=NULL, A11=NULL, A12=NULL, BB=NULL, B1=NULL, A20=NULL, A21=NULL, A22=NULL, B2=NULL, A11_dup=NULL, B1_mv=NULL, A21_dpmv=NULL, B1_dpmv=NULL, ONE=NULL; int b, size_row; PLA_Create_constants_conf_to( A, NULL, NULL, &ONE ); PLA_Part_2x2( A, &ATL, /**/ &ATR, /* ************** */ &ABL, /**/ &ABR, 0, 0, /* submatrix */ PLA_BR ); PLA_Part_2x1( B, &BT, /***/ &BB, 0, /* length submatrix */ PLA_BOTTOM ); while ( TRUE ){ PLA_Obj_global_length( ATL, &size_row ); b = min( size_row, nb_alg ); if ( 0 == b ) break; PLA_Repart_2x2_to_3x3( ATL, /**/ ATR, &A00, &A01, /**/ &A02, /**/ &A10, &A11, /**/ &A12, /* ************* */ /* ********************* */ ABL, /**/ ABR, &A20, &A21, /**/ &A22, b, b, /* A11 from */ PLA_TL ); PLA_Repart_2x1_to_3x1( BT, &B0, &B1, /**/ /**/ BB, &B2, b, /* length B1 from */ PLA_TOP ); /* ********************************************************************* */ PLA_Obj_set_orientation( B1, PLA_PROJ_ONTO_ROW ); PLA_Pmvector_create_conf_to( B2, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, b, &B1_dpmv ); PLA_Pmvector_create_conf_to( B2, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, b, &A21_dpmv ); PLA_Copy( B1, B1_dpmv ); PLA_Copy( A21, A21_dpmv ); PLA_Local_gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE, alpha, A21_dpmv, B1_dpmv, ONE, B2 ); PLA_Mscalar_create_conf_to( A11, PLA_ALL_ROWS, PLA_ALL_COLS, &A11_dup ); PLA_Copy( A11, A11_dup ); PLA_Local_trmm( PLA_LEFT, PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, diag, alpha, A11_dup, B1_dpmv ); PLA_Copy( B1_dpmv, B1 ); /* ********************************************************************* */ PLA_Cont_with_3x3_to_2x2( &ATL, /**/ &ATR, A00, /**/ A01, A02, /* ************** */ /* ****************** */ /**/ A10, /**/ A11, A12, &ABL, /**/ &ABR, A20, /**/ A21, A22, /* A11 added to */ PLA_BR ); PLA_Cont_with_3x1_to_2x1( &BT, B0, /***/ /**/ B1, &BB, B2, /* B1 added to */ PLA_BOTTOM ); } PLA_Obj_free( &ATL ); PLA_Obj_free( &ATR ); PLA_Obj_free( &A00 ); PLA_Obj_free( &A01 ); PLA_Obj_free( &A02 ); PLA_Obj_free( &BT ); PLA_Obj_free( &B0 ); PLA_Obj_free( &ABL ); PLA_Obj_free( &ABR ); PLA_Obj_free( &A10 ); PLA_Obj_free( &A11 ); PLA_Obj_free( &A11_dup ); PLA_Obj_free( &A12 ); PLA_Obj_free( &BB ); PLA_Obj_free( &B1 ); PLA_Obj_free( &B1_mv ); PLA_Obj_free( &B1_dpmv ); PLA_Obj_free( &A20 ); PLA_Obj_free( &A21 ); PLA_Obj_free( &A21_dpmv ); PLA_Obj_free( &A22 ); PLA_Obj_free( &B2 ); PLA_Obj_free( &ONE ); }
int PLA_Triangular_invert_lower( PLA_Obj A ) { int nb_alg, size, value = PLA_SUCCESS; PLA_Obj A_BR = NULL, A_11 = NULL, A_21 = NULL, A_21_dup = NULL, A_11_msc = NULL, Ainv = NULL, Ainv_BL = NULL, Ainv_10_11 = NULL, Ainv_10_11_dup = NULL, minus_one = NULL, zero = NULL, one = NULL; PLA_Template templ; PLA_Obj_template( A, &templ ); PLA_Environ_nb_alg( PLA_OP_PAN_PAN, templ, &nb_alg ); PLA_Create_constants_conf_to( A, &minus_one, &zero, &one ); PLA_Obj_vert_split_2( A, 0, &Ainv_BL, &A_BR ); while( TRUE ){ PLA_Obj_global_length( A_BR, &size ); if ( 0 == ( size = min( size, nb_alg ) ) ) break; PLA_Obj_split_4( A_BR, size, size, &A_11, PLA_DUMMY, &A_21, &A_BR ); PLA_Obj_view_shift( Ainv_BL, 0, 0, size, 0 ); PLA_Obj_horz_split_2( Ainv_BL, size, &Ainv_10_11, &Ainv_BL ); PLA_Mscalar_create_conf_to( A_11, PLA_ALL_ROWS, PLA_ALL_COLS, &A_11_msc ); PLA_Copy( A_11, A_11_msc ); PLA_Obj_set_orientation( Ainv_10_11, PLA_PROJ_ONTO_ROW ); PLA_Pmvector_create_conf_to( Ainv_BL, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, size, &Ainv_10_11_dup ); PLA_Pmvector_create_conf_to( Ainv_BL, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, size, &A_21_dup ); /* PLA_Trsm( PLA_SIDE_LEFT, PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, one, A_11, Ainv_10_11 ); */ PLA_Obj_set_to_identity( A_11 ); PLA_Copy( Ainv_10_11, Ainv_10_11_dup ); PLA_Local_trsm( PLA_SIDE_LEFT, PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, one, A_11_msc, Ainv_10_11_dup ); PLA_Copy( A_21, A_21_dup ); PLA_Obj_set_to_zero( A_21 ); PLA_Local_gemm( PLA_NO_TRANS, PLA_NO_TRANS, minus_one, A_21_dup, Ainv_10_11_dup, one, Ainv_BL ); PLA_Copy( Ainv_10_11_dup, Ainv_10_11 ); } PLA_Obj_free( &A_BR ); PLA_Obj_free( &A_11 ); PLA_Obj_free( &A_21 ); PLA_Obj_free( &Ainv ); PLA_Obj_free( &Ainv_BL ); PLA_Obj_free( &Ainv_10_11 ); PLA_Obj_free( &minus_one ); PLA_Obj_free( &zero ); PLA_Obj_free( &one ); return PLA_SUCCESS; }
int PLA_LU_linpack_right( int nb_alg, PLA_Obj A, PLA_Obj pivots ) /* Blocked right-looking LU factorization; Input: nb_alg -- Algorithmic blocking size to be used A -- m x n PLA_Matrix contains original matrix to be factored pivots -- PLA_Mvector of width 1 and datatype MPI_INT vector of length min( m, n ) in which to store the pivot information Output: A -- m x n PLA_Matrix contains factors L and U s.t. P A_orig = L U pivots -- PLA_Mvector of width 1 and datatype MPI_INT integer vector that describes permutation matrix P returns value PLA_SUCCESS if factorization completes successfully */ { int value = PLA_SUCCESS, size, length, width; PLA_Obj A_BR = NULL, A_1 = NULL, A_B = NULL, A_11 = NULL, A_12 = NULL, A_21 = NULL, A_21_dpmv = NULL, A_1_mv = NULL, A_1_dpmv = NULL, A_11_msc = NULL, A_12_dpmv = NULL, A_12_mv = NULL, pivots_B = NULL, pivots_1 = NULL, pivots_1_msc = NULL, one = NULL, minus_one = NULL; /* Create usual duplicated scalar constants */ PLA_Create_constants_conf_to( A, &minus_one, NULL, &one ); /* Partition A = / A_TL A_TR \ \ A_BL A_BR / where A_TL is 0 x 0 A_BR will track the active part of matrix A */ PLA_Obj_view_all( A, &A_BR ); /* Partition A = / A_T \ \ A_B / where A_T is 0 x n A_B will track the part of A that needs to be pivoted */ PLA_Obj_view_all( A, &A_B ); /* Partition pivots = / pivots_T \ \ pivots_B / where pivots_T is 0 x 1 pivots_B will track the active part of the pivot vector */ PLA_Obj_view_all( pivots, &pivots_B ); /* Do until done */ while ( TRUE ) { /* Determine size of current panel and check if done */ PLA_Obj_global_length( A_BR, &length ); PLA_Obj_global_width ( A_BR, &width ); size = min( length, width ); if ( 0 == ( size = min( size, nb_alg ) ) ) break; /* Partition off the current column panel to be factored */ PLA_Obj_vert_split_2( A_BR, size, &A_1, PLA_DUMMY ); /* Create a multivector to hold current column panel, allowing all nodes to participate in the panel factorization */ PLA_Mvector_create_conf_to( A_1, size, &A_1_mv ); /* Copy the current column panel to the multivector */ PLA_Copy( A_1, A_1_mv ); /* Partition A_BR = / A_11 A_12 \ \ A_21 A_22 / where A_11 is size x size */ PLA_Obj_split_4( A_BR, size, size, &A_11, &A_12, &A_21, PLA_DUMMY ); /* Create a duplicated multiscalar to hold a copy of A_11 after the panel has been factored (side-effect of the panel factorization routine */ PLA_Mscalar_create_conf_to( A_11, PLA_ALL_ROWS, PLA_ALL_COLS, &A_11_msc ); /* Partition off the subvector of pivots that will hold the pivot information for the column panel to be factored */ PLA_Obj_horz_split_2( pivots_B, size, &pivots_1, &pivots_B ); /* Create a duplicated multiscalar in which to store the pivot information for the column panel factorization */ PLA_Mscalar_create_conf_to( pivots_1, PLA_ALL_ROWS, PLA_ALL_COLS, &pivots_1_msc ); /* Factor the current column panel, distributed as a multivector. the updated A_11 matrix will be returned duplicated to all nodes in A_11_msc */ value = PLA_LU_right_mv( A_1_mv, A_11_msc, pivots_1_msc ); if ( value != PLA_SUCCESS ) break; /* Copy the pivot information for the current column panel into pivots */ PLA_Copy( pivots_1_msc, pivots_1 ); /* Pivot the rows of the matrix consistent with the pivot information for the current column panel */ PLA_Apply_pivots_to_rows( A_BR, pivots_1_msc ); PLA_Obj_split_4( A_BR, size, size, PLA_DUMMY, PLA_DUMMY, PLA_DUMMY, &A_BR ); /* Copy A_12 so it is duplicated to all rows, in A_12_dpmv */ PLA_Obj_set_orientation( A_12, PLA_PROJ_ONTO_ROW ); PLA_Pmvector_create_conf_to( A_12, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, size, &A_12_dpmv ); /* Duplicate the current panel to all columns, in A_1_dpmv */ PLA_Pmvector_create_conf_to( A_1, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, size, &A_1_dpmv ); PLA_Copy( A_1_mv, A_1_dpmv ); /* Place the current column panel back into the matrix */ PLA_Copy( A_1_dpmv, A_1 ); /* View view the part of A_1_dpmv that holds the copy of the updated matrix A_21 */ PLA_Obj_horz_split_2( A_1_dpmv, size, PLA_DUMMY, &A_21_dpmv ); /* Update A_12 <- inv(L_11) A_12, while distributed in A_12_dpmv */ PLA_Mvector_create_conf_to( A_12_dpmv, size, &A_12_mv ); PLA_Copy( A_12, A_12_mv ); PLA_Local_trsm( PLA_SIDE_RIGHT, PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_UNIT_DIAG, one, A_11_msc, A_12_mv ); PLA_Copy( A_12_mv, A_12_dpmv ); /* Place the current row panel A_12 back into the matrix */ PLA_Copy( A_12_dpmv, A_12 ); /* Update A_22 (viewed by A_BR) <- A_22 - A_21 * A_12 */ PLA_Local_gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE, minus_one, A_21_dpmv, A_12_dpmv, one, A_BR ); /* Update the view of the part of the matrix that still needs to be pivoted in subsequent steps */ PLA_Obj_horz_split_2( A_B, size, PLA_DUMMY, &A_B ); } PLA_Obj_free( &A_BR ); PLA_Obj_free( &A_1 ); PLA_Obj_free( &A_B ); PLA_Obj_free( &A_11 ); PLA_Obj_free( &A_12 ); PLA_Obj_free( &A_21 ); PLA_Obj_free( &A_21_dpmv ); PLA_Obj_free( &A_1_mv ); PLA_Obj_free( &A_1_dpmv ); PLA_Obj_free( &A_12_dpmv ); PLA_Obj_free( &A_12_mv ); PLA_Obj_free( &A_11_msc ); PLA_Obj_free( &pivots_B ); PLA_Obj_free( &pivots_1 ); PLA_Obj_free( &pivots_1_msc ); PLA_Obj_free( &one ); PLA_Obj_free( &minus_one ); return value; }
int main(int argc, char *argv[]) { MPI_Comm comm = MPI_COMM_NULL; MPI_Datatype datatype; PLA_Template templ = NULL; PLA_Obj A_orig = NULL, A = NULL, Q = NULL, diag = NULL, B = NULL, minus_one = NULL, zero = NULL, one = NULL; int n, nb_distr, nb_alg, error, parameters, sequential, me, nprocs, nprows, npcols, itype; double time, flops, d_abs_max, PLA_Local_abs_max(); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if (me==0) { printf("enter mesh size:\n"); scanf("%d%d", &nprows, &npcols ); printf("mesh size = %d x %d \n", nprows, npcols ); printf("enter distr. block size:\n"); scanf("%d", &nb_distr ); printf("nb_distr = %d\n", nb_distr ); printf("enter alg. block size:\n"); scanf("%d", &nb_alg ); printf("nb_alg = %d\n", nb_alg ); printf("turn on error checking? (0 = NO, 1 = YES):\n"); scanf("%d", &error ); printf("error checking = %d\n", error ); printf("turn on parameter checking? (0 = NO, 1 = YES):\n"); scanf("%d", ¶meters ); printf("parameter checking = %d\n", parameters ); printf("turn on sequential checking? (0 = NO, 1 = YES):\n"); scanf("%d", &sequential ); printf("sequential checking = %d\n", sequential ); } MPI_Bcast(&nprows, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&npcols, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_alg, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&error, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(¶meters, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&sequential, 1, MPI_INT, 0, MPI_COMM_WORLD); pla_Environ_set_nb_alg( PLA_OP_ALL_ALG, nb_alg ); PLA_Set_error_checking( error, parameters, sequential, FALSE ); /* PLA_Comm_1D_to_2D_ratio(MPI_COMM_WORLD, 1.0, &comm); */ PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); PLA_Init(comm); PLA_Temp_create( nb_distr, 0, &templ ); while ( TRUE ){ if (me==0) { printf("enter datatype:\n"); printf("-1 = quit\n"); printf(" 0 = float\n"); printf(" 1 = double\n"); printf(" 2 = complex\n"); printf(" 3 = double complex\n"); scanf("%d", &itype ); printf("itype = %d\n", itype ); } MPI_Bcast(&itype, 1, MPI_INT, 0, MPI_COMM_WORLD); if ( itype == -1 ) break; switch( itype ){ case 0: datatype = MPI_FLOAT; break; case 1: datatype = MPI_DOUBLE; break; case 2: datatype = MPI_COMPLEX; break; case 3: datatype = MPI_DOUBLE_COMPLEX; break; default: PLA_Abort( "unknown datatype", __LINE__, __FILE__ ); } if (me==0) { printf("enter n:\n"); scanf("%d", &n ); printf("n = %d\n", n ); } MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); PLA_Matrix_create( datatype, n, n, templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &A_orig ); PLA_Matrix_create_conf_to( A_orig, &Q ); PLA_Matrix_create_conf_to( A_orig, &A ); PLA_Mvector_create( datatype, n, 1, templ, PLA_ALIGN_FIRST, &diag ); PLA_Create_constants_conf_to( A, &minus_one, &zero, &one ); create_diag( diag ); PLA_Create_sym_eigenproblem( PLA_LOWER_TRIANGULAR, 3, diag, A_orig, Q ); PLA_Copy( A_orig, A ); MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime (); PLA_Spectral_decomp( PLA_LOWER_TRIANGULAR, A, Q, diag ); MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime () - time; /******* Check answer *******/ /* Make A_orig symmetric */ PLA_Symmetrize( PLA_LOWER_TRIANGULAR, A_orig ); PLA_Matrix_create_conf_to( A_orig, &B ); PLA_Obj_set_to_zero( A ); PLA_Obj_set_diagonal( A, diag ); /* A_orig = A_orig - Q diag Q^T */ PLA_Gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE, one, Q, A, zero, B ); PLA_Gemm( PLA_NO_TRANSPOSE, PLA_TRANSPOSE, minus_one, B, Q, one, A_orig ); /* Extract absolute value of entry with largest absolute value in A_orig */ d_abs_max = PLA_Local_abs_max( A_orig ); if ( d_abs_max > 0.000000001 ) printf( "large error detected: %le\n", d_abs_max ); flops = 4.0/3.0 * n * n * n; if ( me == 0 ) printf("%d time = %f, MFLOPS/node = %10.4lf \n", n, time, flops / time * 1.0e-6 / nprocs ); PLA_Obj_free( &A_orig ); PLA_Obj_free( &A ); PLA_Obj_free( &Q ); PLA_Obj_free( &diag ); PLA_Obj_free( &B ); PLA_Obj_free( &minus_one ); PLA_Obj_free( &zero ); PLA_Obj_free( &one ); } PLA_Temp_free(&templ); PLA_Finalize( ); MPI_Finalize( ); }
int PLA_Form_Q_x( int trans, int uplo, PLA_Obj A, PLA_Obj s, PLA_Obj Q ) /* Purpose: Compute Q = H_1 * H_2 * ... * H_n-1 where H_i is the ith Householder transform stored in the ith column of A of uplo == PLA_LOWER_TRIANGULAR or the ith row of A if uplo == PLA_UPPER_TRIANGULAR and ith entry in s. If trans = PLA_NO_TRANS, Q is computed. Otherwise, Q^T is computed. Input: trans -- Indicates whether Q or Q^T is to be computed. uplo -- Indicates whether Householder vectors are stored above or below diagonal. A -- General mxn matrix A (PLA_MATRIX) s -- vector for storing scalar in Householder transforms (MVECTOR of length=min(m,n), width=1) Output: Q Return value: PLA_SUCCESS iff Q is computed successfully. */ { int size, length, width, nb_alg1, nb_alg2, nb_alg, proj_old; PLA_Template templ; PLA_Obj ATL = NULL, ABR = NULL, QBR = NULL, A1B = NULL, AB1_mv = NULL, W_mv = NULL, Y_mv = NULL, sL = NULL, s_cur = NULL, s_dup = NULL; if ( uplo == PLA_LOWER_TRIANGULAR ) return PLA_Form_Q( trans, A, s, Q ); /* Determine algorithmic blocking size */ PLA_Obj_template( A, &templ ); PLA_Environ_nb_alg( PLA_OP_PAN_PAN, templ, &nb_alg1 ); PLA_Environ_nb_alg( PLA_OP_PAN_MAT, templ, &nb_alg2 ); nb_alg = ( nb_alg1 > nb_alg2 ? nb_alg1 : nb_alg2 ); /* Q = I */ PLA_Obj_set_to_identity( Q ); /* Apply the Householder vectors like ( H_1 ( ... ( H_n-1 ( H_n I ) ) ... ) ) */ /* Initially partition A = / ATL | ATR \ where ATL is square and \ * | ABR / ABR has width 0 */ PLA_Obj_global_length( A, &length ); PLA_Obj_global_width ( A, &width ); width = min( length, width ); PLA_Obj_split_4( A, width, width, &ATL, PLA_DUMMY, PLA_DUMMY, &ABR ); PLA_Obj_split_4( Q, width, width, PLA_DUMMY, PLA_DUMMY, PLA_DUMMY, &QBR ); PLA_Obj_horz_split_2( s, width, &sL, PLA_DUMMY ); while ( TRUE ) { /* Check if done */ PLA_Obj_global_length( ATL, &length ); PLA_Obj_global_width( ATL, &width ); if ( ( size = min( min( length, width), nb_alg ) ) == 0 ) break; /* Grow ABR by the panel from which to compute the next WY transform. Grow QBR similarly */ PLA_Obj_view_shift( ABR, -size, -size, 0, 0 ); PLA_Obj_view_shift( QBR, -size, -size, 0, 0 ); /* Partition off A1B from which to compute the next WY transform */ PLA_Obj_horz_split_2( ABR, size, &A1B, PLA_DUMMY ); PLA_Obj_set_orientation( A1B, PLA_PROJ_ONTO_ROW ); /* Partition off the scaling factors and duplicate to all nodes */ PLA_Obj_horz_split_2( sL, -size, &sL, &s_cur ); PLA_Mscalar_create_conf_to( s_cur, PLA_ALL_ROWS, PLA_ALL_COLS, &s_dup ); /* Redistribute A1B as a multivector and compute W and Y */ PLA_Obj_get_orientation( QBR, &proj_old ); PLA_Obj_set_orientation( QBR, PLA_PROJ_ONTO_ROW ); PLA_Mvector_create_conf_to( QBR, size, &AB1_mv ); PLA_Mvector_create_conf_to( QBR, size, &W_mv ); PLA_Mvector_create_conf_to( QBR, size, &Y_mv ); PLA_Obj_set_orientation( QBR, proj_old ); PLA_Copy( A1B, AB1_mv ); PLA_Copy( s_cur, s_dup ); PLA_Compute_WY( AB1_mv, s_dup, W_mv, Y_mv ); if ( trans == PLA_NO_TRANSPOSE ) { /* Update QBR <- ( I + W Y^T ) QBR */ PLA_Apply_W_Y_transform ( PLA_SIDE_LEFT, PLA_NO_TRANSPOSE, W_mv, Y_mv, QBR ); } else { /* Update QBR <- QBR ( I + W^T Y ) QBR */ PLA_Apply_W_Y_transform ( PLA_SIDE_RIGHT, PLA_TRANSPOSE, W_mv, Y_mv, QBR ); } /* Update view of ATL */ PLA_Obj_split_4( ATL, -size, -size, &ATL, PLA_DUMMY, PLA_DUMMY, PLA_DUMMY ); } PLA_Obj_free( &ATL ); PLA_Obj_free( &ABR ); PLA_Obj_free( &QBR ); PLA_Obj_free( &A1B ); PLA_Obj_free( &AB1_mv ); PLA_Obj_free( &W_mv ); PLA_Obj_free( &Y_mv ); PLA_Obj_free( &s_cur ); PLA_Obj_free( &s_dup ); PLA_Obj_free( &sL ); return PLA_SUCCESS; }
void create_problem( PLA_Obj A, PLA_Obj x, PLA_Obj b ) { PLA_Obj zero = NULL, one = NULL, A_cur = NULL, A11 = NULL; int size, me, nprocs, i, j, fill_blocksize, this_fill_blocksize, type_size; double d_one = 1.0, time; void *locA; void *local_buf; int local_m, local_n, local_ldim, local_stride, global_length; MPI_Datatype datatype; PLA_Obj_global_length( A, &size ); PLA_Obj_datatype ( A, &datatype ); MPI_Type_size ( datatype, &type_size); MPI_Comm_rank( MPI_COMM_WORLD, &me ); MPI_Comm_size( MPI_COMM_WORLD, &nprocs ); PLA_Create_constants_conf_to( A, NULL, &zero, &one ); srand48( me * 1793 ); PLA_Obj_local_length( A, &local_m ); PLA_Obj_local_width( A, &local_n ); PLA_Obj_local_buffer( A, (void **) &local_buf ); PLA_Obj_local_ldim( A, &local_ldim ); #define FILL_METHOD FILL_LOCAL_RANDOM #if FILL_METHOD /************************************************************************ Fill the matrices. NOTE: There are two versions of the fill routine in this file. The version directly below this comment simply fills the local portions of the matrix and vector with random numbers. To use this version, the line directly above this comment should read "#define FILL_METHOD FILL_LOCAL_RANDOM". The other version of the fill algorithm uses the PLAPACK Application Interface, which allows each processor to create a portion of the matrix or vector (regardless of the true location on the machine of that portion) and then submit the piece through a call to the PLAPACK API. To use this version of the algorithm, the line directly above this comment should read "#define FILL_METHOD FILL_THROUGH_API". The API version of the algorithm has a parameter called "fill_blocksize" that determines the width of the column blocks to be submitted to the matrix. The number of independent messages generated by the API is inversely proportional to the fill_blocksize, and significant network contention (or even deadlock on some systems) may occur if the fill_blocksize is taken too small. **********************************************************************/ for (j=0; j<local_n; j++ ) for (i=0; i<local_m; i++ ) if ( datatype == MPI_DOUBLE ) { ( (double *) local_buf )[ j*local_ldim + i ] = drand48() * 2.0 -1.0; } else if ( datatype == MPI_FLOAT ) { ( (float *) local_buf)[ j*local_ldim + i ] = (float) (drand48() * 2.0 -1.0); } else if ( datatype == MPI_DOUBLE_COMPLEX ) { ((PLA_DOUBLE_COMPLEX *)local_buf)[ j*local_ldim + i ].real = drand48() * 2.0 -1.0; ((PLA_DOUBLE_COMPLEX *)local_buf)[ j*local_ldim + i ].imaginary = drand48() * 2.0 -1.0; } else if ( datatype == MPI_COMPLEX ) { ((PLA_COMPLEX *)local_buf)[ j*local_ldim + i ].real = (float) drand48() * 2.0 -1.0; ((PLA_COMPLEX *)local_buf)[ j*local_ldim + i ].imaginary = (float) drand48() * 2.0 -1.0; } PLA_Obj_local_length( x, &local_m ); PLA_Obj_local_buffer( x, (void **) &local_buf ); PLA_Obj_local_stride( x, &local_stride ); for (i=0; i<local_m; i++ ) if ( datatype == MPI_DOUBLE ) { ((double*) local_buf)[ i*local_stride ] = drand48() * 2.0 -1.0; } else if ( datatype == MPI_FLOAT ) { ((float *)local_buf)[ i*local_stride ] = (float) (drand48() * 2.0 -1.0); } else if ( datatype == MPI_DOUBLE_COMPLEX ) { ((PLA_DOUBLE_COMPLEX *)local_buf)[ i*local_stride ].real = drand48() * 2.0 -1.0; ((PLA_DOUBLE_COMPLEX *)local_buf)[ i*local_stride ].imaginary = drand48() * 2.0 -1.0; } else if ( datatype == MPI_COMPLEX ) { ((PLA_COMPLEX *)local_buf)[ i*local_stride ].real = (float) drand48() * 2.0 -1.0; ((PLA_COMPLEX *)local_buf)[ i*local_stride ].imaginary = (float) drand48() * 2.0 -1.0; } #else /*********************************************************************************** Alternate version of the problem creation using the PLAPACK Application interface. To use, edit the line marked "PROBLEM CREATION METHOD" above to read "#define FILL_METHOD FILL_THROUGH_API". **********************************************************************************/ if ( 0 == me ) printf("Using PLAPACK application interface to create problem.\n"); MPI_Barrier ( MPI_COMM_WORLD); time = MPI_Wtime (); PLA_API_begin(); PLA_Obj_API_open(A); PLA_Obj_API_open(x); fill_blocksize = 10; locA = (void *) malloc( type_size * size * fill_blocksize ); for (j=me*fill_blocksize;j< size; j+=nprocs*fill_blocksize) { this_fill_blocksize = min( fill_blocksize, size - j); for (i=0; i < size*this_fill_blocksize; i++) { /* This loop determines the values to put into matrix */ if ( MPI_DOUBLE == datatype ) ((double *)locA)[i]=drand48() * 2.0 - 1.0; else if ( MPI_FLOAT == datatype ) ((float *)locA)[i]=drand48() * 2.0 - 1.0; else if ( MPI_DOUBLE_COMPLEX == datatype ) { ((double *)locA)[2*i]=drand48() * 2.0 - 1.0; ((double *)locA)[2*i+1]=drand48() * 2.0 - 1.0; } else if ( MPI_COMPLEX == datatype ) { ((float *)locA)[2*i]=drand48() * 2.0 - 1.0; ((float *)locA)[2*i+1]=drand48() * 2.0 - 1.0; } else printf("Unhandled datatype in create_problem().\n"); } PLA_API_axpy_matrix_to_global(size, this_fill_blocksize, &d_one, locA, size, A, 0, j ); } if (0==me) { /* processor zero alone fills the vector */ for (i=0; i<size; i++) if ( MPI_DOUBLE == datatype ) ((double *)locA)[i]=drand48() * 2.0 - 1.0; else if ( MPI_FLOAT == datatype ) ((float *)locA)[i]=drand48() * 2.0 - 1.0; else if ( MPI_DOUBLE_COMPLEX == datatype ) { ((double *)locA)[2*i]=drand48() * 2.0 - 1.0; ((double *)locA)[2*i+1]=drand48() * 2.0 - 1.0; } else if ( MPI_COMPLEX == datatype ) { ((float *)locA)[2*i]=drand48() * 2.0 - 1.0; ((float *)locA)[2*i+1]=drand48() * 2.0 - 1.0; } else printf("Unhandled datatype in create_problem().\n"); PLA_API_axpy_vector_to_global( size, &d_one, locA, 1, x, 0); } free( locA ); PLA_Obj_API_close(A); PLA_Obj_API_close(x); PLA_API_end(); MPI_Barrier ( MPI_COMM_WORLD); time = MPI_Wtime () - time; if ( 0 == me ) { printf("time for problem creation: %e seconds\n", time); } #endif /* Make A positive definite by adding a large value to the diagonal */ PLA_Obj_view_all( A, &A_cur ); PLA_Obj_global_length ( A, &global_length); while ( TRUE ){ PLA_Obj_global_length( A_cur, &size ); if ( 0 == size ) break; PLA_Obj_split_4( A_cur, 1, 1, &A11, PLA_DUMMY, PLA_DUMMY, &A_cur ); PLA_Obj_local_length( A11, &local_m ); PLA_Obj_local_width ( A11, &local_n ); if ( local_m == 1 && local_n == 1 ) { PLA_Obj_local_buffer( A11, (void **) &local_buf ); if ( datatype == MPI_DOUBLE ) *(double *)local_buf += (double) global_length; else if ( MPI_FLOAT == datatype ) *(float *)local_buf += (float) global_length; else if ( datatype == MPI_DOUBLE_COMPLEX){ ((double *)local_buf)[0] += (double) global_length; ((double *)local_buf)[1] = 0.0; } else if ( datatype == MPI_COMPLEX){ ((float *)local_buf)[0] += (float) global_length; ((float *)local_buf)[1] = 0.0; } else printf("Unhandled datatype in create_problem().\n"); } } if ( datatype == MPI_DOUBLE || datatype == MPI_FLOAT ) PLA_Symv( PLA_LOWER_TRIANGULAR, one, A, x, zero, b ); else PLA_Hemv( PLA_LOWER_TRIANGULAR, one, A, x, zero, b ); PLA_Obj_free( &zero ); PLA_Obj_free( &one ); PLA_Obj_free( &A_cur); PLA_Obj_free ( &A11); }
int PLA_Syrk_panpan( int nb_alg, int uplo, int transa, PLA_Obj alpha, PLA_Obj A, PLA_Obj beta, PLA_Obj C ) { PLA_Obj A_L = NULL, A_1 = NULL, A_1_dup = NULL, A_1_trans_dup = NULL, one = NULL; int size; PLA_Local_scal( beta, C ); PLA_Create_constants_conf_to( C, NULL, NULL, &one ); if ( transa == PLA_NO_TRANS ) PLA_Obj_global_width( A, &size ); else PLA_Obj_global_length( A, &size ); nb_alg = min( size, nb_alg ); PLA_Obj_view_all( A, &A_L ); PLA_Pmvector_create_conf_to( C, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, nb_alg, &A_1_dup ); PLA_Pmvector_create_conf_to( C, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, nb_alg, &A_1_trans_dup ); while ( TRUE ){ if ( transa == PLA_NO_TRANS ) PLA_Obj_global_width( A_L, &size ); else PLA_Obj_global_length( A_L, &size ); if ( ( size = min( size, nb_alg ) ) == 0 ) break; if ( transa == PLA_NO_TRANS ) PLA_Obj_vert_split_2( A_L, size, &A_1, &A_L ); else { PLA_Obj_horz_split_2( A_L, size, &A_1, &A_L ); PLA_Obj_set_orientation( A_1, PLA_PROJ_ONTO_ROW ); } if ( size != nb_alg ) { PLA_Obj_vert_split_2( A_1_dup, size, &A_1_dup, PLA_DUMMY ); PLA_Obj_horz_split_2( A_1_trans_dup, size, &A_1_trans_dup, PLA_DUMMY ); } PLA_Copy( A_1, A_1_dup ); PLA_Copy( A_1_dup, A_1_trans_dup ); if ( transa == PLA_CONJ_TRANS ) PLA_Conjugate( A_1_dup ); PLA_Syrk_perform_local_part( uplo, alpha, A_1_dup, A_1_trans_dup, one, C ); } PLA_Obj_free( &A_L); PLA_Obj_free( &A_1 ); PLA_Obj_free( &A_1_dup ); PLA_Obj_free( &A_1_trans_dup ); PLA_Obj_free( &one ); return PLA_SUCCESS; }
int PLA_Trmm_left_upper( int transa, int diag, PLA_Obj alpha, PLA_Obj A, PLA_Obj B ) { PLA_Obj A_TL = NULL, A_1 = NULL, A_1_dup = NULL, A_11_dup = NULL, B_T = NULL, B_1 = NULL, B_1_dup = NULL, A_BR = NULL, B_B = NULL, one = NULL, zero = NULL; int nb_alg, size; PLA_Template templ; PLA_Local_scal( alpha, B ); PLA_Create_constants_conf_to( A, NULL, &zero, &one ); PLA_Obj_template( A, &templ ); if ( transa == PLA_NO_TRANS ){ PLA_Obj_global_length( B, &size ); PLA_Environ_nb_alg( PLA_OP_PAN_MAT, templ, &nb_alg ); nb_alg = min( size, nb_alg ); PLA_Obj_view_all( A, &A_BR ); PLA_Obj_view_all( B, &B_B ); PLA_Pmvector_create_conf_to( B, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, nb_alg, &A_1_dup ); PLA_Pmvector_create_conf_to( B, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, nb_alg, &B_1_dup ); while ( TRUE ){ PLA_Obj_global_length( A_BR, &size ); if ( ( size = min( size, nb_alg ) ) == 0 ) break; PLA_Obj_horz_split_2( A_BR, size, &A_1, PLA_DUMMY ); PLA_Obj_set_orientation( A_1, PLA_PROJ_ONTO_ROW ); if ( size != nb_alg ){ PLA_Obj_vert_split_2( A_1_dup, size, &A_1_dup, PLA_DUMMY ); PLA_Obj_horz_split_2( B_1_dup, size, &B_1_dup, PLA_DUMMY ); } PLA_Copy( A_1, A_1_dup ); PLA_Obj_horz_split_2( A_1_dup, size, &A_11_dup, PLA_DUMMY ); PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, diag, A_11_dup ); PLA_Local_gemm( PLA_TRANS, PLA_NO_TRANS, one, A_1_dup, B_B, zero, B_1_dup ); PLA_Obj_horz_split_2( B_B, size, &B_1, &B_B ); PLA_Obj_set_orientation( B_1, PLA_PROJ_ONTO_ROW ); PLA_Reduce( B_1_dup, MPI_SUM, B_1 ); PLA_Obj_split_4( A_BR, size, size, PLA_DUMMY, PLA_DUMMY, PLA_DUMMY, &A_BR ); PLA_Obj_horz_split_2( A_1_dup, size, PLA_DUMMY, &A_1_dup ); } } else if ( transa == PLA_TRANS ){ /* transa == PLA_TRANS */ PLA_Obj_global_length( B, &size ); PLA_Environ_nb_alg( PLA_OP_PAN_MAT, templ, &nb_alg ); nb_alg = min( size, nb_alg ); PLA_Obj_view_all( A, &A_TL ); PLA_Obj_view_all( B, &B_T ); PLA_Pmvector_create_conf_to( B, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, nb_alg, &A_1_dup ); PLA_Pmvector_create_conf_to( B, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, nb_alg, &B_1_dup ); while ( TRUE ){ PLA_Obj_global_length( A_TL, &size ); if ( ( size = min( size, nb_alg ) ) == 0 ) break; PLA_Obj_vert_split_2( A_TL, -size, PLA_DUMMY, &A_1 ); PLA_Copy( A_1, A_1_dup ); PLA_Obj_horz_split_2( A_1_dup, -size, PLA_DUMMY, &A_11_dup ); PLA_Set_triang_to_zero( PLA_UPPER_TRIANGULAR, diag, A_11_dup ); PLA_Local_gemm( transa, PLA_NO_TRANS, one, A_1_dup, B_T, zero, B_1_dup ); PLA_Obj_horz_split_2( B_T, -size, &B_T, &B_1 ); PLA_Obj_set_orientation( B_1, PLA_PROJ_ONTO_ROW ); PLA_Reduce( B_1_dup, MPI_SUM, B_1 ); PLA_Obj_split_4( A_TL, -size, -size, &A_TL, PLA_DUMMY, PLA_DUMMY, PLA_DUMMY ); PLA_Obj_horz_split_2( A_1_dup, -size, &A_1_dup, PLA_DUMMY ); } } else PLA_Abort("Trmm: transpose case not yet implemented", __LINE__, __FILE__ ); PLA_Obj_free( &A_TL ); PLA_Obj_free( &A_BR ); PLA_Obj_free( &A_1 ); PLA_Obj_free( &A_1_dup ); PLA_Obj_free( &A_11_dup ); PLA_Obj_free( &B_T ); PLA_Obj_free( &B_B ); PLA_Obj_free( &B_1 ); PLA_Obj_free( &B_1_dup ); PLA_Obj_free( &one ); PLA_Obj_free( &zero ); return PLA_SUCCESS; }