static int pmatfactor(void*MM, int *flag){ plapackM* ctx=(plapackM*)MM; int info,dummy; double ddxerror; DSDPFunctionBegin; wallclock(&ctx->t1); info=PLA_Obj_set_to_one(ctx->wVec);DSDPCHKERR(info); info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info); info=PLA_Symv( PLA_LOWER_TRIANGULAR, ctx->one, ctx->AMat, ctx->wVec, ctx->zero, ctx->vVec ); DSDPCHKERR(info); *flag=0; info = PLA_Chol(PLA_LOWER_TRIANGULAR, ctx->AMat); DSDPCHKERR(info); if (info!=0) { *flag=1; printf("PLAPACK WARNING: Non positive-definite Matrix M : Row: %d\n",info); } info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat, ctx->vVec);DSDPCHKERR(info); info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat,ctx->vVec); DSDPCHKERR(info); info=PLA_Obj_set_to_minus_one(ctx->wVec);DSDPCHKERR(info); info=PLA_Axpy( ctx->one, ctx->vVec, ctx->wVec );DSDPCHKERR(info); info=PLA_Nrm2( ctx->wVec, ctx->dxerror );DSDPCHKERR(info); PLA_Obj_get_local_contents( ctx->dxerror, PLA_NO_TRANS, &dummy, &dummy, &ddxerror, 1, 1 ); if (ddxerror/sqrt(1.0*ctx->global_size) > 0.1){ *flag=1; if (ctx->rank==-1){ printf("PDSDPPLAPACK: Non positive-definite Matrix. %4.2e\n",ddxerror); } } wallclock(&ctx->t2); ctx->tsolve+=ctx->t2-ctx->t1; PPDSDPPrintTime(ctx->rank,"PLAPACK: Factor M",ctx->t2-ctx->t1,ctx->tsolve); PPDSDPPrintTime(ctx->rank,"Subtotal Time",0,ctx->t2-ctx->t1); DSDPFunctionReturn(0); }
int PLA_Bi_red( PLA_Obj A, PLA_Obj sL, PLA_Obj sR, PLA_Obj U, PLA_Obj V ) /* PLA_Tri_red Purpose: Reduce general matrix A to bidiagonal form using Householder similarity transformations. input: A MATRIX to be reduced output: A Reduced matrix A. Householder vectors used to reduce A are stored below subdiagonal of A and above first superdiagonal of A. sL Scaling factors for the Householder transforms computed to reduce A (applied to left of A). sR Scaling factors for the Householder transforms computed to reduce A (applied to right of A). U if U != NULL, U equals the accumulation of Householder transforms that were applied from the left. V if V != NULL, V equals the accumulation of Householder transforms that were applied from the right. */ { PLA_Obj uv = NULL, uv_B = NULL, u = NULL, v = NULL, sR_B = NULL, betaR_1 = NULL, betaR_1_dup = NULL, sL_B = NULL, betaL_1 = NULL, betaL_1_dup = NULL, A_BR = NULL, a_21 = NULL, A_21 = NULL, u_11 = NULL, u_12 = NULL, u_21 = NULL, U_22 = NULL; v_11 = NULL, v_12 = NULL, v_21 = NULL, V_22 = NULL; int size, value = PLA_SUCCESS; if ( PLA_ERROR_CHECKING ) /* Perform parameter and error checking */ value = PLA_Bi_red_enter( A, sR, sL, U, V ); /* The whole purpose of the game is to update most of the matrix A = A - ( u | w_R ) / 0 | w_L^T \ \ 0 | v^T / where u equals the Householder transform that hits the active part of A from the left and v equals the Householder transform that hits the active part from the right: A <- ( I - betaL_1 u u^T ) A ( I - betaR_1 / 0 \ ( 0 v^T ) \ v / */ /* Create a multivector in which to compute ( u | w_R ) */ PLA_Mvector_create_conf_to( A, 1, &uwR ); /* Create a multivector in which to compute ( w_L | v ) */ PLA_Mvector_create_conf_to( A, 1, &wLv ); /* Create duplicated multiscalars in which to hold the scaling factor for the Householder transforms being computed */ PLA_Obj_horz_split_2( sL, 1, &betaL_1, PLA_DUMMY ); PLA_Mscalar_create_conf_to( betaL_1, PLA_ALL_ROWS, PLA_ALL_COLS, &betaL_1_dup ); PLA_Obj_horz_split_2( sR, 1, &betaR_1, PLA_DUMMY ); PLA_Mscalar_create_conf_to( betaR_1, PLA_ALL_ROWS, PLA_ALL_COLS, &betaR_1_dup ); /* Track the active parts of A, sL, sR, and uwR and wLv */ PLA_Obj_view_all( A, &A_BR ); PLA_Obj_view_all( sL, &betaL_B ); PLA_Obj_view_all( sR, &betaR_B ); PLA_Obj_view_all( uwR, &uwR_B ); PLA_Obj_horz_split_2( wLv, 1, PLA_DUMMY, &wLv_B ); while ( TRUE ){ PLA_Obj_global_length( A_BR, &size ); if ( 0 == size ) break; /* Partition A_BR = ( a_L A_R ) where a_L is a column from which to compute the next u */ PLA_Obj_vert_split_2( A_BR, 1, &a_L, &A_R ); /* Split of the current element of vector sL */ PLA_Obj_horz_split_2( betaL_B, 1, &betaL_1, &betaL_B ); /* View the part of uwR in which to compute u and wR */ PLA_Obj_vert_split_2( uwR_B, 1, &u, &wR ); /* View the part of wLv in which to compute w_L and v */ PLA_Obj_vert_split_2( wLv_B, 1, &w_L, &v ); /* Redistributed a_L as a vector and compute Householder transform */ PLA_Copy( a_L, u ); PLA_Compute_House_v( u, betaL_1_dup ); /* Place data back in A and sL */ PLA_Copy( u, a_L ); PLA_Local_copy( betaL_1_dup, betaL_1 ); /* Partition A_BR = / * a_12^T \ \ * A_22 / splitting off first column and row */ PLA_Obj_vert_split_4( A_BR, 1, 1, PLA_DUMMY, &a_12, PLA_DUMMY, &A_BR ); /* Copy a_12 into v */ PLA_Obj_set_orientation( PLA_PROJ_ONTO_ROW, a_12 ); PLA_Copy( a_12, v ); /* w_L = betaL_1 * A_BR^T * u */ First must split off first element of u */ PLA_Obj_horz_split_2( u, 1, PLA_DUMMY, &u_2 ); PLA_Gemv( PLA_TRANSPOSE, betaL_1_dup, A_BR, u_2, zero, w_L ); /* In v update a_12 = a_12 - betaL_1 * A_R^T u = = a_12 - betaL_1 * a_12 - w_L */ PLA_Negate( betaL_1 ); PLA_Axpy( betaL_1, v, v ); PLA_Axpy( minus_one, w_L, v ); /* Compute Householder transform to reflect resulting v */ PLA_Compute_House_v( v, betaR_1_dup ); /* Place data back in A and sR */ PLA_Copy( v, a_12 ); PLA_Local_copy( betaR_1_dup, betaR_1 ); /* Set first element of v = 1 */ PLA_Obj_horz_split_2( v, 1, &v_1, PLA_DUMMY ); PLA_Obj_set_to_one( v_1 ); /* Update A_BR <- ( I - betaL_1 u_2 u_2^T ) A_BR ( I - betaR_1 v v^T ) = ( A_BR - u_2 ( betaL_1 u_2^T A_BR ) ) ( I - betaR_1 v v^T ) = A_BR - ( u_2 | w_R ) * ( w_L | v ) where w_L = betaL_1 * A_BR^T u_2, w_R = betaR_1 * A_BR * v - betaR_1 * w_L^T * v * u_2 */ /* Compute w_R */ PLA_Gemv( PLA_NO_TRANSPOSE, betaR_1, A_BR, v, zero, w_R ); PLA_Dot( x_L, v, temp ); PLA_Scal( betaR_1_dup, temp ); PLA_Negate( temp ); PLA_Axpy( temp, u_2, w_R ); /* Update A_BR */ PLA_Pmvector_create_conf_to( A_BR, PLA_PROJ_ONTO_COL, PLA_ALL_COLS, 2, &uwR_2_dup ); PLA_Copy( uwR_2, uwR_2_dup ); PLA_Pmvector_create_conf_to( A_BR, PLA_PROJ_ONTO_ROW, PLA_ALL_ROWS, 2, &wLv_dup ); PLA_Copy( wLv, wLv_dup ); PLA_Gemm( PLA_NO_TRANSPOSE, PLA_NO_TRANSPOSE, minus_one, uwR_2, wLv, one, A_BR ); } /* Free the temporary objects */ PLA_Obj_free( &u ); PLA_Obj_free( &u_B ); PLA_Obj_free( &beta_B ); PLA_Obj_free( &beta_1 ); PLA_Obj_free( &beta_1_dup ); PLA_Obj_free( &A_BR ); PLA_Obj_free( &a_21 ); PLA_Obj_free( &A_21 ); PLA_Obj_free( &q_11 ); PLA_Obj_free( &q_12 ); PLA_Obj_free( &q_21 ); PLA_Obj_free( &Q_22 ); if ( PLA_ERROR_CHECKING ) value = PLA_Bi_red_exit( A, sL, sR, U, V ); return PLA_SUCCESS; }
int main(int argc, char *argv[]) { /* Declarations */ MPI_Comm comm; PLA_Template templ = NULL; PLA_Obj A = NULL, rhs = NULL, A_append = NULL, pivots = NULL, x = NULL, b = NULL, b_norm = NULL, index = NULL, minus_one = NULL; double operation_count, b_norm_value, time; int size, nb_distr, nb_alg, me, nprocs, nprows, npcols, dummy, ierror, info = 0; MPI_Datatype datatype; /* Initialize MPI */ MPI_Init(&argc, &argv); #if MANUFACTURE == CRAY set_d_stream( 1 ); #endif /* Get problem size and distribution block size and broadcast */ MPI_Comm_rank(MPI_COMM_WORLD, &me); if (0 == me) { printf("enter processor mesh dimension ( rows cols ):\n"); scanf("%d %d", &nprows, &npcols ); printf("enter matrix size, distr. block size:\n"); scanf("%d %d", &size, &nb_distr ); printf("enter algorithmic blocksize:\n"); scanf("%d", &nb_alg ); printf("Turn on error checking? (1 = YES, 0 = NO):\n"); scanf("%d", &ierror ); } MPI_Bcast(&nprows, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&npcols, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_alg, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&ierror, 1, MPI_INT, 0, MPI_COMM_WORLD); if ( ierror ) PLA_Set_error_checking( ierror, TRUE, TRUE, FALSE ); else PLA_Set_error_checking( ierror, FALSE, FALSE, FALSE ); pla_Environ_set_nb_alg (PLA_OP_ALL_ALG, nb_alg); /* Create a 2D communicator */ PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); /* Initialize PLAPACK */ PLA_Init(comm); /* Create object distribution template */ PLA_Temp_create( nb_distr, 0, &templ ); /* Set the datatype */ datatype = MPI_DOUBLE; /* Create objects for problem to be solved */ /* Matrix A is big enough to hold the right-hand-side appended */ PLA_Matrix_create( datatype, size, size+1, templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &A_append ); PLA_Mvector_create( datatype, size, 1, templ, PLA_ALIGN_FIRST, &x ); PLA_Mvector_create( datatype, size, 1, templ, PLA_ALIGN_FIRST, &b ); PLA_Mvector_create( MPI_INT, size, 1, templ, PLA_ALIGN_FIRST, &pivots ); /* Create 1x1 multiscalars to hold largest (in abs. value) element of b - x and index of largest value */ PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ, &b_norm ); /* Create duplicated scalar constants with same datatype and template as A */ PLA_Create_constants_conf_to( A_append, &minus_one, NULL, NULL ); /* View the appended system as the matrix and the right-hand-side */ PLA_Obj_vert_split_2( A_append, -1, &A, &rhs ); /* Create a problem to be solved: A x = b */ create_problem( A, x, b ); /* Copy b to the appended column */ PLA_Copy( b, rhs ); /* Start timing */ MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime( ); /* Factor P A_append -> L U overwriting lower triangular portion of A with L, upper, U */ info = PLA_LU( A_append, pivots); if ( info != 0 ) { printf("Zero pivot encountered at row %d.\n", info); } else { /* Apply the permutations to the right hand sides */ /* Not necessery since system was appended */ /* PLA_Apply_pivots_to_rows ( b, pivots); */ /* Solve L y = b, overwriting b with y */ /* Not necessary since the system was appended */ /* PLA_Trsv( PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_UNIT_DIAG, A, b ); */ PLA_Copy( rhs, b ); /* Solve U x = y (=b), overwriting b with x */ PLA_Trsv( PLA_UPPER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, A, b ); /* Stop timing */ MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime() - time; /* Report performance */ if ( me == 0 ) { MPI_Comm_size(MPI_COMM_WORLD, &nprocs); operation_count = 2.0/3.0 * size * size * size; printf("n = %d, time = %lf, MFLOPS/node = %lf\n", size, time, operation_count / time * 1.0e-6 / nprocs ); } /* Process the answer. As an example, this routine brings result x (stored in b) to processor 0 and prints first and last entry */ Process_answer( b ); /* Check answer by overwriting b <- b - x (where b holds computed approximation to x) */ PLA_Axpy( minus_one, x, b ); PLA_Nrm2( b, b_norm); /* Report norm of b - x */ if ( me == 0 ) { PLA_Obj_get_local_contents( b_norm, PLA_NO_TRANS, &dummy, &dummy, &b_norm_value, 1, 1 ); printf( "Norm2 of x - computed x : %le\n", b_norm_value ); } } printf("****************************************************************\n"); printf("* NOTE: while this driver times all operations performed by *\n"); printf("* a LINPACK benchmark, it does not use the ScaLAPACK random *\n"); printf("* matrix generator and thus according to the rules of the *\n"); printf("* LINPACK benchmark is not an official implementation. *\n"); printf("* Contact [email protected] if you are interested in creating *\n"); printf("* a version that does meet the rules. *\n"); printf("****************************************************************\n"); /* Free the linear algebra objects */ PLA_Obj_free(&A); PLA_Obj_free(&x); PLA_Obj_free(&b); PLA_Obj_free(&minus_one); PLA_Obj_free(&b_norm); PLA_Obj_free(&pivots); PLA_Obj_free(&A_append); PLA_Obj_free(&rhs); /* Free the template */ PLA_Temp_free(&templ); /* Finalize PLAPACK and MPI */ PLA_Finalize( ); MPI_Finalize( ); }