static int pmatfactor(void*MM, int *flag){ plapackM* ctx=(plapackM*)MM; int info,dummy; double ddxerror; DSDPFunctionBegin; wallclock(&ctx->t1); info=PLA_Obj_set_to_one(ctx->wVec);DSDPCHKERR(info); info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info); info=PLA_Symv( PLA_LOWER_TRIANGULAR, ctx->one, ctx->AMat, ctx->wVec, ctx->zero, ctx->vVec ); DSDPCHKERR(info); *flag=0; info = PLA_Chol(PLA_LOWER_TRIANGULAR, ctx->AMat); DSDPCHKERR(info); if (info!=0) { *flag=1; printf("PLAPACK WARNING: Non positive-definite Matrix M : Row: %d\n",info); } info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat, ctx->vVec);DSDPCHKERR(info); info = PLA_Trsv(PLA_LOWER_TRIANGULAR, PLA_TRANSPOSE, PLA_NONUNIT_DIAG, ctx->AMat,ctx->vVec); DSDPCHKERR(info); info=PLA_Obj_set_to_minus_one(ctx->wVec);DSDPCHKERR(info); info=PLA_Axpy( ctx->one, ctx->vVec, ctx->wVec );DSDPCHKERR(info); info=PLA_Nrm2( ctx->wVec, ctx->dxerror );DSDPCHKERR(info); PLA_Obj_get_local_contents( ctx->dxerror, PLA_NO_TRANS, &dummy, &dummy, &ddxerror, 1, 1 ); if (ddxerror/sqrt(1.0*ctx->global_size) > 0.1){ *flag=1; if (ctx->rank==-1){ printf("PDSDPPLAPACK: Non positive-definite Matrix. %4.2e\n",ddxerror); } } wallclock(&ctx->t2); ctx->tsolve+=ctx->t2-ctx->t1; PPDSDPPrintTime(ctx->rank,"PLAPACK: Factor M",ctx->t2-ctx->t1,ctx->tsolve); PPDSDPPrintTime(ctx->rank,"Subtotal Time",0,ctx->t2-ctx->t1); DSDPFunctionReturn(0); }
static int pmatmult(void *MM, double x[], double y[], int n){ plapackM* ctx=(plapackM*)MM; double d_one=1.0,drank=1.0/ctx->nprocs; int i,info; DSDPFunctionBegin; info=PLA_Obj_set_to_zero(ctx->vVec);DSDPCHKERR(info); info=PLA_Obj_set_to_zero(ctx->wVec);DSDPCHKERR(info); info=PLA_API_begin();DSDPCHKERR(info); info=PLA_Obj_API_open(ctx->vVec);DSDPCHKERR(info); info=PLA_API_axpy_vector_to_global(n, &d_one, x, 1, ctx->vVec, 0); DSDPCHKERR(info); /* Copy solution from PLAPACK vector to DSDPVector */ info=PLA_Obj_API_close(ctx->vVec); DSDPCHKERR(info); info=PLA_API_end(); DSDPCHKERR(info); PLA_Symv( PLA_LOWER_TRIANGULAR, ctx->one, ctx->AMat, ctx->vVec, ctx->zero, ctx->wVec ); /* Copy solution from PLAPACK vector to DSDPVector */ memset((void*)y,0,n*sizeof(double)); info=PLA_API_begin(); info=PLA_Obj_API_open(ctx->wVec); info=PLA_API_axpy_global_to_vector(n, &d_one, ctx->wVec, 0, y, 1); DSDPCHKERR(info); info=PLA_Obj_API_close(ctx->wVec); DSDPCHKERR(info); info=PLA_API_end(); DSDPCHKERR(info); for (i=0;i<n;i++){ y[i]*=drank;} /* Should be in PLA_API_axpy_vector_to_global */ DSDPFunctionReturn(0); }
/*
 * create_problem - build a random test problem  A x = b.
 *
 *   A  matrix object; filled with random entries of roughly [-1, 1], after
 *      which the global length of A is added to each diagonal entry (and the
 *      imaginary part of complex diagonal entries is set to zero).
 *   x  vector object; filled with random entries of roughly [-1, 1].
 *   b  vector object; receives A x, computed from the lower triangle of A
 *      with PLA_Symv for MPI_DOUBLE / MPI_FLOAT and PLA_Hemv otherwise.
 *
 * The element type is taken from A and is also used when filling x.
 * The random generator is seeded per process with srand48( rank * 1793 ).
 */
void create_problem( PLA_Obj A, PLA_Obj x, PLA_Obj b )
{
  PLA_Obj
    zero = NULL, one = NULL,
    A_cur = NULL, A11 = NULL;

  int
    size, me, nprocs, i, j, fill_blocksize, this_fill_blocksize, type_size;

  double
    d_one = 1.0, time;

  void
    *locA;

  void *local_buf;
  int local_m, local_n, local_ldim, local_stride, global_length;

  MPI_Datatype
    datatype;

  PLA_Obj_global_length( A, &size );
  PLA_Obj_datatype ( A, &datatype );
  MPI_Type_size ( datatype, &type_size );
  MPI_Comm_rank( MPI_COMM_WORLD, &me );
  MPI_Comm_size( MPI_COMM_WORLD, &nprocs );

  PLA_Create_constants_conf_to( A, NULL, &zero, &one );

  srand48( me * 1793 );

  PLA_Obj_local_length( A, &local_m );
  PLA_Obj_local_width( A, &local_n );
  PLA_Obj_local_buffer( A, (void **) &local_buf );
  PLA_Obj_local_ldim( A, &local_ldim );

  /* NOTE(review): the #if below tests the VALUE of FILL_METHOD, so it relies
     on FILL_LOCAL_RANDOM / FILL_THROUGH_API being defined elsewhere as
     nonzero / zero respectively -- confirm against the rest of the file. */
#define FILL_METHOD FILL_LOCAL_RANDOM

#if FILL_METHOD
  /************************************************************************
    Fill the matrices.

    NOTE:  There are two versions of the fill routine in this file.

    The version directly below this comment simply fills the local
    portions of the matrix and vector with random numbers.  To use this
    version, the #define directly above this comment should read
    "#define FILL_METHOD FILL_LOCAL_RANDOM".

    The other version of the fill algorithm uses the PLAPACK Application
    Interface, which allows each processor to create a portion of the
    matrix or vector (regardless of the true location on the machine of
    that portion) and then submit the piece through a call to the PLAPACK
    API.  To use this version of the algorithm, the #define directly above
    this comment should read "#define FILL_METHOD FILL_THROUGH_API".

    The API version of the algorithm has a parameter called
    "fill_blocksize" that determines the width of the column blocks to be
    submitted to the matrix.  The number of independent messages generated
    by the API is inversely proportional to the fill_blocksize, and
    significant network contention (or even deadlock on some systems) may
    occur if the fill_blocksize is taken too small.
  **********************************************************************/

  /* Local part of A: column-major with leading dimension local_ldim */
  for ( j = 0; j < local_n; j++ ) {
    for ( i = 0; i < local_m; i++ ) {
      if ( datatype == MPI_DOUBLE ) {
        ((double *) local_buf)[ j*local_ldim + i ] = drand48() * 2.0 - 1.0;
      }
      else if ( datatype == MPI_FLOAT ) {
        ((float *) local_buf)[ j*local_ldim + i ] =
          (float) ( drand48() * 2.0 - 1.0 );
      }
      else if ( datatype == MPI_DOUBLE_COMPLEX ) {
        ((PLA_DOUBLE_COMPLEX *) local_buf)[ j*local_ldim + i ].real =
          drand48() * 2.0 - 1.0;
        ((PLA_DOUBLE_COMPLEX *) local_buf)[ j*local_ldim + i ].imaginary =
          drand48() * 2.0 - 1.0;
      }
      else if ( datatype == MPI_COMPLEX ) {
        /* Cast the whole expression, as in the MPI_FLOAT branch (the cast
           used to bind to drand48() alone). */
        ((PLA_COMPLEX *) local_buf)[ j*local_ldim + i ].real =
          (float) ( drand48() * 2.0 - 1.0 );
        ((PLA_COMPLEX *) local_buf)[ j*local_ldim + i ].imaginary =
          (float) ( drand48() * 2.0 - 1.0 );
      }
    }
  }

  /* Local part of x: entries are local_stride apart */
  PLA_Obj_local_length( x, &local_m );
  PLA_Obj_local_buffer( x, (void **) &local_buf );
  PLA_Obj_local_stride( x, &local_stride );

  for ( i = 0; i < local_m; i++ ) {
    if ( datatype == MPI_DOUBLE ) {
      ((double *) local_buf)[ i*local_stride ] = drand48() * 2.0 - 1.0;
    }
    else if ( datatype == MPI_FLOAT ) {
      ((float *) local_buf)[ i*local_stride ] =
        (float) ( drand48() * 2.0 - 1.0 );
    }
    else if ( datatype == MPI_DOUBLE_COMPLEX ) {
      ((PLA_DOUBLE_COMPLEX *) local_buf)[ i*local_stride ].real =
        drand48() * 2.0 - 1.0;
      ((PLA_DOUBLE_COMPLEX *) local_buf)[ i*local_stride ].imaginary =
        drand48() * 2.0 - 1.0;
    }
    else if ( datatype == MPI_COMPLEX ) {
      ((PLA_COMPLEX *) local_buf)[ i*local_stride ].real =
        (float) ( drand48() * 2.0 - 1.0 );
      ((PLA_COMPLEX *) local_buf)[ i*local_stride ].imaginary =
        (float) ( drand48() * 2.0 - 1.0 );
    }
  }

#else
  /************************************************************************
    Alternate version of the problem creation using the PLAPACK
    Application interface.  To use, edit the "#define FILL_METHOD" line
    above to read "#define FILL_METHOD FILL_THROUGH_API".
  **********************************************************************/

  if ( 0 == me )
    printf("Using PLAPACK application interface to create problem.\n");

  MPI_Barrier ( MPI_COMM_WORLD );
  time = MPI_Wtime ();

  PLA_API_begin();
  PLA_Obj_API_open(A);
  PLA_Obj_API_open(x);

  fill_blocksize = 10;

  /* Scratch buffer for one block of fill_blocksize full columns.  The size
     is computed in size_t so that it cannot overflow int. */
  locA = malloc( (size_t) type_size * (size_t) size * (size_t) fill_blocksize );
  if ( locA == NULL ) {
    fprintf( stderr, "create_problem(): unable to allocate fill buffer.\n" );
    MPI_Abort( MPI_COMM_WORLD, 1 );
    return;
  }

  /* Column blocks are dealt out to the processes round-robin */
  for ( j = me*fill_blocksize; j < size; j += nprocs*fill_blocksize ) {
    this_fill_blocksize = min( fill_blocksize, size - j );
    for ( i = 0; i < size*this_fill_blocksize; i++ ) {
      /* This loop determines the values to put into matrix */
      if ( MPI_DOUBLE == datatype )
        ((double *) locA)[i] = drand48() * 2.0 - 1.0;
      else if ( MPI_FLOAT == datatype )
        ((float *) locA)[i] = drand48() * 2.0 - 1.0;
      else if ( MPI_DOUBLE_COMPLEX == datatype ) {
        ((double *) locA)[2*i]   = drand48() * 2.0 - 1.0;
        ((double *) locA)[2*i+1] = drand48() * 2.0 - 1.0;
      }
      else if ( MPI_COMPLEX == datatype ) {
        ((float *) locA)[2*i]   = drand48() * 2.0 - 1.0;
        ((float *) locA)[2*i+1] = drand48() * 2.0 - 1.0;
      }
      else
        printf("Unhandled datatype in create_problem().\n");
    }
    PLA_API_axpy_matrix_to_global( size, this_fill_blocksize, &d_one,
                                   locA, size, A, 0, j );
  }

  if ( 0 == me ) {
    /* processor zero alone fills the vector */
    for ( i = 0; i < size; i++ ) {
      if ( MPI_DOUBLE == datatype )
        ((double *) locA)[i] = drand48() * 2.0 - 1.0;
      else if ( MPI_FLOAT == datatype )
        ((float *) locA)[i] = drand48() * 2.0 - 1.0;
      else if ( MPI_DOUBLE_COMPLEX == datatype ) {
        ((double *) locA)[2*i]   = drand48() * 2.0 - 1.0;
        ((double *) locA)[2*i+1] = drand48() * 2.0 - 1.0;
      }
      else if ( MPI_COMPLEX == datatype ) {
        ((float *) locA)[2*i]   = drand48() * 2.0 - 1.0;
        ((float *) locA)[2*i+1] = drand48() * 2.0 - 1.0;
      }
      else
        printf("Unhandled datatype in create_problem().\n");
    }
    PLA_API_axpy_vector_to_global( size, &d_one, locA, 1, x, 0 );
  }

  free( locA );

  PLA_Obj_API_close(A);
  PLA_Obj_API_close(x);
  PLA_API_end();

  MPI_Barrier ( MPI_COMM_WORLD );
  time = MPI_Wtime () - time;

  if ( 0 == me ) {
    printf("time for problem creation: %e seconds\n", time);
  }
#endif

  /* Make A positive definite by adding a large value to the diagonal.
     The loop peels off one 1x1 diagonal block at a time; only the process
     that holds that entry locally (local size 1x1) updates it. */
  PLA_Obj_view_all( A, &A_cur );
  PLA_Obj_global_length ( A, &global_length );

  while ( TRUE ) {
    PLA_Obj_global_length( A_cur, &size );
    if ( 0 == size ) break;

    PLA_Obj_split_4( A_cur, 1, 1, &A11,      PLA_DUMMY,
                                  PLA_DUMMY, &A_cur );

    PLA_Obj_local_length( A11, &local_m );
    PLA_Obj_local_width ( A11, &local_n );

    if ( local_m == 1 && local_n == 1 ) {
      PLA_Obj_local_buffer( A11, (void **) &local_buf );
      if ( datatype == MPI_DOUBLE )
        *(double *) local_buf += (double) global_length;
      else if ( MPI_FLOAT == datatype )
        *(float *) local_buf += (float) global_length;
      else if ( datatype == MPI_DOUBLE_COMPLEX ) {
        ((double *) local_buf)[0] += (double) global_length;
        ((double *) local_buf)[1] = 0.0;
      }
      else if ( datatype == MPI_COMPLEX ) {
        ((float *) local_buf)[0] += (float) global_length;
        ((float *) local_buf)[1] = 0.0;
      }
      else
        printf("Unhandled datatype in create_problem().\n");
    }
  }

  /* b = A x */
  if ( datatype == MPI_DOUBLE || datatype == MPI_FLOAT )
    PLA_Symv( PLA_LOWER_TRIANGULAR, one, A, x, zero, b );
  else
    PLA_Hemv( PLA_LOWER_TRIANGULAR, one, A, x, zero, b );

  PLA_Obj_free( &zero );
  PLA_Obj_free( &one );
  PLA_Obj_free( &A_cur );
  PLA_Obj_free( &A11 );
}
/*
 * PLA_Apply_sym_House
 *
 * Purpose: apply a Householder orthogonal similarity transformation to the
 * matrix A:
 *
 *     A <- ( I + beta u u^T ) A ( I + beta u u^T )
 *        =  A + beta u w^T + beta w u^T
 *
 *     with  w = v + (beta/2) (u^T v) u   and   v = A u.
 *
 * Assumptions: A is a PLA_MATRIX, u is a PLA_MVECTOR of width 1 whose first
 * entry is implicitly one, and beta is a PLA_MSCALAR duplicated on all nodes.
 *
 * The first entry of u is temporarily overwritten with one and restored
 * before returning.  Always returns PLA_SUCCESS.
 */
int PLA_Apply_sym_House( int uplo, PLA_Obj A, PLA_Obj u, PLA_Obj beta )
{
  PLA_Obj u_head       = NULL;   /* view of the first entry of u          */
  PLA_Obj u_head_saved = NULL;   /* copy of that entry's original value   */
  PLA_Obj w            = NULL;   /* holds v = A u, then w                 */
  PLA_Obj const_zero   = NULL;
  PLA_Obj const_one    = NULL;
  PLA_Obj const_two    = NULL;
  PLA_Obj scale        = NULL;   /* u^T v, then (beta/2) u^T v            */
  double  two_value    = 2.0;

  PLA_Create_constants_conf_to( A, NULL, &const_zero, &const_one );

  /* Make the implicit unit first entry of u explicit, remembering what was
     stored there. */
  PLA_Obj_horz_split_2( u, 1, &u_head,
                              PLA_DUMMY );
  PLA_Mvector_create_conf_to( u_head, 1, &u_head_saved );
  PLA_Local_copy( u_head, u_head_saved );
  PLA_Obj_set_to_one( u_head );

  /* w := v = A u */
  PLA_Mvector_create_conf_to( u, 1, &w );
  PLA_Symv( uplo, const_one, A, u, const_zero, w );

  /* scale := u^T v */
  PLA_Mscalar_create_conf_to( beta, PLA_ALL_ROWS, PLA_ALL_COLS, &scale );
  PLA_Dot( u, w, scale );

  /* scale := (beta / 2) * u^T v */
  PLA_Mscalar_create_conf_to( beta, PLA_ALL_ROWS, PLA_ALL_COLS, &const_two );
  PLA_Obj_set( const_two, MPI_DOUBLE, &two_value );
  PLA_Local_scal( beta, scale );
  PLA_Local_inv_scal( const_two, scale );

  /* w := v + (beta/2) (u^T v) u */
  PLA_Local_axpy( scale, u, w );

  /* A := A + beta u w^T + beta w u^T */
  PLA_Syr2( uplo, beta, u, w, A );

  /* Put the original first entry of u back */
  PLA_Local_copy( u_head_saved, u_head );

  /* Release temporaries */
  PLA_Obj_free( &u_head );
  PLA_Obj_free( &u_head_saved );
  PLA_Obj_free( &w );
  PLA_Obj_free( &const_zero );
  PLA_Obj_free( &const_one );
  PLA_Obj_free( &const_two );
  PLA_Obj_free( &scale );

  return PLA_SUCCESS;
}