int pmatsetup(void *MM, int m){ plapackM* ctx=(plapackM*)MM; MPI_Comm rowcomm,colcomm; int itmp,nprocs,info; DSDPFunctionBegin; ctx->global_size=m; info = MPI_Comm_size(ctx->mpi_comm,&nprocs); DSDPCHKERR(info); itmp=(m-nprocs+1)/nprocs; itmp=DSDPMax(2,itmp); ctx->nb_distr=DSDPMin(ctx->nb_distr,itmp); info = PLA_Comm_1D_to_2D_ratio(ctx->mpi_comm,ctx->ratio,&ctx->plapack_comm); DSDPCHKERR(info); info = PLA_Init(ctx->plapack_comm); DSDPCHKERR(info); info = PLA_Temp_create(ctx->nb_distr, 0, &ctx->templ); DSDPCHKERR(info); info=PLA_Matrix_create(MPI_DOUBLE, m, m, ctx->templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &ctx->AMat);DSDPCHKERR(info); info=PLA_Mvector_create(MPI_DOUBLE, m, 1, ctx->templ, PLA_ALIGN_FIRST, &ctx->vVec);DSDPCHKERR(info); info=PLA_Mvector_create(MPI_DOUBLE, m, 1, ctx->templ, PLA_ALIGN_FIRST, &ctx->wVec);DSDPCHKERR(info); info=PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, ctx->templ, &ctx->dxerror );DSDPCHKERR(info); info=PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, ctx->templ, &ctx->one );DSDPCHKERR(info); info=PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, ctx->templ, &ctx->zero );DSDPCHKERR(info); info=PLA_Obj_set_to_one(ctx->one);DSDPCHKERR(info); info=PLA_Obj_set_to_zero(ctx->zero);DSDPCHKERR(info); info = MPI_Comm_rank(ctx->plapack_comm,&ctx->rank); DSDPCHKERR(info); info = MPI_Comm_size(ctx->plapack_comm,&ctx->nprocs); DSDPCHKERR(info); info = PLA_Temp_comm_col_info(ctx->templ, &rowcomm, &ctx->rowrank, &ctx->numrownodes); DSDPCHKERR(info); info = PLA_Temp_comm_row_info(ctx->templ, &colcomm, &ctx->colrank, &ctx->numcolnodes); DSDPCHKERR(info); ctx->t0=0;ctx->t1=0;ctx->t2=0; ctx->thessian=0;ctx->tsolve=0; wallclock(&ctx->t0); DSDPFunctionReturn(0); }
int main(int argc, char *argv[]) { MPI_Comm comm = MPI_COMM_NULL; MPI_Datatype datatype; PLA_Template templ = NULL; PLA_Obj A = NULL, A_orig = NULL, residual = NULL, minus_one = NULL, one = NULL, diff = NULL; int n, nb_distr, nb_alg, error, parameters, sequential, me, nprocs, nprows, npcols, itype; double time, flops; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &me); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if (me==0) { printf("enter mesh size:\n"); scanf("%d%d", &nprows, &npcols ); printf("mesh size = %d x %d \n", nprows, npcols ); printf("enter distr. block size:\n"); scanf("%d", &nb_distr ); printf("nb_distr = %d\n", nb_distr ); printf("enter alg. block size:\n"); scanf("%d", &nb_alg ); printf("nb_alg = %d\n", nb_alg ); printf("turn on error checking? (0 = NO, 1 = YES):\n"); scanf("%d", &error ); printf("error checking = %d\n", error ); printf("turn on parameter checking? (0 = NO, 1 = YES):\n"); scanf("%d", ¶meters ); printf("parameter checking = %d\n", parameters ); printf("turn on sequential checking? (0 = NO, 1 = YES):\n"); scanf("%d", &sequential ); printf("sequential checking = %d\n", sequential ); } MPI_Bcast(&nprows, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&npcols, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_alg, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&error, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(¶meters, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&sequential, 1, MPI_INT, 0, MPI_COMM_WORLD); pla_Environ_set_nb_alg( PLA_OP_ALL_ALG, nb_alg ); PLA_Set_error_checking( error, parameters, sequential, FALSE ); /* PLA_Comm_1D_to_2D_ratio(MPI_COMM_WORLD, 1.0, &comm); */ PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); PLA_Init(comm); PLA_Temp_create( nb_distr, 0, &templ ); while ( TRUE ) { if (me==0) { printf("enter datatype:\n"); printf("-1 = quit\n"); printf(" 0 = float\n"); printf(" 1 = double\n"); printf(" 2 = complex\n"); printf(" 3 = double complex\n"); scanf("%d", &itype ); printf("itype = %d\n", itype ); } MPI_Bcast(&itype, 1, MPI_INT, 0, MPI_COMM_WORLD); if ( itype == -1 ) break; switch( itype ) { case 0: datatype = MPI_FLOAT; break; case 1: datatype = MPI_DOUBLE; break; case 2: datatype = MPI_COMPLEX; break; case 3: datatype = MPI_DOUBLE_COMPLEX; break; default: PLA_Abort( "unknown datatype", __LINE__, __FILE__ ); } if (me==0) { printf("enter n:\n"); scanf("%d", &n ); printf("n = %d\n", n ); } MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD); PLA_Matrix_create( datatype, n, n, templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &A ); PLA_Matrix_create_conf_to( A, &A_orig ); PLA_Matrix_create_conf_to( A, &residual ); create_problem( A ); { double d_n; d_n = (double) n; PLA_Shift( A, MPI_DOUBLE, &d_n ); } PLA_Create_constants_conf_to( A, &minus_one, NULL, &one ); PLA_Local_copy( A, A_orig ); /* Use invert routine that uses factors */ MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime (); PLA_Triangular_invert( PLA_LOWER_TRIANGULAR, A ); MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime () - time; flops = 2.0/3.0 * n * n * n; if ( me == 0 ) printf("%d time = %f, MFLOPS/node = %10.4lf \n", n, time, flops / time * 1.0e-6 / nprocs ); PLA_Obj_set_to_identity( residual ); PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, PLA_NONUNIT_DIAG, A_orig ); PLA_Set_triang_to_zero( PLA_LOWER_TRIANGULAR, PLA_NONUNIT_DIAG, A ); PLA_Gemm( PLA_NO_TRANS, PLA_NO_TRANS, minus_one, A_orig, A, one, residual ); PLA_Mscalar_create( datatype, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ, &diff ); PLA_Matrix_one_norm( residual, diff ); } PLA_Obj_free( &A ); PLA_Obj_free( &A_orig ); PLA_Obj_free( &residual ); PLA_Obj_free( &minus_one ); PLA_Obj_free( &one ); PLA_Obj_free( &diff ); PLA_Temp_free(&templ); PLA_Finalize( ); MPI_Finalize( ); }
int PLA_Symmetric_invert_exit( int uplo, PLA_Obj A ) { int value = PLA_SUCCESS, size_malloced; char routine_name[ 35 ]; PLA_Routine_stack_push( "PLA_Symmetric_invert_exit" ); if ( PLA_CHECK_AGAINST_SEQUENTIAL ){ PLA_Obj residual = NULL, norm_A = NULL, norm_residual = NULL, A_tmp = NULL, one = NULL, minus_one = NULL; double tol; MPI_Datatype datatype; PLA_Template templ; PLA_Obj_datatype( A, &datatype ); PLA_Obj_template( A, &templ ); if ( datatype == MPI_DOUBLE || datatype == MPI_DOUBLE_COMPLEX ) tol = 0.0000001; else tol = 0.0001; PLA_Matrix_create_conf_to( A, &residual ); PLA_Matrix_create_conf_to( A, &A_tmp ); PLA_Local_copy( A, A_tmp ); PLA_Symmetrize( PLA_LOWER_TRIANGULAR, A_tmp ); PLA_Symmetrize( PLA_LOWER_TRIANGULAR, A_cpy ); PLA_Obj_set_to_identity( residual ); PLA_Mscalar_create( datatype, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ, &norm_A ); PLA_Mscalar_create( datatype, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ, &norm_residual ); PLA_Matrix_one_norm( A_cpy, norm_A ); PLA_Create_constants_conf_to( A, &minus_one, NULL, &one ); PLA_Gemm( PLA_NO_TRANS, PLA_NO_TRANS, minus_one, A_tmp, A_cpy, one, residual ); PLA_Matrix_one_norm( residual, norm_residual ); if ( datatype == MPI_DOUBLE ){ double *norm_A_p, *norm_residual_p; PLA_Obj_local_buffer( norm_A, ( void **) &norm_A_p ); PLA_Obj_local_buffer( norm_residual, ( void ** ) &norm_residual_p ); if ( *norm_residual_p > tol * *norm_A_p ){ PLA_Warning( "PLA_Symmetric_invert: large relative error encountered" ); printf( "norm_1(residual) = %le, norm_1(A) = %le\n", *norm_residual_p, *norm_A_p ); value--; } } else PLA_Warning("residual check not supported for datatype"); PLA_Obj_free( &A_cpy ); PLA_Obj_free( &A_tmp ); PLA_Obj_free( &norm_A ); PLA_Obj_free( &norm_residual ); PLA_Obj_free( &minus_one ); PLA_Obj_free( &one ); } size_malloced = PLA_Total_size_malloced( ); if ( size_malloced != old_size_malloced ) PLA_Warning( "PLA_Symmetric_invert: memory discrepency" ); PLA_Routine_stack_pop( routine_name ); PLA_Routine_stack_pop( routine_name ); return value; }
int PLA_Conjugate( PLA_Obj x ) { int original_error = 0; int i; int object_type_x; int local_length_x, local_width_x, ld_x; int stride_x; MPI_Datatype datatype_x; PLA_Template platemplate; PLA_Obj Neg_one = NULL; void *x_buffer, *x_buffer_cur; void *Neg_one_buffer; PLA_Obj_local_length(x, &local_length_x); PLA_Obj_local_width (x, &local_width_x); PLA_Obj_datatype(x, &datatype_x); PLA_Obj_template(x, &platemplate); if( (MPI_DOUBLE == datatype_x) || (MPI_DOUBLE_COMPLEX == datatype_x)) PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, platemplate, &Neg_one ); else PLA_Mscalar_create( MPI_FLOAT, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, platemplate, &Neg_one ); PLA_Obj_set_to_minus_one(Neg_one); PLA_Obj_local_buffer(Neg_one, &Neg_one_buffer); PLA_Obj_local_ldim(x, &ld_x); PLA_Obj_objtype(x, &object_type_x); PLA_Obj_local_buffer(x, &x_buffer); PLA_Obj_local_stride(x, &stride_x); x_buffer_cur = x_buffer; if ( (MPI_DOUBLE == datatype_x) || (MPI_FLOAT == datatype_x)) local_length_x = local_length_x/2; /* treat AS complex -- not negate */ if(local_length_x * local_width_x > 0) { if( (MPI_DOUBLE_COMPLEX == datatype_x) || (MPI_DOUBLE == datatype_x)) { stride_x = stride_x * 2; for(i = 0; i < local_width_x; i++) { PLA_dscal(&local_length_x,(double*)Neg_one_buffer, (((double*)(x_buffer_cur))+1),&stride_x); x_buffer_cur = (void*)(((double*)(x_buffer_cur))+ld_x*2); } } if( (MPI_COMPLEX == datatype_x) || (MPI_FLOAT == datatype_x)) { stride_x = stride_x * 2; for(i = 0; i < local_width_x; i++) { PLA_sscal(&local_length_x,(float*)Neg_one_buffer, (((float*)(x_buffer_cur))+1),&stride_x); x_buffer_cur = (void*)(((float*)(x_buffer_cur))+ld_x*2); } } } PLA_Obj_free(&Neg_one); return(PLA_SUCCESS); }
int main(int argc, char *argv[]) { /* Declarations */ MPI_Comm comm; PLA_Template templ = NULL; PLA_Obj A = NULL, rhs = NULL, A_append = NULL, pivots = NULL, x = NULL, b = NULL, b_norm = NULL, index = NULL, minus_one = NULL; double operation_count, b_norm_value, time; int size, nb_distr, nb_alg, me, nprocs, nprows, npcols, dummy, ierror, info = 0; MPI_Datatype datatype; /* Initialize MPI */ MPI_Init(&argc, &argv); #if MANUFACTURE == CRAY set_d_stream( 1 ); #endif /* Get problem size and distribution block size and broadcast */ MPI_Comm_rank(MPI_COMM_WORLD, &me); if (0 == me) { printf("enter processor mesh dimension ( rows cols ):\n"); scanf("%d %d", &nprows, &npcols ); printf("enter matrix size, distr. block size:\n"); scanf("%d %d", &size, &nb_distr ); printf("enter algorithmic blocksize:\n"); scanf("%d", &nb_alg ); printf("Turn on error checking? (1 = YES, 0 = NO):\n"); scanf("%d", &ierror ); } MPI_Bcast(&nprows, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&npcols, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_distr, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&nb_alg, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&ierror, 1, MPI_INT, 0, MPI_COMM_WORLD); if ( ierror ) PLA_Set_error_checking( ierror, TRUE, TRUE, FALSE ); else PLA_Set_error_checking( ierror, FALSE, FALSE, FALSE ); pla_Environ_set_nb_alg (PLA_OP_ALL_ALG, nb_alg); /* Create a 2D communicator */ PLA_Comm_1D_to_2D(MPI_COMM_WORLD, nprows, npcols, &comm); /* Initialize PLAPACK */ PLA_Init(comm); /* Create object distribution template */ PLA_Temp_create( nb_distr, 0, &templ ); /* Set the datatype */ datatype = MPI_DOUBLE; /* Create objects for problem to be solved */ /* Matrix A is big enough to hold the right-hand-side appended */ PLA_Matrix_create( datatype, size, size+1, templ, PLA_ALIGN_FIRST, PLA_ALIGN_FIRST, &A_append ); PLA_Mvector_create( datatype, size, 1, templ, PLA_ALIGN_FIRST, &x ); PLA_Mvector_create( datatype, size, 1, templ, PLA_ALIGN_FIRST, &b ); PLA_Mvector_create( MPI_INT, size, 1, templ, PLA_ALIGN_FIRST, &pivots ); /* Create 1x1 multiscalars to hold largest (in abs. value) element of b - x and index of largest value */ PLA_Mscalar_create( MPI_DOUBLE, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ, &b_norm ); /* Create duplicated scalar constants with same datatype and template as A */ PLA_Create_constants_conf_to( A_append, &minus_one, NULL, NULL ); /* View the appended system as the matrix and the right-hand-side */ PLA_Obj_vert_split_2( A_append, -1, &A, &rhs ); /* Create a problem to be solved: A x = b */ create_problem( A, x, b ); /* Copy b to the appended column */ PLA_Copy( b, rhs ); /* Start timing */ MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime( ); /* Factor P A_append -> L U overwriting lower triangular portion of A with L, upper, U */ info = PLA_LU( A_append, pivots); if ( info != 0 ) { printf("Zero pivot encountered at row %d.\n", info); } else { /* Apply the permutations to the right hand sides */ /* Not necessery since system was appended */ /* PLA_Apply_pivots_to_rows ( b, pivots); */ /* Solve L y = b, overwriting b with y */ /* Not necessary since the system was appended */ /* PLA_Trsv( PLA_LOWER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_UNIT_DIAG, A, b ); */ PLA_Copy( rhs, b ); /* Solve U x = y (=b), overwriting b with x */ PLA_Trsv( PLA_UPPER_TRIANGULAR, PLA_NO_TRANSPOSE, PLA_NONUNIT_DIAG, A, b ); /* Stop timing */ MPI_Barrier( MPI_COMM_WORLD ); time = MPI_Wtime() - time; /* Report performance */ if ( me == 0 ) { MPI_Comm_size(MPI_COMM_WORLD, &nprocs); operation_count = 2.0/3.0 * size * size * size; printf("n = %d, time = %lf, MFLOPS/node = %lf\n", size, time, operation_count / time * 1.0e-6 / nprocs ); } /* Process the answer. As an example, this routine brings result x (stored in b) to processor 0 and prints first and last entry */ Process_answer( b ); /* Check answer by overwriting b <- b - x (where b holds computed approximation to x) */ PLA_Axpy( minus_one, x, b ); PLA_Nrm2( b, b_norm); /* Report norm of b - x */ if ( me == 0 ) { PLA_Obj_get_local_contents( b_norm, PLA_NO_TRANS, &dummy, &dummy, &b_norm_value, 1, 1 ); printf( "Norm2 of x - computed x : %le\n", b_norm_value ); } } printf("****************************************************************\n"); printf("* NOTE: while this driver times all operations performed by *\n"); printf("* a LINPACK benchmark, it does not use the ScaLAPACK random *\n"); printf("* matrix generator and thus according to the rules of the *\n"); printf("* LINPACK benchmark is not an official implementation. *\n"); printf("* Contact [email protected] if you are interested in creating *\n"); printf("* a version that does meet the rules. *\n"); printf("****************************************************************\n"); /* Free the linear algebra objects */ PLA_Obj_free(&A); PLA_Obj_free(&x); PLA_Obj_free(&b); PLA_Obj_free(&minus_one); PLA_Obj_free(&b_norm); PLA_Obj_free(&pivots); PLA_Obj_free(&A_append); PLA_Obj_free(&rhs); /* Free the template */ PLA_Temp_free(&templ); /* Finalize PLAPACK and MPI */ PLA_Finalize( ); MPI_Finalize( ); }
int PLA_Spectral_decomp( int uplo, PLA_Obj A, PLA_Obj Q, PLA_Obj diag ) /* PLA_Spectral_decomp Purpose: Compute spectral decomposition A = Q D Q^T where A is a given nxn symmetrix matrix A, D is diagonal with the eigenvalues of A on the diagonal and the columns of Q equal the eigenvectors of matrix A. input: uplo int indicates whether the symmetric matrix is stored in lower or upper triangular part of A A PLA_MATRIX matrix to be factored Q PLA_MATRIX matrix in which to return the eigenvectors diag PLA_MVECTOR of width 1 vector in which to return the eigenvalues of A output: A PLA_MATRIX overwritten with junk Q PLA_MATRIX if Q != NULL columns of Q equal the eigenvectors of A o/w eigenvectors are not computed diag PLA_MVECTOR of width 1 eigenvalues of A in ascending order */ { PLA_Obj s = NULL, tridiag = NULL, eigenvalues = NULL, Q_mv = NULL; PLA_Template templ; int length; MPI_Datatype datatype; /* Create a vector for the scaling factors computed during the reduction to tridiagonal form and perform reduction */ PLA_Mvector_create_conf_to( A, 1, &s ); /* Reduce A to tridiagonal form. If Q != NULL the Householder transforms are accumulated in Q */ PLA_Tri_red( uplo, A, s, Q ); /* Create a duplicated multiscalar in which to store the main diagonal and first subdiagonal of A */ PLA_Obj_template( A, &templ ); PLA_Obj_global_length( A, &length ); PLA_Obj_datatype( A, &datatype ); PLA_Mscalar_create( datatype, PLA_ALL_ROWS, PLA_ALL_COLS, length, 2, templ, &tridiag ); /* Copy diagonals to multiscalar */ PLA_Copy_sym_tridiag_to_msc( uplo, A, tridiag ); /* Locally solve the tridiagonal eigenproblem */ /* If Q != NULL, copy Q to a multivector */ if ( Q != NULL ){ PLA_Mvector_create_conf_to( Q, length, &Q_mv ); PLA_Copy( Q, Q_mv ); } PLA_Local_sym_tridiag_eig( tridiag, Q_mv ); /* Copy the eigenvalues to diag */ PLA_Obj_vert_split_2( tridiag, 1, &eigenvalues, PLA_DUMMY ); PLA_Copy( eigenvalues, diag ); /* if Q != NULL, copy Q_mv back to Q */ if ( Q != NULL ) PLA_Copy( Q_mv, Q ); PLA_Obj_free( &s ); PLA_Obj_free( &tridiag ); PLA_Obj_free( &eigenvalues ); PLA_Obj_free( &Q_mv ); return PLA_SUCCESS; }