/* SVD */ SEXP R_PDGESVD(SEXP M, SEXP N, SEXP ASIZE, SEXP A, SEXP DESCA, SEXP ULDIM, SEXP DESCU, SEXP VTLDIM, SEXP DESCVT, SEXP JOBU, SEXP JOBVT, SEXP INPLACE) { R_INIT; double *A_OUT; int IJ = 1, temp_lwork = -1; double temp_A = 0, temp_work = 0, *WORK; SEXP RET, RET_NAMES, INFO, D, U, VT; newRvec(INFO, 1, "int"); newRvec(D, INT(ASIZE, 0), "dbl"); newRmat(U, INT(ULDIM, 0), INT(ULDIM, 1), "dbl"); newRmat(VT, INT(VTLDIM, 0), INT(VTLDIM, 1), "dbl"); // Query size of workspace INT(INFO, 0) = 0; pdgesvd_(STR(JOBU, 0), STR(JOBVT, 0), INTP(M), INTP(N), &temp_A, &IJ, &IJ, INTP(DESCA), &temp_A, &temp_A, &IJ, &IJ, INTP(DESCU), &temp_A, &IJ, &IJ, INTP(DESCVT), &temp_work, &temp_lwork, INTP(INFO)); // Allocate work vector and calculate svd temp_lwork = (int) temp_work; temp_lwork = nonzero(temp_lwork); WORK = (double *) R_alloc(temp_lwork, sizeof(double)); A_OUT = (double *) R_alloc(nrows(A)*ncols(A), sizeof(double)); memcpy(A_OUT, REAL(A), nrows(A)*ncols(A)*sizeof(double)); pdgesvd_(STR(JOBU, 0), STR(JOBVT, 0), INTP(M), INTP(N), A_OUT, &IJ, &IJ, INTP(DESCA), REAL(D), REAL(U), &IJ, &IJ, INTP(DESCU), REAL(VT), &IJ, &IJ, INTP(DESCVT), WORK, &temp_lwork, INTP(INFO)); // Manage return RET_NAMES = make_list_names(4, "info", "d", "u", "vt"); RET = make_list(RET_NAMES, 4, INFO, D, U, VT); R_END; return RET; }
/// /// This is the new standard style for implementing a slave routine for a ScaLAPACK operator, /// in this case, pdgesvd_(). The difference from the old style is that the new style /// requires that the ScaLAPACK context, ICTXT, be provided. Until that requirement can be pushed /// up into the "mpi_slave_xxx" files, the existing pdgesvdSlave() routine will create the context /// and then call this routine. /// slpp::int_t pdgesvdSlave2(const slpp::int_t ICTXT, PdgesvdArgs args, void* bufs[], size_t sizes[], unsigned count) { // find out where I am in the scalapack grid slpp::int_t NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM; getSlInfo(ICTXT/*in*/, NPROW/*in*/, NPCOL/*in*/, MYPROW/*out*/, MYPCOL/*out*/, MYPNUM/*out*/); if(NPROW != args.NPROW || NPCOL != args.NPCOL || MYPROW != args.MYPROW || MYPCOL != args.MYPCOL || MYPNUM != args.MYPNUM){ if(DBG) { std::cerr << "scalapack general parameter mismatch" << std::endl; std::cerr << "args NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL << "MYPROW:"<<args.MYPROW<<" MYPCOL:"<<args.MYPCOL<<"MYPNUM:"<<MYPNUM << std::endl; std::cerr << "ScaLAPACK NPROW:"<<NPROW<<" NPCOL:"<<NPCOL << "MYPROW:"<<MYPROW<<" MYPCOL:"<<MYPCOL<<"MYPNUM:"<<MYPNUM << std::endl; } } // setup MB,NB const slpp::int_t& M = args.A.DESC.M ; const slpp::int_t& N = args.A.DESC.N ; const slpp::int_t& MB = args.A.DESC.MB ; const slpp::int_t& NB = args.A.DESC.NB ; const slpp::int_t& LLD_A = args.A.DESC.LLD ; const slpp::int_t one = 1 ; const slpp::int_t LTD_A = std::max(one, numroc_( N, NB, MYPCOL, /*CSRC_A*/0, NPCOL )); const slpp::int_t& MP = LLD_A ; const slpp::int_t& NQ = LTD_A ; // size check A, S, U, VT slpp::int_t SIZE_A = MP * NQ ; slpp::int_t SIZE_S = std::min(M, N); slpp::int_t size_p = std::max(one, numroc_( SIZE_S, MB, MYPROW, /*RSRC_A*/0, NPROW )); slpp::int_t size_q = std::max(one, numroc_( SIZE_S, NB, MYPCOL, /*RSRC_A*/0, NPCOL )); slpp::int_t SIZE_U = MP * size_q; slpp::int_t SIZE_VT= size_p * NQ; if(DBG) { std::cerr << "##################################################" << std::endl; std::cerr << "####pdgesvdSlave##################################" << std::endl; std::cerr << "one:" << one << std::endl; std::cerr << "SIZE_S:" << SIZE_S << std::endl; std::cerr << "MB:" << MB << std::endl; std::cerr << "MYPROW:" << MYPROW << std::endl; std::cerr << "NPROW:" << NPROW << std::endl; } // TODO: >= because master is permitted to use a larger buffer // to allow to see if rounding up to chunksize eliminates some errors // before we put the roundUp formula everywhere SLAVE_ASSERT_ALWAYS(sizes[BUF_A] >= SIZE_A * sizeof(double)); SLAVE_ASSERT_ALWAYS(sizes[BUF_S] >= SIZE_S * sizeof(double)); if (args.jobU == 'V') { SLAVE_ASSERT_ALWAYS( sizes[BUF_U] >= SIZE_U *sizeof(double)); } if (args.jobVT == 'V') { SLAVE_ASSERT_ALWAYS( sizes[BUF_VT] >= SIZE_VT *sizeof(double)); } // sizes are correct, give the pointers their names double* A = reinterpret_cast<double*>(bufs[BUF_A]) ; double* S = reinterpret_cast<double*>(bufs[BUF_S]) ; double* U = reinterpret_cast<double*>(bufs[BUF_U]) ; double* VT = reinterpret_cast<double*>(bufs[BUF_VT]) ; // debug that the input is readable and show its contents if(DBG) { for(int ii=0; ii < SIZE_A; ii++) { std::cerr << "("<< MYPROW << "," << MYPCOL << ") A["<<ii<<"] = " << A[ii] << std::endl; } } if(false) { // debug that outputs are writeable: for(int ii=0; ii < SIZE_S; ii++) { S[ii] = -9999.0 ; } if (args.jobU == 'V') { for(int ii=0; ii < SIZE_U; ii++) { U[ii] = -9999.0 ; } } if (args.jobVT == 'V') { for(int ii=0; ii < SIZE_VT; ii++) { VT[ii] = -9999.0 ; } } } // ScaLAPACK: the DESCS are complete except for the correct context args.A.DESC.CTXT= ICTXT ; // note: no DESC for S, it is not distributed, all have a copy args.U.DESC.CTXT= ICTXT ; args.VT.DESC.CTXT= ICTXT ; if(DBG) { std::cerr << "pdgesvdSlave: argsBuf is: {" << std::endl; std::cerr << args << std::endl; std::cerr << "}" << std::endl << std::endl; std::cerr << "pdgesvdSlave: calling pdgesvd_ for computation, with args:" << std::endl ; std::cerr << "jobU: " << args.jobU << ", jobVT: " << args.jobVT << ", M: " << args.M << ", N: " << args.N << std::endl; std::cerr << "A: " << (void*)(A) << ", A.I: " << args.A.I << ", A.J: " << args.A.J << std::endl; std::cerr << ", A.DESC: " << args.A.DESC << std::endl; std::cerr << "S: " << (void*)(S) << std::endl; std::cerr << "U: " << (void*)(U) << ", U.I: " << args.U.I << ", U.J: " << args.U.J << std::endl; std::cerr << ", U.DESC: " << args.U.DESC << std::endl; std::cerr << "VT: " << (void*)(VT) << ", VT.I: " << args.VT.I << ", VT.J: " << args.VT.J << std::endl; std::cerr << ", VT.DESC: " << args.VT.DESC << std::endl; } if(DBG) std::cerr << "pdgesvdSlave calling PDGESVD to get work size" << std:: endl; slpp::int_t INFO = 0; double LWORK_DOUBLE; pdgesvd_(args.jobU, args.jobVT, args.M, args.N, A, args.A.I, args.A.J, args.A.DESC, S, U, args.U.I, args.U.J, args.U.DESC, VT, args.VT.I, args.VT.J, args.VT.DESC, &LWORK_DOUBLE, -1, INFO); if(INFO < 0) { // argument error std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "ERROR: pdgesvd_() for work size, argument error, argument # " << -INFO << std::endl; } else if(INFO == (std::min(args.M, args.N)+1)) { // should not happen when checking work size // heterogeneity detected (eigenvalues did not match on all nodes) std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "WARNING: pdgesvd_() for work size, eigenvalues did not match across all instances" << std::endl; } else if (INFO > 0) { // other + value of INFO // should not happen when checking work size // DBDSQR did not converge std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "ERROR: pdgesvd_() for work size, DBDSQR did not converge: " << INFO << std::endl; } if ( LWORK_DOUBLE < 0.0 || LWORK_DOUBLE > double(numeric_limits<slpp::int_t>::max())) { // Houston, we have a problem ... the user wants to do more than 1 instance // can handle through the slpp::int ... the size of which is determined // by which binary for ScaLAPACK/BLAS we are using .. .32-bit or 64-bit FORTRAN INTEGER // noting that 32-bit INTEGER is what is shipped with RHEL, CentOS, etc, even on // 64-bit systems, for some unknown reason. std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "ERROR: LWORK_DOUBLE, " << LWORK_DOUBLE << ", is too large for the ScaLAPACK API to accept" << INFO << std::endl; if (INFO >= 0) { // make up our own argument error... -22 (there are 20 arguments) INFO = -22; } return INFO; } slpp::int_t LWORK = int(LWORK_DOUBLE); // get the cast from SVDPhysical.cpp std::cerr << "pdgesvdSlave(): info: LWORK is " << LWORK << std::endl; // ALLOCATE an array WORK size LWORK boost::scoped_array<double> WORKtmp(new double[LWORK]); double* WORK = WORKtmp.get(); ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// if(DBG) std::cerr << "pdgesvdSlave: calling pdgesvd_ for computation." << std::endl ; INFO=0; pdgesvd_(args.jobU, args.jobVT, args.M, args.N, A, args.A.I, args.A.J, args.A.DESC, S, U, args.U.I, args.U.J, args.U.DESC, VT, args.VT.I, args.VT.J, args.VT.DESC, WORK, LWORK, INFO); slpp::int_t numToPrintAtStart=4 ; if (MYPNUM==0 && DBG) { int ii; // used in 2nd loop for(ii=0; ii < std::min(SIZE_S, numToPrintAtStart); ii++) { std::cerr << "pdgesvdSlave: S["<<ii<<"] = " << S[ii] << std::endl; } // now skip to numToPrintAtStart before the end, (without repeating) and print to the end // to see if the test cases are producing zero eigenvalues (don't want that) slpp::int_t numToPrintAtEnd=4; for(int ii=std::max(ii, SIZE_S-numToPrintAtEnd); ii < SIZE_S; ii++) { std::cerr << "pdgesvdSlave: S["<<ii<<"] = " << S[ii] << std::endl; } } if (DBG) { if (args.jobU == 'V') { for(int ii=0; ii < std::min(SIZE_U, numToPrintAtStart); ii++) { std::cerr << "pdgesvdSlave: U["<<ii<<"] = " << U[ii] << std::endl; } } if (args.jobVT == 'V') { for(int ii=0; ii < std::min(SIZE_VT, numToPrintAtStart); ii++) { std::cerr << "pdgesvdSlave: VT["<<ii<<"] = " << VT[ii] << std::endl; } } } if(MYPNUM == 0) { if(INFO < 0) { // argument error std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "ERROR: argument error, argument # " << -INFO << std::endl; } else if(INFO == (std::min(args.M, args.N)+1)) { // heterogeneity detected (eigenvalues did not match on all nodes) std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "WARNING: eigenvalues did not match across all instances" << std::endl; } else if (INFO > 0) { // other + value of INFO // DBDSQR did not converge std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): " << "ERROR: DBDSQR did not converge: " << INFO << std::endl; } } return INFO ; }