コード例 #1
0
ファイル: pdgesvdSlave.cpp プロジェクト: suhailrehman/scidb
slpp::int_t pdgesvdSlave(void* bufs[], size_t sizes[], unsigned count, bool debugOverwriteArgs)
{
    // TODO:  exit()S and SLAVE_ASSERT()s need to use MPI_abort() / blacs_abort() instead

    for(size_t i=0; i < count; i++) {
        if(DBG) {
            std::cerr << "doPdgesvd: buffer at:"<< bufs[i] << std::endl;
            std::cerr << "doPdgesvd: bufsize =" << sizes[i] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "pdgesvdSlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::abort();
    }
    // size check and get reference to args
    enum dummy {BUF_ARGS=0};  // NOTE bufs[BUF_ARGS] should not be referenced by pdgesvdSlave2
    SLAVE_ASSERT_ALWAYS( sizes[BUF_ARGS] >= sizeof(PdgesvdArgs));
    scidb::PdgesvdArgs args = *reinterpret_cast<PdgesvdArgs*>(bufs[BUF_ARGS]) ;

    // set up the scalapack grid, this has to be done before we generate the fake
    // problem
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);  // sl_init calls blacs_grid_init
    if(DBG) std::cerr << "pdgesvdSlave: sl_init(NPROW: "<<args.NPROW<<", NPCOL:"<<args.NPCOL<<") -> ICTXT: " << ICTXT << std::endl;

    // blacs_grid_info is legal after this

    // take a COPY of args, because we may have to overwrite it (for debug) when overwriteArgs is set
    // NOTE bufs[BUF_ARGS] should not be referenced after this point
    if(debugOverwriteArgs) {
        slpp::int_t NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM;
        getSlInfo(ICTXT/*in*/, NPROW/*in*/, NPCOL/*in*/, MYPROW/*out*/, MYPCOL/*out*/, MYPNUM/*out*/);

        size_t matrixCells = sizes[1]/sizeof(double);
        size_t matrixOrder = floor(sqrt(matrixCells));  // TODO: should be multiplied by NPROW*NPCOL
        args = pdgesvdGenTestArgs(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM, matrixOrder);
    }

    return pdgesvdSlave2(ICTXT, args, bufs, sizes, count);
}
コード例 #2
0
ファイル: pdgesvdSlave.cpp プロジェクト: suhailrehman/scidb
///
/// This is the new standard style for implementing a slave routine for a ScaLAPACK operator,
/// in this case, pdgesvd_().  The difference from the old style is that the new style
/// requires that the ScaLAPACK context, ICTXT, be provided.  Until that requirement can be pushed
/// up into the "mpi_slave_xxx" files, the existing pdgesvdSlave() routine will create the context
/// and then call this routine.
///
slpp::int_t pdgesvdSlave2(const slpp::int_t ICTXT, PdgesvdArgs args, void* bufs[], size_t sizes[], unsigned count)
{
    // find out where I am in the scalapack grid
    slpp::int_t NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM;
    getSlInfo(ICTXT/*in*/, NPROW/*in*/, NPCOL/*in*/, MYPROW/*out*/, MYPCOL/*out*/, MYPNUM/*out*/);

    if(NPROW != args.NPROW || NPCOL != args.NPCOL ||
       MYPROW != args.MYPROW || MYPCOL != args.MYPCOL || MYPNUM != args.MYPNUM){
        if(DBG) {
            std::cerr << "scalapack general parameter mismatch" << std::endl;
            std::cerr << "args NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL
                      << "MYPROW:"<<args.MYPROW<<" MYPCOL:"<<args.MYPCOL<<"MYPNUM:"<<MYPNUM
                      << std::endl;
            std::cerr << "ScaLAPACK NPROW:"<<NPROW<<" NPCOL:"<<NPCOL
                      << "MYPROW:"<<MYPROW<<" MYPCOL:"<<MYPCOL<<"MYPNUM:"<<MYPNUM
                      << std::endl;
        }
    }

    // setup MB,NB
    const slpp::int_t& M = args.A.DESC.M ;
    const slpp::int_t& N = args.A.DESC.N ;
    const slpp::int_t& MB = args.A.DESC.MB ;
    const slpp::int_t& NB = args.A.DESC.NB ;

    const slpp::int_t& LLD_A = args.A.DESC.LLD ;
    const slpp::int_t one = 1 ;
    const slpp::int_t  LTD_A = std::max(one, numroc_( N, NB, MYPCOL, /*CSRC_A*/0, NPCOL ));

    const slpp::int_t& MP = LLD_A ;
    const slpp::int_t& NQ = LTD_A ;


    // size check A, S, U, VT
    slpp::int_t SIZE_A = MP * NQ ;
    slpp::int_t SIZE_S = std::min(M, N);
    slpp::int_t size_p = std::max(one, numroc_( SIZE_S, MB, MYPROW, /*RSRC_A*/0, NPROW ));
    slpp::int_t size_q = std::max(one, numroc_( SIZE_S, NB, MYPCOL, /*RSRC_A*/0, NPCOL ));
    slpp::int_t SIZE_U = MP * size_q;
    slpp::int_t SIZE_VT= size_p * NQ;

    if(DBG) {
        std::cerr << "##################################################" << std::endl;
        std::cerr << "####pdgesvdSlave##################################" << std::endl;
        std::cerr << "one:" << one << std::endl;
        std::cerr << "SIZE_S:" << SIZE_S << std::endl;
        std::cerr << "MB:" << MB << std::endl;
        std::cerr << "MYPROW:" << MYPROW << std::endl;
        std::cerr << "NPROW:" << NPROW << std::endl;
    }

    // TODO: >= because master is permitted to use a larger buffer
    //          to allow to see if rounding up to chunksize eliminates some errors
    //          before we put the roundUp formula everywhere
    SLAVE_ASSERT_ALWAYS(sizes[BUF_A] >= SIZE_A * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_S] >= SIZE_S * sizeof(double));
    if (args.jobU == 'V') {
        SLAVE_ASSERT_ALWAYS( sizes[BUF_U] >= SIZE_U *sizeof(double));
    }
    if (args.jobVT == 'V') {
        SLAVE_ASSERT_ALWAYS( sizes[BUF_VT] >= SIZE_VT *sizeof(double));
    }

    // sizes are correct, give the pointers their names
    double* A = reinterpret_cast<double*>(bufs[BUF_A]) ;
    double* S = reinterpret_cast<double*>(bufs[BUF_S]) ;
    double* U = reinterpret_cast<double*>(bufs[BUF_U]) ;
    double* VT = reinterpret_cast<double*>(bufs[BUF_VT]) ;

    // debug that the input is readable and show its contents
    if(DBG) {
        for(int ii=0; ii < SIZE_A; ii++) {
            std::cerr << "("<< MYPROW << "," << MYPCOL << ") A["<<ii<<"] = " << A[ii] << std::endl;
        }
    }

    if(false) {
        // debug that outputs are writeable:
        for(int ii=0; ii < SIZE_S; ii++) {
            S[ii] = -9999.0 ;
        }
        if (args.jobU == 'V') {
            for(int ii=0; ii < SIZE_U; ii++) {
                U[ii] = -9999.0 ;
            }
        }
        if (args.jobVT == 'V') {
            for(int ii=0; ii < SIZE_VT; ii++) {
                VT[ii] = -9999.0 ;
            }
        }
    }

    // ScaLAPACK: the DESCS are complete except for the correct context
    args.A.DESC.CTXT= ICTXT ;  // note: no DESC for S, it is not distributed, all have a copy
    args.U.DESC.CTXT= ICTXT ;
    args.VT.DESC.CTXT= ICTXT ;

    if(DBG) {
        std::cerr << "pdgesvdSlave: argsBuf is: {" << std::endl;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl << std::endl;

        std::cerr << "pdgesvdSlave: calling pdgesvd_ for computation, with args:" << std::endl ;
        std::cerr << "jobU: " << args.jobU
                  << ", jobVT: " << args.jobVT
                  << ", M: " << args.M
                  << ", N: " << args.N << std::endl;

        std::cerr << "A: " <<  (void*)(A)
                  << ", A.I: " << args.A.I
                  << ", A.J: " << args.A.J << std::endl;
        std::cerr << ", A.DESC: " << args.A.DESC << std::endl;

        std::cerr << "S: " << (void*)(S) << std::endl;

        std::cerr << "U: " <<  (void*)(U)
                  << ", U.I: " << args.U.I
                  << ", U.J: " << args.U.J << std::endl;
        std::cerr << ", U.DESC: " << args.U.DESC << std::endl;

        std::cerr << "VT: " <<  (void*)(VT)
                  << ", VT.I: " << args.VT.I
                  << ", VT.J: " << args.VT.J << std::endl;
        std::cerr << ", VT.DESC: " << args.VT.DESC << std::endl;
    }


    if(DBG) std::cerr << "pdgesvdSlave calling PDGESVD to get work size" << std:: endl;
    slpp::int_t INFO = 0;
    double LWORK_DOUBLE;
    pdgesvd_(args.jobU, args.jobVT, args.M, args.N,
             A,  args.A.I,  args.A.J,  args.A.DESC, S,
             U,  args.U.I,  args.U.J,  args.U.DESC,
             VT, args.VT.I, args.VT.J, args.VT.DESC,
             &LWORK_DOUBLE, -1, INFO);

    if(INFO < 0) {
        // argument error
        std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                  << "ERROR: pdgesvd_() for work size, argument error, argument # " << -INFO << std::endl;
    } else if(INFO == (std::min(args.M, args.N)+1)) {
        // should not happen when checking work size
        // heterogeneity detected (eigenvalues did not match on all nodes)
        std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                  << "WARNING: pdgesvd_() for work size, eigenvalues did not match across all instances" << std::endl;
    } else if (INFO > 0) { // other + value of INFO
        // should not  happen when checking work size
        // DBDSQR did not converge
        std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                  << "ERROR: pdgesvd_() for work size, DBDSQR did not converge: " << INFO << std::endl;
    }

    if ( LWORK_DOUBLE < 0.0 ||
         LWORK_DOUBLE > double(numeric_limits<slpp::int_t>::max())) {
        // Houston, we have a problem ... the user wants to do more than 1 instance
        // can handle through the slpp::int ... the size of which is determined
        // by which binary for ScaLAPACK/BLAS we are using .. .32-bit or 64-bit FORTRAN INTEGER
        // noting that 32-bit INTEGER is what is shipped with RHEL, CentOS, etc, even on
        // 64-bit systems, for some unknown reason.
        std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                  << "ERROR: LWORK_DOUBLE, " << LWORK_DOUBLE << ", is too large for the ScaLAPACK API to accept" << INFO << std::endl;
        if (INFO >= 0) {
            // make up our own argument error... -22 (there are 20 arguments)
            INFO = -22;
        }
        return INFO;
    }

    slpp::int_t LWORK = int(LWORK_DOUBLE); // get the cast from SVDPhysical.cpp
    std::cerr << "pdgesvdSlave(): info: LWORK is " << LWORK << std::endl;

    // ALLOCATE an array WORK size LWORK
    boost::scoped_array<double> WORKtmp(new double[LWORK]);
    double* WORK = WORKtmp.get();

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
    if(DBG) std::cerr << "pdgesvdSlave: calling pdgesvd_ for computation." << std::endl ;
    INFO=0;
    pdgesvd_(args.jobU, args.jobVT, args.M, args.N,
             A,  args.A.I,  args.A.J,  args.A.DESC, S,
             U,  args.U.I,  args.U.J,  args.U.DESC,
             VT, args.VT.I, args.VT.J, args.VT.DESC,
             WORK, LWORK, INFO);


    slpp::int_t numToPrintAtStart=4 ;
    if (MYPNUM==0 && DBG) {
        int ii; // used in 2nd loop
        for(ii=0; ii < std::min(SIZE_S, numToPrintAtStart); ii++) {
            std::cerr << "pdgesvdSlave: S["<<ii<<"] = " << S[ii] << std::endl;
        }
        // now skip to numToPrintAtStart before the end, (without repeating) and print to the end
        // to see if the test cases are producing zero eigenvalues (don't want that)
        slpp::int_t numToPrintAtEnd=4;
        for(int ii=std::max(ii, SIZE_S-numToPrintAtEnd); ii < SIZE_S; ii++) {
            std::cerr << "pdgesvdSlave: S["<<ii<<"] = " << S[ii] << std::endl;
        }
    }
    if (DBG) {
        if (args.jobU == 'V') {
            for(int ii=0; ii < std::min(SIZE_U, numToPrintAtStart); ii++) {
                std::cerr << "pdgesvdSlave: U["<<ii<<"] = " << U[ii] << std::endl;
            }
        }
        if (args.jobVT == 'V') {
            for(int ii=0; ii < std::min(SIZE_VT, numToPrintAtStart); ii++) {
                std::cerr << "pdgesvdSlave: VT["<<ii<<"] = " << VT[ii] << std::endl;
            }
        }
    }

    if(MYPNUM == 0) {
        if(INFO < 0) {
            // argument error
            std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                      << "ERROR: argument error, argument # " << -INFO << std::endl;
        } else if(INFO == (std::min(args.M, args.N)+1)) {
            // heterogeneity detected (eigenvalues did not match on all nodes)
            std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                      << "WARNING: eigenvalues did not match across all instances" << std::endl;
        } else if (INFO > 0) { // other + value of INFO
            // DBDSQR did not converge
            std::cerr << "pdgesvdSlave(r:"<<MYPNUM<<"): "
                      << "ERROR: DBDSQR did not converge: " << INFO << std::endl;
        }
    }
    return INFO ;
}
コード例 #3
0
ファイル: pdgemmSlave.cpp プロジェクト: Myasuka/scidb
///
/// @return INFO = the status of the psgemm_()
///
slpp::int_t pdgemmSlave(void* bufs[], size_t sizes[], unsigned count)
{
    enum dummy {BUF_ARGS=0, BUF_A, BUF_B, BUF_C, NUM_BUFS };

    for(size_t i=0; i < count; i++) {
        if(DBG) {
            std::cerr << "pdgemmSlave: buffer at:"<< bufs[i] << std::endl;
            std::cerr << "pdgemmSlave: bufsize =" << sizes[i] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "pdgemmSlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // take a COPY of args (because we will have to patch DESC.CTXT)
    scidb::PdgemmArgs args = *reinterpret_cast<PdgemmArgs*>(bufs[BUF_ARGS]) ;
    if(DBG) {
        std::cerr << "pdgemmSlave: args {" << std::endl ;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl ;
    }

    // set up the scalapack grid
    if(DBG) std::cerr << "pdgemmSlave: NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL<<std::endl;
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init

    // call scalapack tools routine to initialize a scalapack grid and give us its
    // context
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);
    slpp::int_t NPROW=-1, NPCOL=-1, MYPROW=-1, MYPCOL=-1, MYPNUM=-1; // illegal vals
    getSlaveBLACSInfo(ICTXT/*in*/, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM);

    if(NPROW != args.NPROW || NPCOL != args.NPCOL ||
       MYPROW != args.MYPROW || MYPCOL != args.MYPCOL || MYPNUM != args.MYPNUM){
        if(DBG) {
            std::cerr << "scalapack general parameter mismatch" << std::endl;
            std::cerr << "args NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL
                      << "MYPROW:"<<args.MYPROW<<" MYPCOL:"<<args.MYPCOL<<"MYPNUM:"<<MYPNUM
                      << std::endl;
            std::cerr << "ScaLAPACK NPROW:"<<NPROW<<" NPCOL:"<<NPCOL
                      << "MYPROW:"<<MYPROW<<" MYPCOL:"<<MYPCOL<<"MYPNUM:"<<MYPNUM
                      << std::endl;
        }
    }

    const slpp::int_t one = 1 ;
    const slpp::int_t  LTD_A = std::max(one, numroc_( args.A.DESC.N, args.A.DESC.NB, MYPCOL, /*CSRC_A*/0, NPCOL ));
    const slpp::int_t  LTD_B = std::max(one, numroc_( args.B.DESC.N, args.B.DESC.NB, MYPCOL, /*CSRC_B*/0, NPCOL ));
    const slpp::int_t  LTD_C = std::max(one, numroc_( args.C.DESC.N, args.C.DESC.NB, MYPCOL, /*CSRC_C*/0, NPCOL ));

    if(DBG) {
        std::cerr << "##################################################" << std::endl;
        std::cerr << "####pdgemmSlave##################################" << std::endl;
        std::cerr << "one:" << one << std::endl;
        std::cerr << "args.A.DESC.MB:" << args.A.DESC.MB << std::endl;
        std::cerr << "MYPROW:" << MYPROW << std::endl;
        std::cerr << "NPROW:" << NPROW << std::endl;
    }

    // size check args
    SLAVE_ASSERT_ALWAYS( sizes[BUF_ARGS] >= sizeof(PdgemmArgs));

    // size check A,B,C -- debugs first
    slpp::int_t SIZE_A = args.A.DESC.LLD * LTD_A ;
    slpp::int_t SIZE_B = args.B.DESC.LLD * LTD_B ;
    slpp::int_t SIZE_C = args.C.DESC.LLD * LTD_C ;
    if(DBG) {
        if(sizes[BUF_A] != SIZE_A *sizeof(double)) {
            std::cerr << "sizes[BUF_A]:" << sizes[BUF_A]
                      << " != args.A.DESC.LLD:" << args.A.DESC.LLD
                      << "* LTD_A" << LTD_A << "*" << sizeof(double) << std::endl;
        }
        if(sizes[BUF_B] != SIZE_B *sizeof(double)) {
            std::cerr << "sizes[BUF_B]:" << sizes[BUF_B]
                      << " != args.B.DESC.LLD:" << args.B.DESC.LLD
                      << "* LTD_B" << LTD_B << "*" << sizeof(double) << std::endl;
        }
        if(sizes[BUF_C] != SIZE_C *sizeof(double)) {
            std::cerr << "sizes[BUF_C]:" << sizes[BUF_C]
                      << " != args.C.DESC.LLD:" << args.C.DESC.LLD
                      << "* LTD_C" << LTD_C << "*" << sizeof(double) << std::endl;
        }
    }
    SLAVE_ASSERT_ALWAYS(sizes[BUF_A] >= SIZE_A * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_B] >= SIZE_B * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_C] >= SIZE_C * sizeof(double));

    // sizes are correct, give the pointers their names
    double* A = reinterpret_cast<double*>(bufs[BUF_A]) ;
    double* B = reinterpret_cast<double*>(bufs[BUF_B]) ;
    double* C = reinterpret_cast<double*>(bufs[BUF_C]) ;

    // debug that the input is readable and show its contents
    if(DBG) {
        for(int ii=0; ii < SIZE_A; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") A["<<ii<<"] = " << A[ii] << std::endl;
        }
        for(int ii=0; ii < SIZE_B; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") B["<<ii<<"] = " << B[ii] << std::endl;
        }
        for(int ii=0; ii < SIZE_C; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") C["<<ii<<"] = " << C[ii] << std::endl;
        }
    }


    // ScaLAPACK: the DESCS are complete except for the correct context
    args.A.DESC.CTXT= ICTXT ;
    // (no DESC for S)
    args.B.DESC.CTXT= ICTXT ;
    args.C.DESC.CTXT= ICTXT ;

    if(true || DBG) {    // we'll leave this on in Cheshire.0 and re-evaluate later
        std::cerr << "pdgemmSlave: argsBuf is: {" << std::endl;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl << std::endl;

        std::cerr << "pdgemmSlave: calling pdgemm_ for computation, with args:" << std::endl ;
        std::cerr << "TRANSA: " << args.TRANSA
                  << ", TRANSB: " << args.TRANSB
                  << ", M: " << args.M
                  << ", N: " << args.N
                  << ", K: " << args.K << std::endl;

        std::cerr << "ALPHA: " << args.ALPHA << std::endl;

        std::cerr << "A: " <<  (void*)(A)
                  << ", A.I: " << args.A.I
                  << ", A.J: " << args.A.J << std::endl;
        std::cerr << ", A.DESC: " << args.A.DESC << std::endl;

        std::cerr << "B: " <<  (void*)(B)
                  << ", B.I: " << args.B.I
                  << ", B.J: " << args.B.J << std::endl;
        std::cerr << ", B.DESC: " << args.B.DESC << std::endl;

        std::cerr << "BETA: " << args.BETA << std::endl;

        std::cerr << "C: " <<  (void*)(C)
                  << ", C.I: " << args.C.I
                  << ", C.J: " << args.C.J << std::endl;
        std::cerr << ", C.DESC: " << args.C.DESC << std::endl;
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
    pdgemm_( args.TRANSA, args.TRANSB, args.M, args.N, args.K,
             &args.ALPHA,
             A,  args.A.I,  args.A.J,  args.A.DESC,
             B,  args.B.I,  args.B.J,  args.B.DESC,
             &args.BETA,
             C, args.C.I, args.C.J, args.C.DESC);

    if(true || DBG) {    // we'll leave this on in Cheshire.0 and re-evaluate later
        std::cerr << "pdgemmSlave: pdgemm_ complete (pdgemm_ has no result INFO)" << std::endl;
    }

    if (DBG) {
        std::cerr << "pdgemmSlave outputs: {" << std::endl;
        // debug prints of the outputs:
        for(int ii=0; ii < SIZE_C; ii++) {
            std::cerr << " C["<<ii<<"] = " << C[ii] << std::endl;
        }
        std::cerr << "}" << std::endl;
    }

    // TODO: what is the check on the pdgemm_ (pblas call) for successful completion?
    if (DBG) std::cerr << "pdgemmSlave returning successfully:" << std::endl;
    slpp::int_t INFO = 0 ;
    return INFO ;
}