/*
 * Gather a distributed matrix onto the process at grid position (0,0).
 *
 * The n x numc matrix stored as A_d (array descriptor descAd, BLACS context
 * ictxt) is redistributed with pdgemr2d_ into A, which is described by a
 * descriptor built on a 1x1 grid obtained from sl_init_.
 *
 *   ictxt   BLACS context of the grid on which A_d is distributed
 *   n       number of rows of the matrix
 *   numc    number of columns of the matrix
 *   nb      not used by this function
 *   A       destination array (only described on the (0,0) process)
 *   A_d     source array, local part of the distributed matrix
 *   descAd  array descriptor of A_d
 *
 * The info value returned by descinit_ is not inspected.
 */
void dgather(int ictxt, int n, int numc, int nb, double *A, double *A_d, int *descAd)
{
    int one = 1, zero = 0;
    int rootCtxt, info;
    int gridRows, gridCols, myRow, myCol;
    int descRoot[9];
    int isRoot;
    int idx;

    /* Ask for a 1x1 grid; its context comes back in rootCtxt. */
    sl_init_(&rootCtxt, &one, &one);

    /* Where does this process sit in the caller's grid? */
    Cblacs_gridinfo(ictxt, &gridRows, &gridCols, &myRow, &myCol);
    isRoot = (myRow == 0 && myCol == 0);

    if (!isRoot) {
        /*
         * All-zero descriptor with entry 1 set to -1.  Entry 1 is the context
         * slot of a ScaLAPACK array descriptor; presumably -1 tells pdgemr2d_
         * that this process holds no part of the gathered matrix.
         */
        for (idx = 0; idx < 9; idx++) {
            descRoot[idx] = 0;
        }
        descRoot[1] = -1;
    } else {
        /* Whole matrix as a single n x n block on the 1x1 grid. */
        int localRows = numroc_(&n, &n, &myRow, &zero, &gridRows);
        int lld = max(1, localRows);
        descinit_(descRoot, &n, &numc, &n, &n, &zero, &zero, &rootCtxt, &lld, &info);
    }

    /* Redistribute: source is (A_d, descAd), destination is (A, descRoot). */
    pdgemr2d_(&n, &numc, A_d, &one, &one, descAd, A, &one, &one, descRoot, &ictxt);

    /* Only the (0,0) process releases the 1x1 grid. */
    if (isRoot) {
        Cblacs_gridexit(rootCtxt);
    }
}
/// Sets up an nprow x npcol process grid through sl_init_ and records this
/// process's MPI rank/size and its grid shape/coordinates.
///
/// @param nprow  requested number of process rows
/// @param npcol  requested number of process columns
/// @param mb     stored in mb_
/// @param nb     stored in nb_
BlacsSystem::BlacsSystem(int nprow, int npcol, int mb, int nb)
    : mb_(mb),
      nb_(nb)
{
    // sl_init_ must run first: it produces the context ictxt_ queried below.
    sl_init_(&ictxt_, &nprow, &npcol);

    // This process's rank and the total process count in MPI_COMM_WORLD.
    MPI_Comm_rank(MPI_COMM_WORLD, &mpirank_);
    MPI_Comm_size(MPI_COMM_WORLD, &mpiprocs_);

    // Grid shape and this process's coordinates, as reported for ictxt_.
    Cblacs_gridinfo(ictxt_, &nprow_, &npcol_, &myrow_, &mycol_);
}
///
/// Slave-side entry point for the pdgesvd operation.
///
/// Verifies that the master sent at least NUM_BUFS buffers and that the first
/// one is large enough to hold a PdgesvdArgs, takes a copy of those arguments,
/// initializes the ScaLAPACK process grid with sl_init_(), and forwards
/// everything to pdgesvdSlave2().
///
/// @param bufs   buffers received from the master; bufs[BUF_ARGS] holds the PdgesvdArgs
/// @param sizes  size in bytes of each entry of bufs
/// @param count  number of entries in bufs and sizes
/// @param debugOverwriteArgs  when true, the received arguments are replaced by
///               arguments produced by pdgesvdGenTestArgs()
/// @return the value returned by pdgesvdSlave2()
///
slpp::int_t pdgesvdSlave(void* bufs[], size_t sizes[], unsigned count, bool debugOverwriteArgs)
{
    // TODO: exit()S and SLAVE_ASSERT()s need to use MPI_abort() / blacs_abort() instead

    // Debug dump of every buffer we were handed.
    if(DBG) {
        for(size_t bufIdx = 0; bufIdx < count; ++bufIdx) {
            std::cerr << "doPdgesvd: buffer at:"<< bufs[bufIdx] << std::endl;
            std::cerr << "doPdgesvd: bufsize =" << sizes[bufIdx] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "pdgesvdSlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::abort();
    }

    // NOTE bufs[BUF_ARGS] should not be referenced by pdgesvdSlave2
    enum dummy {BUF_ARGS=0};

    // Validate the size first, then take a COPY of the arguments: the copy may
    // be overwritten below when debugOverwriteArgs is set, and bufs[BUF_ARGS]
    // should not be referenced after this point.
    SLAVE_ASSERT_ALWAYS( sizes[BUF_ARGS] >= sizeof(PdgesvdArgs));
    scidb::PdgesvdArgs args = *reinterpret_cast<PdgesvdArgs*>(bufs[BUF_ARGS]) ;

    // The grid has to exist before a fake problem can be generated.
    // sl_init calls blacs_grid_init; blacs_grid_info is legal afterwards.
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);

    if(DBG) {
        std::cerr << "pdgesvdSlave: sl_init(NPROW: "<<args.NPROW<<", NPCOL:"<<args.NPCOL<<") -> ICTXT: " << ICTXT << std::endl;
    }

    // Normal path: use the arguments exactly as the master sent them.
    if(!debugOverwriteArgs) {
        return pdgesvdSlave2(ICTXT, args, bufs, sizes, count);
    }

    // Debug path: synthesize test arguments sized from the second buffer.
    // NOTE(review): NPROW and NPCOL are uninitialized when passed in, so
    // getSlInfo presumably fills them in as outputs -- confirm.
    slpp::int_t NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM;
    getSlInfo(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM);

    size_t cellCount = sizes[1]/sizeof(double);
    size_t order = floor(sqrt(cellCount)); // TODO: should be multiplied by NPROW*NPCOL
    args = pdgesvdGenTestArgs(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM, order);

    return pdgesvdSlave2(ICTXT, args, bufs, sizes, count);
}
/*
 * Distribute a matrix held by the process at grid position (0,0) over the
 * process grid ictxt.
 *
 * A is described by a descriptor built on a 1x1 grid obtained from sl_init_;
 * descAd is (re)initialized here for an n x numc matrix with nb x nb blocks
 * on ictxt, and pdgemr2d_ then redistributes A into A_d.
 *
 *   ictxt   BLACS context of the target grid
 *   n       number of rows of the matrix
 *   numc    number of columns of the matrix
 *   nb      block size used for the distributed layout (rows and columns)
 *   A       source array (only described on the (0,0) process)
 *   A_d     destination array, local part of the distributed matrix
 *   descAd  output: array descriptor of A_d, filled in by descinit_
 *
 * The info values returned by descinit_ are not inspected.
 */
void ddistr(int ictxt, int n, int numc, int nb, double *A, double *A_d, int *descAd)
{
    int one = 1, zero = 0;
    int rootCtxt, info;
    int gridRows, gridCols, myRow, myCol;
    int descRoot[9];
    int isRoot;
    int localRows, lld;
    int idx;

    /* Ask for a 1x1 grid; its context comes back in rootCtxt. */
    sl_init_(&rootCtxt, &one, &one);

    /* Where does this process sit in the target grid? */
    Cblacs_gridinfo(ictxt, &gridRows, &gridCols, &myRow, &myCol);
    isRoot = (myRow == 0 && myCol == 0);

    if (!isRoot) {
        /*
         * All-zero descriptor with entry 1 set to -1.  Entry 1 is the context
         * slot of a ScaLAPACK array descriptor; presumably -1 tells pdgemr2d_
         * that this process holds no part of the source matrix.
         */
        for (idx = 0; idx < 9; idx++) {
            descRoot[idx] = 0;
        }
        descRoot[1] = -1;
    } else {
        /* Whole matrix as a single n x n block on the 1x1 grid. */
        localRows = numroc_(&n, &n, &myRow, &zero, &gridRows);
        lld = max(1, localRows);
        descinit_(descRoot, &n, &numc, &n, &n, &zero, &zero, &rootCtxt, &lld, &info);
    }

    /* Every process: descriptor of the distributed matrix with nb x nb blocks. */
    localRows = numroc_(&n, &nb, &myRow, &zero, &gridRows);
    lld = max(1, localRows);
    descinit_(descAd, &n, &numc, &nb, &nb, &zero, &zero, &ictxt, &lld, &info);

    /* Redistribute: source is (A, descRoot), destination is (A_d, descAd). */
    pdgemr2d_(&n, &numc, A, &one, &one, descRoot, A_d, &one, &one, descAd, &ictxt);

    /* Only the (0,0) process releases the 1x1 grid. */
    if (isRoot) {
        Cblacs_gridexit(rootCtxt);
    }
}
///
/// Slave-side "copy" operation: checks the buffers sent by the master,
/// initializes the ScaLAPACK grid, verifies that the grid matches the
/// arguments, and copies the IN buffer to the OUT buffer.
///
/// @param bufs   buffers from the master, indexed by BUF_ARGS, BUF_IN, BUF_OUT
/// @param sizes  size in bytes of each entry of bufs
/// @param count  number of entries in bufs and sizes
/// @return 0 on success. Every validation failure terminates the process with
///         exit status 99 (after assert(false) for the size checks).
///
sl_int_t mpiCopySlave(void* bufs[], size_t sizes[], unsigned count)
{
    enum dummy {DBG=0};
    enum dummy2 {BUF_ARGS=0, BUF_IN, BUF_OUT, NUM_BUFS };

    if(DBG) {
        std::cerr << "mpiCopySlave(): entered" << std::endl;
        for(size_t i=0; i < count; i++) {
            std::cerr << "mpiCopySlave: buffer at:"<< bufs[i] << std::endl;
            std::cerr << "mpiCopySlave: bufsize =" << sizes[i] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "mpiCopySlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // size check args.
    // This has to happen BEFORE the buffer is reinterpreted as MPICopyArgs:
    // copying the struct out of a buffer of the wrong size would read memory
    // the master never sent.
    if( sizes[BUF_ARGS] != sizeof(MPICopyArgs)) {
        assert(false); // TODO: correct way to fail
        ::exit(99); // something that does not look like a signal
    }

    // take a COPY of args (because we will have to patch DESC.CTXT)
    scidb::MPICopyArgs args = *reinterpret_cast<MPICopyArgs*>(bufs[BUF_ARGS]) ;
    if(DBG) {
        std::cerr << "mpiCopySlave: args --------------------------" << std::endl ;
        std::cerr << args << std::endl;
        std::cerr << "mpiCopySlave: args end ----------------------" << std::endl ;
    }

    // set up the scalapack grid
    if(DBG) std::cerr << "##### sl_init() NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL<<std::endl;
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init

    // call scalapack tools routine to initialize a scalapack grid and give us its
    // context
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);

    // The grid we actually got must agree with what the master told us.
    sl_int_t NPROW=-1, NPCOL=-1, MYPROW=-1, MYPCOL=-1, MYPNUM=-1; // illegal vals
    getSlaveBLACSInfo(ICTXT/*in*/, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM);

    if(NPROW != args.NPROW || NPCOL != args.NPCOL ||
       MYPROW != args.MYPROW || MYPCOL != args.MYPCOL || MYPNUM != args.MYPNUM){
        std::cerr << "scalapack general parameter mismatch:" << std::endl;
        std::cerr << "args:" << std::endl;
        std::cerr << "NP=("<<args.NPROW<<", "<<args.NPCOL <<")"<< std::endl;
        std::cerr << "MYP("<<args.MYPROW<<", "<<args.MYPCOL<<")"<< std::endl;
        std::cerr << "MYPNUM" <<args.MYPNUM << std::endl;
        std::cerr << "ScaLAPACK:" << std::endl;
        std::cerr << "NP=("<<NPROW<<", "<<NPCOL <<")"<< std::endl;
        std::cerr << "MYP("<<MYPROW<<", "<<MYPCOL<<")"<< std::endl;
        std::cerr << "MYPNUM" <<MYPNUM << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // Local extent of IN: leading dimension from the descriptor times the
    // local column count computed by numroc_ (at least 1).
    const sl_int_t& LLD_IN = args.IN.DESC.LLD ;
    const sl_int_t one = 1 ;
    const sl_int_t LTD_IN = std::max(one, numroc_( args.IN.DESC.N, args.IN.DESC.NB, MYPCOL, /*CSRC_IN*/0, NPCOL ));
    const sl_int_t& MP = LLD_IN ;
    const sl_int_t& NQ = LTD_IN ;

    // size check IN
    sl_int_t SIZE_IN = MP*NQ ;
    if( sizes[BUF_IN] != SIZE_IN * sizeof(double)) {
        std::cerr << "slave: error size mismatch:" << std::endl;
        std::cerr << "sizes[BUF_IN]" << sizes[BUF_IN] << std::endl;
        std::cerr << "MP * NQ = " << MP <<"*"<<NQ<<"="<< MP*NQ << std::endl;
        assert(false); // TODO: correct way to fail
        ::exit(99); // something that does not look like a signal
    }

    // size check OUT
    sl_int_t SIZE_OUT = SIZE_IN;
    if( sizes[BUF_OUT] != SIZE_OUT *sizeof(double)) {
        std::cerr << "sizes[BUF_OUT]:"<<sizes[BUF_OUT];
        std::cerr << "MP * NQ = " << MP <<"*"<<NQ<<"="<< MP*NQ << std::endl;
        assert(false); // TODO: correct way to fail
        ::exit(99); // something that does not look like a signal
    }

    // sizes are correct, give the pointers their names
    double* IN = reinterpret_cast<double*>(bufs[BUF_IN]) ;
    double* OUT = reinterpret_cast<double*>(bufs[BUF_OUT]) ;

    // here's the whole thing: copy IN to OUT
    // TODO: use memcpy instead
    // The index has the same type as SIZE_OUT so it cannot overflow before
    // reaching the bound when sl_int_t is wider than int.
    for(sl_int_t ii=0; ii < SIZE_OUT; ii++) {
        OUT[ii] = IN[ii] ;
    }

    return 0;
}
int main(int argc, char *argv[]){ gettimeofday(&tp, NULL); starttime=(double)tp.tv_sec+(1.e-6)*tp.tv_usec; int id, np, ret; /* dirs, files, tags ...*/ const char* dir="../data/compleib_data/"; const char* dirinit="../data/initial_points/"; const char* code="REA1"; const char* tag="01"; char* solutionQP="solutionQP.dat-s"; /*algorithm vars*/ int max_iter=10000,step_1_fail=20; double romax=10.0,beta=0.9,gamma=0.1,sigma=0.5,tolerancia_ro=10e-4; /* compleib data matrices */ genmatrix A, B1, B,C1, C,D11,D12,D21; /* compleib initial point */ genmatrix F0, Q0, V0; /* compleib matrix dimensions*/ int nx,nw,nu,nz,ny; int i,n,k; double rho=100.0; /* auxiliar matrices */ genmatrix AF, CF, OUT, MF1, MF2; struct blockmatrix diagMF1, diagMF2; /* filter SDP */ filter Fil; //double beta=0.9; //double gamma=0.1; /* sdp problem data */ struct blockmatrix Csdp; double *asdp; struct constraintmatrix *constraintssdp; /* sdp variables */ struct blockmatrix X,Z; double *y; /* sdp value of objectives functions */ double pobj,dobj; genmatrix *null=NULL; /* Initialize the process grid */ struct scalapackpar scapack; struct paramstruc params; int printlevel; /*load compleib data*/ //int size=10974; //load_genmatrix("testeigenvalue/bcsstk17.dat",&A,size,size,0); //load_compleib(code, dir, &A, &B1, &B, &C1, &C, &D11, &D12, &D21, &nx, &nw, &nu, &nz, &ny, id); //load_initial_point(code,tag,dirinit,&F0,&Q0,&V0,nx,nu,ny, id); //initialize_filter(&Fil,500.0,500.0); MPI_Init(&argc,&argv); MPI_Comm_rank (MPI_COMM_WORLD,&id); MPI_Comm_size (MPI_COMM_WORLD,&np); scapack.id = id; scapack.np = np; switch (scapack.np) { case 1: scapack.nprow=1; scapack.npcol=1; break; case 2: scapack.nprow=2; scapack.npcol=1; break; case 4: scapack.nprow=2; scapack.npcol=2; break; case 8: scapack.nprow=4; scapack.npcol=2; break; case 16: scapack.nprow=4; scapack.npcol=4; break; case 32: scapack.nprow=8; scapack.npcol=4; break; case 64: scapack.nprow=8; scapack.npcol=8; break; default: if (scapack.id==0) printf("Can not 
setup %d processors to a grid.\nPlease use 1,2,4,8,9,16,32 or 64 nodes to run or modify fnlsdp.c file. \n",scapack.np); MPI_Finalize(); return(1); }; sl_init_(&scapack.ic,&scapack.nprow,&scapack.npcol); Cblacs_gridinfo(scapack.ic,&scapack.nprow,&scapack.npcol,&scapack.myrow,&scapack.mycol); /* if(id==0)print_filter(&Fil); if(acceptable(&Fil,10.0,3.0,beta,gamma)) {printf("acceptable!\n");add(&Fil,10.0,3.0);} if(id==0)print_filter(&Fil); if(acceptable(&Fil,8.0,3.1,beta,gamma)) {printf("acceptable!\n");add(&Fil,8.0,3.1);} if(id==0)print_filter(&Fil); if(acceptable(&Fil,6.0,3.2,beta,gamma)) {printf("acceptable!\n");add(&Fil,6.0,3.2);} if(id==0)print_filter(&Fil); extract(&Fil,10.0,3.0); if(id==0)print_filter(&Fil); extract(&Fil,8.0,3.1); if(id==0)print_filter(&Fil); */ //printf("scapack: %d,%d,%d,%d,%d\n",scapack.ic,scapack.npcol,scapack.nprow,scapack.mycol,scapack.myrow); //printf("f=%f\ntheta=%f\n",eval_f(&F0,&Q0, &V0,rho,&A,&B1,&B,&C1,&C,&D11,&D12,&D21,nx,nw,nu,ny,nz,scapack,params,printlevel,id),eval_theta(&F0,&Q0, &V0,rho,&A,&B1,&B,&C1,&C,&D11,&D12,&D21,nx,nw,nu,ny,nz,scapack,params,printlevel,id)); //printf("scapack: %d,%d,%d,%d,%d\n",scapack.ic,scapack.npcol,scapack.nprow,scapack.mycol,scapack.myrow); //test_nelmin(&F0,&Q0, &V0,rho,&A,&B1,&B,&C1,&C,&D11,&D12,&D21,nx,nw,nu,ny,nz,scapack,params,printlevel,id); //fix_genmatrix(&Q0); //print_genmatrix(&V0); /*double ll=lambda1(&A,size,scapack,params,printlevel,id); if(id==0)printf("lambda1=%f\n",ll); MPI_Barrier(MPI_COMM_WORLD); free_mat_gen(&A,0);*/ //printf("scapack: %d,%d,%d,%d,%d\n",scapack.ic,scapack.npcol,scapack.nprow,scapack.mycol,scapack.myrow); algorithm(code,tag,dir,dirinit,max_iter,romax,beta,gamma,sigma,tolerancia_ro,step_1_fail,scapack,params,printlevel,id); /* double *dx=(double *)calloc(nu*ny+nx*(nx+1),sizeof(double)); double *x_current=(double *)calloc(nu*ny+nx*(nx+1),sizeof(double)); printf("holaaaa\n"); mats2vec(dx,&F0,&Q0,&V0,nu,ny,nx); for(i=0;i<nu*ny+nx*(nx+1);i++){ 
printf("dx[%d]=%f\n",i,dx[i]); } mats2vec(x_current,&F0,&Q0,&V0,nu,ny,nx); for(i=0;i<nu*ny+nx*(nx+1);i++){ printf("x_current[%d]=%f\n",i,x_current[i]); } printf("fobj=%f\n",eval_nabla_f_vec(dx,x_current, rho,&A,&B1,&B,&C1,&C,&D11,&D12,&D21,nx,nw,nu,ny,nz,scapack,params,printlevel,id)); free(dx); free(x_current); */ /*solve qp*/ //ret=0; //fix_genmatrix(&Q0); //ret=solve_qp(code,"01", &F0,&Q0, &V0,rho,&A,&B1,&B,&C1,&C,&D11,&D12,&D21,nx,nw,nu,ny,nz,scapack,params,printlevel,id,solutionQP); //printf("scapack: %d,%d,%d,%d,%d\n",scapack.ic,scapack.npcol,scapack.nprow,scapack.mycol,scapack.myrow); /*if(DEBUG_FNLSDP && id==0){ printf("F1:\n"); print_genmatrix(&F0); printf("Q1:\n"); print_genmatrix(&Q0); printf("V1:\n"); print_genmatrix(&V0); }*/ //free_filter(&Fil); //free_initial_point(&F0,&Q0,&V0, np); //free_compleib(&A, &B1, &B, &C1, &C, &D11, &D12, &D21, np); Cblacs_gridexit(scapack.ic); MPI_Finalize(); gettimeofday(&tp, NULL); endtime=(double)tp.tv_sec+(1.e-6)*tp.tv_usec; totaltime=endtime-starttime; othertime=totaltime-opotime-factortime; if(id==0){ printf("Elements time: %f \n",opotime); printf("Factor time: %f \n",factortime); printf("Other time: %f \n",othertime); printf("Total time: %f \n",totaltime); } return ret; }
///
/// Slave-side entry point for pdgemm: checks the buffers sent by the master,
/// initializes the ScaLAPACK grid, patches the grid context into the three
/// array descriptors and calls pdgemm_() on the A, B and C buffers.
///
/// @param bufs   buffers from the master, indexed by BUF_ARGS, BUF_A, BUF_B, BUF_C
/// @param sizes  size in bytes of each entry of bufs
/// @param count  number of entries in bufs and sizes
/// @return INFO, which is always 0 here: pdgemm_() has no result INFO.
///         Too few buffers terminates the process with exit status 99; the
///         size checks go through SLAVE_ASSERT_ALWAYS.
///
slpp::int_t pdgemmSlave(void* bufs[], size_t sizes[], unsigned count)
{
    enum dummy {BUF_ARGS=0, BUF_A, BUF_B, BUF_C, NUM_BUFS };

    if(DBG) {
        for(size_t i=0; i < count; i++) {
            std::cerr << "pdgemmSlave: buffer at:"<< bufs[i] << std::endl;
            std::cerr << "pdgemmSlave: bufsize =" << sizes[i] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "pdgemmSlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // size check args.
    // Done BEFORE the buffer is reinterpreted as PdgemmArgs, so the struct is
    // never copied out of a buffer that is too small to contain it.
    SLAVE_ASSERT_ALWAYS( sizes[BUF_ARGS] >= sizeof(PdgemmArgs));

    // take a COPY of args (because we will have to patch DESC.CTXT)
    scidb::PdgemmArgs args = *reinterpret_cast<PdgemmArgs*>(bufs[BUF_ARGS]) ;
    if(DBG) {
        std::cerr << "pdgemmSlave: args {" << std::endl ;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl ;
    }

    // set up the scalapack grid
    if(DBG) std::cerr << "pdgemmSlave: NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL<<std::endl;
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init

    // call scalapack tools routine to initialize a scalapack grid and give us its
    // context
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);

    slpp::int_t NPROW=-1, NPCOL=-1, MYPROW=-1, MYPCOL=-1, MYPNUM=-1; // illegal vals
    getSlaveBLACSInfo(ICTXT/*in*/, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM);

    // A mismatch between the master's view and the actual grid is only
    // reported (under DBG); execution continues.
    if(NPROW != args.NPROW || NPCOL != args.NPCOL ||
       MYPROW != args.MYPROW || MYPCOL != args.MYPCOL || MYPNUM != args.MYPNUM){
        if(DBG) {
            std::cerr << "scalapack general parameter mismatch" << std::endl;
            // The "args" line reports args.MYPNUM (it used to print the grid's
            // MYPNUM, which made the two lines look identical in that field).
            std::cerr << "args NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL
                      << "MYPROW:"<<args.MYPROW<<" MYPCOL:"<<args.MYPCOL<<"MYPNUM:"<<args.MYPNUM
                      << std::endl;
            std::cerr << "ScaLAPACK NPROW:"<<NPROW<<" NPCOL:"<<NPCOL
                      << "MYPROW:"<<MYPROW<<" MYPCOL:"<<MYPCOL<<"MYPNUM:"<<MYPNUM
                      << std::endl;
        }
    }

    // Local column counts of A, B and C on this process (at least 1 each).
    const slpp::int_t one = 1 ;
    const slpp::int_t LTD_A = std::max(one, numroc_( args.A.DESC.N, args.A.DESC.NB, MYPCOL, /*CSRC_A*/0, NPCOL ));
    const slpp::int_t LTD_B = std::max(one, numroc_( args.B.DESC.N, args.B.DESC.NB, MYPCOL, /*CSRC_B*/0, NPCOL ));
    const slpp::int_t LTD_C = std::max(one, numroc_( args.C.DESC.N, args.C.DESC.NB, MYPCOL, /*CSRC_C*/0, NPCOL ));

    if(DBG) {
        std::cerr << "##################################################" << std::endl;
        std::cerr << "####pdgemmSlave##################################" << std::endl;
        std::cerr << "one:" << one << std::endl;
        std::cerr << "args.A.DESC.MB:" << args.A.DESC.MB << std::endl;
        std::cerr << "MYPROW:" << MYPROW << std::endl;
        std::cerr << "NPROW:" << NPROW << std::endl;
    }

    // size check A,B,C -- debugs first
    slpp::int_t SIZE_A = args.A.DESC.LLD * LTD_A ;
    slpp::int_t SIZE_B = args.B.DESC.LLD * LTD_B ;
    slpp::int_t SIZE_C = args.C.DESC.LLD * LTD_C ;
    if(DBG) {
        if(sizes[BUF_A] != SIZE_A *sizeof(double)) {
            std::cerr << "sizes[BUF_A]:" << sizes[BUF_A]
                      << " != args.A.DESC.LLD:" << args.A.DESC.LLD
                      << "* LTD_A" << LTD_A << "*" << sizeof(double) << std::endl;
        }
        if(sizes[BUF_B] != SIZE_B *sizeof(double)) {
            std::cerr << "sizes[BUF_B]:" << sizes[BUF_B]
                      << " != args.B.DESC.LLD:" << args.B.DESC.LLD
                      << "* LTD_B" << LTD_B << "*" << sizeof(double) << std::endl;
        }
        if(sizes[BUF_C] != SIZE_C *sizeof(double)) {
            std::cerr << "sizes[BUF_C]:" << sizes[BUF_C]
                      << " != args.C.DESC.LLD:" << args.C.DESC.LLD
                      << "* LTD_C" << LTD_C << "*" << sizeof(double) << std::endl;
        }
    }
    SLAVE_ASSERT_ALWAYS(sizes[BUF_A] >= SIZE_A * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_B] >= SIZE_B * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_C] >= SIZE_C * sizeof(double));

    // sizes are correct, give the pointers their names
    double* A = reinterpret_cast<double*>(bufs[BUF_A]) ;
    double* B = reinterpret_cast<double*>(bufs[BUF_B]) ;
    double* C = reinterpret_cast<double*>(bufs[BUF_C]) ;

    // debug that the input is readable and show its contents
    // (indices use slpp::int_t to match the SIZE_* bounds)
    if(DBG) {
        for(slpp::int_t ii=0; ii < SIZE_A; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") A["<<ii<<"] = " << A[ii] << std::endl;
        }
        for(slpp::int_t ii=0; ii < SIZE_B; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") B["<<ii<<"] = " << B[ii] << std::endl;
        }
        for(slpp::int_t ii=0; ii < SIZE_C; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") C["<<ii<<"] = " << C[ii] << std::endl;
        }
    }

    // ScaLAPACK: the DESCS are complete except for the correct context
    args.A.DESC.CTXT= ICTXT ;
    args.B.DESC.CTXT= ICTXT ;
    args.C.DESC.CTXT= ICTXT ;

    if(true || DBG) { // we'll leave this on in Cheshire.0 and re-evaluate later
        std::cerr << "pdgemmSlave: argsBuf is: {" << std::endl;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl << std::endl;

        std::cerr << "pdgemmSlave: calling pdgemm_ for computation, with args:" << std::endl ;
        std::cerr << "TRANSA: " << args.TRANSA
                  << ", TRANSB: " << args.TRANSB
                  << ", M: " << args.M
                  << ", N: " << args.N
                  << ", K: " << args.K << std::endl;
        std::cerr << "ALPHA: " << args.ALPHA << std::endl;
        std::cerr << "A: " << (void*)(A)
                  << ", A.I: " << args.A.I
                  << ", A.J: " << args.A.J << std::endl;
        std::cerr << ", A.DESC: " << args.A.DESC << std::endl;
        std::cerr << "B: " << (void*)(B)
                  << ", B.I: " << args.B.I
                  << ", B.J: " << args.B.J << std::endl;
        std::cerr << ", B.DESC: " << args.B.DESC << std::endl;
        std::cerr << "BETA: " << args.BETA << std::endl;
        std::cerr << "C: " << (void*)(C)
                  << ", C.I: " << args.C.I
                  << ", C.J: " << args.C.J << std::endl;
        std::cerr << ", C.DESC: " << args.C.DESC << std::endl;
    }

    //////////////////////////////////////////////////////////////////////
    // the actual computation
    //////////////////////////////////////////////////////////////////////
    pdgemm_( args.TRANSA, args.TRANSB, args.M, args.N, args.K,
             &args.ALPHA,
             A, args.A.I, args.A.J, args.A.DESC,
             B, args.B.I, args.B.J, args.B.DESC,
             &args.BETA,
             C, args.C.I, args.C.J, args.C.DESC);

    if(true || DBG) { // we'll leave this on in Cheshire.0 and re-evaluate later
        std::cerr << "pdgemmSlave: pdgemm_ complete (pdgemm_ has no result INFO)" << std::endl;
    }

    if (DBG) {
        std::cerr << "pdgemmSlave outputs: {" << std::endl;
        // debug prints of the outputs:
        for(slpp::int_t ii=0; ii < SIZE_C; ii++) {
            std::cerr << " C["<<ii<<"] = " << C[ii] << std::endl;
        }
        std::cerr << "}" << std::endl;
    }

    // TODO: what is the check on the pdgemm_ (pblas call) for successful completion?
    if (DBG) std::cerr << "pdgemmSlave returning successfully:" << std::endl;

    slpp::int_t INFO = 0 ;
    return INFO ;
}