コード例 #1
0
ファイル: mat_redist.c プロジェクト: operedo/fnlsdp
/*
 * dgather -- copy a distributed matrix into a single buffer on the process
 * at grid position (0,0) of `ictxt`.
 *
 *   ictxt   BLACS context of the grid that holds A_d
 *   n       row count handed to descinit_/pdgemr2d_
 *   numc    column count handed to descinit_/pdgemr2d_
 *   nb      not used by this routine
 *   A       destination buffer; its descriptor is only initialised on (0,0)
 *   A_d     local part of the distributed source matrix
 *   descAd  descriptor of A_d (only read here)
 */
void
dgather(int ictxt, int n, int numc, int nb, double *A, double *A_d, int *descAd)
{
    int one = 1, zero = 0;
    int rootCtxt;                         /* context returned for the 1x1 grid */
    int gridRows, gridCols, myRow, myCol; /* layout of the ictxt grid          */
    int rootDesc[9];                      /* descriptor for A                  */
    int localRows, lld, status;
    int onRoot;
    int idx;

    (void)nb;

    /* Every caller requests a 1x1 grid; its context is used for A's descriptor. */
    sl_init_(&rootCtxt, &one, &one);

    Cblacs_gridinfo(ictxt, &gridRows, &gridCols, &myRow, &myCol);
    onRoot = (myRow == 0) && (myCol == 0);

    if (!onRoot) {
        /* Not the root: blank descriptor whose second entry is -1
           (presumably the "not in this context" marker pdgemr2d_ expects). */
        for (idx = 0; idx < 9; ++idx)
            rootDesc[idx] = 0;
        rootDesc[1] = -1;
    } else {
        /* Root: one n x n block, so the whole matrix is local to it. */
        localRows = numroc_(&n, &n, &myRow, &zero, &gridRows);
        lld = (localRows > 1) ? localRows : 1;   /* leading dimension >= 1 */
        descinit_(rootDesc, &n, &numc, &n, &n, &zero, &zero, &rootCtxt, &lld, &status);
    }

    /* Redistribute: source A_d/descAd, destination A/rootDesc. */
    pdgemr2d_(&n, &numc, A_d, &one, &one, descAd, A, &one, &one, rootDesc, &ictxt);

    /* Only the root releases the 1x1 grid. */
    if (onRoot)
        Cblacs_gridexit(rootCtxt);
}
コード例 #2
0
ファイル: bem2d_mpi.cpp プロジェクト: YangchenVR/bem2d
// Sets up the BLACS process grid and records MPI/grid information.
//
// nprow, npcol : requested process-grid shape, forwarded to sl_init_
// mb, nb       : stored unchanged in mb_ and nb_
BlacsSystem::BlacsSystem(int nprow, int npcol, int mb, int nb)
        : mb_(mb),
          nb_(nb)
{
        // Create the grid first; the resulting context is kept in ictxt_.
        sl_init_(&ictxt_, &nprow, &npcol);

        // Record this process' rank and the total process count in MPI_COMM_WORLD.
        MPI_Comm_rank(MPI_COMM_WORLD, &mpirank_);
        MPI_Comm_size(MPI_COMM_WORLD, &mpiprocs_);

        // Read back the grid shape and this process' coordinates in it.
        Cblacs_gridinfo(ictxt_, &nprow_, &npcol_, &myrow_, &mycol_);
}
コード例 #3
0
ファイル: pdgesvdSlave.cpp プロジェクト: suhailrehman/scidb
// Slave-side entry point for pdgesvd: validates the argument buffer, creates
// the ScaLAPACK grid and hands the work to pdgesvdSlave2().
//
// bufs / sizes       : buffers sent by the master and their sizes in bytes
// count              : number of entries in bufs/sizes (at least NUM_BUFS)
// debugOverwriteArgs : when true, the received args are replaced by the
//                      result of pdgesvdGenTestArgs()
// Returns whatever pdgesvdSlave2() returns.
slpp::int_t pdgesvdSlave(void* bufs[], size_t sizes[], unsigned count, bool debugOverwriteArgs)
{
    // TODO:  exit()S and SLAVE_ASSERT()s need to use MPI_abort() / blacs_abort() instead

    if(DBG) {
        for(size_t bufIdx = 0; bufIdx < count; ++bufIdx) {
            std::cerr << "doPdgesvd: buffer at:"<< bufs[bufIdx] << std::endl;
            std::cerr << "doPdgesvd: bufsize =" << sizes[bufIdx] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "pdgesvdSlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::abort();
    }

    // Check the argument buffer is large enough, then work on a private COPY
    // of it (the copy may be replaced below when debugOverwriteArgs is set).
    enum dummy {BUF_ARGS=0};  // NOTE bufs[BUF_ARGS] should not be referenced by pdgesvdSlave2
    SLAVE_ASSERT_ALWAYS( sizes[BUF_ARGS] >= sizeof(PdgesvdArgs));
    scidb::PdgesvdArgs args = *reinterpret_cast<PdgesvdArgs*>(bufs[BUF_ARGS]) ;
    // NOTE bufs[BUF_ARGS] should not be referenced after this point

    // The grid has to exist before a fake problem can be generated;
    // blacs_grid_info is legal once sl_init has run.
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);  // sl_init calls blacs_grid_init
    if(DBG) std::cerr << "pdgesvdSlave: sl_init(NPROW: "<<args.NPROW<<", NPCOL:"<<args.NPCOL<<") -> ICTXT: " << ICTXT << std::endl;

    // Normal path: run with the arguments exactly as received.
    if(!debugOverwriteArgs) {
        return pdgesvdSlave2(ICTXT, args, bufs, sizes, count);
    }

    // Debug path: derive a test problem from the grid and the size of buffer 1.
    slpp::int_t NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM;
    getSlInfo(ICTXT/*in*/, NPROW/*in*/, NPCOL/*in*/, MYPROW/*out*/, MYPCOL/*out*/, MYPNUM/*out*/);

    const size_t matrixCells = sizes[1]/sizeof(double);
    const size_t matrixOrder = floor(sqrt(matrixCells));  // TODO: should be multiplied by NPROW*NPCOL
    args = pdgesvdGenTestArgs(ICTXT, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM, matrixOrder);

    return pdgesvdSlave2(ICTXT, args, bufs, sizes, count);
}
コード例 #4
0
ファイル: mat_redist.c プロジェクト: operedo/fnlsdp
/*
 * ddistr -- spread a matrix held on grid position (0,0) of `ictxt` over the
 * whole grid.
 *
 *   ictxt   BLACS context of the target grid
 *   n       row count handed to descinit_/pdgemr2d_
 *   numc    column count handed to descinit_/pdgemr2d_
 *   nb      block size used for both dimensions of the distributed matrix
 *   A       source buffer; its descriptor is only initialised on (0,0)
 *   A_d     local part of the distributed destination matrix
 *   descAd  output: descriptor of A_d, initialised here on every process
 */
void
ddistr(int ictxt, int n, int numc, int nb, double *A, double *A_d, int *descAd)
{
    int one = 1, zero = 0;
    int rootCtxt;                         /* context returned for the 1x1 grid */
    int gridRows, gridCols, myRow, myCol; /* layout of the ictxt grid          */
    int rootDesc[9];                      /* descriptor for A                  */
    int localRows, lld, status;
    int onRoot;
    int idx;

    /* Every caller requests a 1x1 grid; its context is used for A's descriptor. */
    sl_init_(&rootCtxt, &one, &one);

    Cblacs_gridinfo(ictxt, &gridRows, &gridCols, &myRow, &myCol);
    onRoot = (myRow == 0) && (myCol == 0);

    if (!onRoot) {
        /* Not the root: blank descriptor whose second entry is -1
           (presumably the "not in this context" marker pdgemr2d_ expects). */
        for (idx = 0; idx < 9; ++idx)
            rootDesc[idx] = 0;
        rootDesc[1] = -1;
    } else {
        /* Root: one n x n block, so the whole matrix is local to it. */
        localRows = numroc_(&n, &n, &myRow, &zero, &gridRows);
        lld = (localRows > 1) ? localRows : 1;   /* leading dimension >= 1 */
        descinit_(rootDesc, &n, &numc, &n, &n, &zero, &zero, &rootCtxt, &lld, &status);
    }

    /* All processes: descriptor of the nb x nb block-distributed target. */
    localRows = numroc_(&n, &nb, &myRow, &zero, &gridRows);
    lld = (localRows > 1) ? localRows : 1;
    descinit_(descAd, &n, &numc, &nb, &nb, &zero, &zero, &ictxt, &lld, &status);

    /* Redistribute: source A/rootDesc, destination A_d/descAd. */
    pdgemr2d_(&n, &numc, A, &one, &one, rootDesc, A_d, &one, &one, descAd, &ictxt);

    /* Only the root releases the 1x1 grid. */
    if (onRoot)
        Cblacs_gridexit(rootCtxt);
}
コード例 #5
0
// mpiCopySlave: checks the buffers sent by the master against the ScaLAPACK
// grid and the IN descriptor carried in the arguments, then copies IN to OUT.
//
// bufs   buffers from the master: [BUF_ARGS] holds an MPICopyArgs,
//        [BUF_IN] and [BUF_OUT] are arrays of double
// sizes  size in bytes of each entry of bufs
// count  number of entries in bufs/sizes; must be at least NUM_BUFS
//
// Returns 0 after a successful copy. Any failed check ends the process with
// ::exit(99) (preceded by assert(false) for the size checks).
sl_int_t mpiCopySlave(void* bufs[], size_t sizes[], unsigned count)
{
    enum dummy  {DBG=0};
    enum dummy2 {BUF_ARGS=0, BUF_IN, BUF_OUT, NUM_BUFS };

    if(DBG) {
        std::cerr << "mpiCopySlave(): entered" << std::endl;
        for(size_t i=0; i < count; i++) {
            std::cerr << "mpiCopySlave: buffer at:"<< bufs[i] << std::endl;
            std::cerr << "mpiCopySlave: bufsize =" << sizes[i] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "mpiCopySlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // size check args -- must happen BEFORE the buffer is read as an
    // MPICopyArgs, otherwise a short buffer would be read out of bounds
    if( sizes[BUF_ARGS] != sizeof(MPICopyArgs)) {
        assert(false); // TODO: correct way to fail
        ::exit(99); // something that does not look like a signal
    }

    // take a COPY of args (because we will have to patch DESC.CTXT)
    scidb::MPICopyArgs args = *reinterpret_cast<MPICopyArgs*>(bufs[BUF_ARGS]) ;
    if(DBG) {
        std::cerr << "mpiCopySlave: args --------------------------" << std::endl ;
        std::cerr << args << std::endl;
        std::cerr << "mpiCopySlave: args end ----------------------" << std::endl ;
    }

    // set up the scalapack grid
    if(DBG) std::cerr << "##### sl_init() NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL<<std::endl;
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init

    // call scalapack tools routine to initialize a scalapack grid and give us its
    // context
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/); 

    sl_int_t NPROW=-1, NPCOL=-1, MYPROW=-1, MYPCOL=-1, MYPNUM=-1; // illegal vals
    getSlaveBLACSInfo(ICTXT/*in*/, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM);

    // the grid we actually got must be the one the master described
    if(NPROW  != args.NPROW  || NPCOL  != args.NPCOL ||
       MYPROW != args.MYPROW || MYPCOL != args.MYPCOL ||
       MYPNUM != args.MYPNUM){
        std::cerr << "scalapack general parameter mismatch:" << std::endl;
        std::cerr << "args:" << std::endl;
        std::cerr << "NP=("<<args.NPROW<<", "<<args.NPCOL <<")"<< std::endl;
        std::cerr << "MYP("<<args.MYPROW<<", "<<args.MYPCOL<<")"<< std::endl;
        std::cerr << "MYPNUM" <<args.MYPNUM << std::endl;
        std::cerr << "ScaLAPACK:" << std::endl;
        std::cerr << "NP=("<<NPROW<<", "<<NPCOL <<")"<< std::endl;
        std::cerr << "MYP("<<MYPROW<<", "<<MYPCOL<<")"<< std::endl;
        std::cerr << "MYPNUM" <<MYPNUM << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // local extent of IN: LLD from the descriptor times the local column
    // count computed by numroc_ (clamped to at least 1)
    const sl_int_t& LLD_IN = args.IN.DESC.LLD ;
    const sl_int_t one = 1 ;
    const sl_int_t  LTD_IN = std::max(one, numroc_( args.IN.DESC.N, args.IN.DESC.NB, MYPCOL, /*CSRC_IN*/0, NPCOL )); 
    const sl_int_t& MP = LLD_IN ;
    const sl_int_t& NQ = LTD_IN ;

    // size check IN
    sl_int_t SIZE_IN = MP*NQ ;
    if( sizes[BUF_IN] != SIZE_IN * sizeof(double)) {
        std::cerr << "slave: error size mismatch:" << std::endl;
        std::cerr << "sizes[BUF_IN]" << sizes[BUF_IN] << std::endl;
        std::cerr << "MP * NQ = " << MP <<"*"<<NQ<<"="<< MP*NQ << std::endl;
        assert(false); // TODO: correct way to fail
        ::exit(99); // something that does not look like a signal
    }

    // size check OUT (OUT must have exactly the same extent as IN)
    sl_int_t SIZE_OUT = SIZE_IN;
    if( sizes[BUF_OUT] != SIZE_OUT *sizeof(double)) {
        // line break added so the two values are not run together
        std::cerr << "sizes[BUF_OUT]:"<<sizes[BUF_OUT] << std::endl;
        std::cerr << "MP * NQ = " << MP <<"*"<<NQ<<"="<< MP*NQ << std::endl;
        assert(false); // TODO: correct way to fail
        ::exit(99); // something that does not look like a signal
    }

    // sizes are correct, give the pointers their names
    double* IN = reinterpret_cast<double*>(bufs[BUF_IN]) ;
    double* OUT = reinterpret_cast<double*>(bufs[BUF_OUT]) ;

    // here's the whole thing: copy IN to OUT
    // TODO: use memcpy instead
    // index has the same type as SIZE_OUT so it cannot overflow before the bound
    for(sl_int_t ii=0; ii < SIZE_OUT; ii++) {
        OUT[ii] = IN[ii] ;
    }

    return 0;
}
コード例 #6
0
ファイル: fnlsdp.c プロジェクト: operedo/fnlsdp
/*
 * Program entry point.
 *
 * Initialises MPI, maps the number of MPI processes onto a fixed
 * nprow x npcol BLACS grid, runs algorithm() on the hard-coded problem
 * (code/tag/dir/dirinit below), releases the grid and prints timings on
 * process 0.
 *
 * The timing variables (tp, starttime, endtime, totaltime, othertime,
 * opotime, factortime) are not declared here; they are file-level names.
 *
 * Returns 1 when the process count has no grid mapping, otherwise `ret`.
 */
int main(int argc, char *argv[]){
	gettimeofday(&tp, NULL);
	starttime=(double)tp.tv_sec+(1.e-6)*tp.tv_usec;

	int id, np;
	/* Exit status. Nothing in this function assigns it after this point,
	   so it must start at 0; it was previously returned uninitialised. */
	int ret=0;

	/* dirs, files, tags ...*/
	const char* dir="../data/compleib_data/";
	const char* dirinit="../data/initial_points/";
	const char* code="REA1";
	const char* tag="01";

	/*algorithm vars*/
	int max_iter=10000,step_1_fail=20;
	/* note: 10e-4 is 1e-3 */
	double romax=10.0,beta=0.9,gamma=0.1,sigma=0.5,tolerancia_ro=10e-4;

	/* process grid description and solver parameters */
	struct scalapackpar scapack;
	/* NOTE(review): params and printlevel are handed to algorithm() by value
	   without being assigned anywhere in main; presumably algorithm() sets
	   up its own copies -- confirm. */
	struct paramstruc params;
	int printlevel;

	MPI_Init(&argc,&argv);
	MPI_Comm_rank (MPI_COMM_WORLD,&id);
	MPI_Comm_size (MPI_COMM_WORLD,&np);
	scapack.id = id;
	scapack.np = np;

	/* Fixed table: process count -> grid shape. Any other count is rejected. */
	switch (scapack.np)
	{
		case 1: scapack.nprow=1; scapack.npcol=1;
			break;
		case 2: scapack.nprow=2; scapack.npcol=1;
			break;
		case 4: scapack.nprow=2; scapack.npcol=2;
			break;
		case 8: scapack.nprow=4; scapack.npcol=2;
			break;
		case 16: scapack.nprow=4; scapack.npcol=4;
			break;
		case 32: scapack.nprow=8; scapack.npcol=4;
			break;
		case 64: scapack.nprow=8; scapack.npcol=8;
			break;
		default:
			/* The message lists exactly the counts handled above
			   (it used to advertise 9, which has no case). */
			if (scapack.id==0)
				printf("Can not setup %d processors to a grid.\nPlease use 1,2,4,8,16,32 or 64 nodes to run or modify fnlsdp.c file. \n",scapack.np);
			MPI_Finalize();
			return(1);
	}

	/* Create the grid, then read back its shape and this process' coordinates. */
	sl_init_(&scapack.ic,&scapack.nprow,&scapack.npcol);
	Cblacs_gridinfo(scapack.ic,&scapack.nprow,&scapack.npcol,&scapack.myrow,&scapack.mycol);

	algorithm(code,tag,dir,dirinit,max_iter,romax,beta,gamma,sigma,tolerancia_ro,step_1_fail,scapack,params,printlevel,id);

	Cblacs_gridexit(scapack.ic);
	MPI_Finalize();

	/* Timing report: "other" is whatever is not element or factor time. */
	gettimeofday(&tp, NULL);
	endtime=(double)tp.tv_sec+(1.e-6)*tp.tv_usec;
	totaltime=endtime-starttime;
	othertime=totaltime-opotime-factortime;
	if(id==0){
		printf("Elements time: %f \n",opotime);
		printf("Factor time: %f \n",factortime);
		printf("Other time: %f \n",othertime);
		printf("Total time: %f \n",totaltime);
	}
	return ret;
}
コード例 #7
0
ファイル: pdgemmSlave.cpp プロジェクト: Myasuka/scidb
///
/// Slave-side wrapper around pdgemm_(): validates the buffers sent by the
/// master, sets up the ScaLAPACK grid, patches the grid context into the
/// A, B and C descriptors and performs the multiplication in place on C.
///
/// @param bufs   buffers from the master: [BUF_ARGS] holds a PdgemmArgs,
///               [BUF_A], [BUF_B], [BUF_C] are arrays of double
/// @param sizes  size in bytes of each entry of bufs
/// @param count  number of entries in bufs/sizes; must be at least NUM_BUFS
/// @return INFO, which is always 0 here because pdgemm_() reports no status
///
slpp::int_t pdgemmSlave(void* bufs[], size_t sizes[], unsigned count)
{
    enum dummy {BUF_ARGS=0, BUF_A, BUF_B, BUF_C, NUM_BUFS };

    for(size_t i=0; i < count; i++) {
        if(DBG) {
            std::cerr << "pdgemmSlave: buffer at:"<< bufs[i] << std::endl;
            std::cerr << "pdgemmSlave: bufsize =" << sizes[i] << std::endl;
        }
    }

    if(count < NUM_BUFS) {
        std::cerr << "pdgemmSlave: master sent " << count << " buffers, but " << NUM_BUFS << " are required." << std::endl;
        ::exit(99); // something that does not look like a signal
    }

    // size check args -- done BEFORE the buffer is read as a PdgemmArgs so a
    // short buffer is rejected instead of being read out of bounds
    SLAVE_ASSERT_ALWAYS( sizes[BUF_ARGS] >= sizeof(PdgemmArgs));

    // take a COPY of args (because we will have to patch DESC.CTXT)
    scidb::PdgemmArgs args = *reinterpret_cast<PdgemmArgs*>(bufs[BUF_ARGS]) ;
    if(DBG) {
        std::cerr << "pdgemmSlave: args {" << std::endl ;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl ;
    }

    // set up the scalapack grid
    if(DBG) std::cerr << "pdgemmSlave: NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL<<std::endl;
    slpp::int_t ICTXT=-1; // will be overwritten by sl_init

    // call scalapack tools routine to initialize a scalapack grid and give us its
    // context
    sl_init_(ICTXT/*out*/, args.NPROW/*in*/, args.NPCOL/*in*/);
    slpp::int_t NPROW=-1, NPCOL=-1, MYPROW=-1, MYPCOL=-1, MYPNUM=-1; // illegal vals
    getSlaveBLACSInfo(ICTXT/*in*/, NPROW, NPCOL, MYPROW, MYPCOL, MYPNUM);

    // NOTE(review): a mismatch between the grid and the master's args is only
    // reported when DBG is set, and execution continues either way.
    if(NPROW != args.NPROW || NPCOL != args.NPCOL ||
       MYPROW != args.MYPROW || MYPCOL != args.MYPCOL || MYPNUM != args.MYPNUM){
        if(DBG) {
            std::cerr << "scalapack general parameter mismatch" << std::endl;
            // the "args" line reports the master's values, including args.MYPNUM
            std::cerr << "args NPROW:"<<args.NPROW<<" NPCOL:"<<args.NPCOL
                      << "MYPROW:"<<args.MYPROW<<" MYPCOL:"<<args.MYPCOL<<"MYPNUM:"<<args.MYPNUM
                      << std::endl;
            std::cerr << "ScaLAPACK NPROW:"<<NPROW<<" NPCOL:"<<NPCOL
                      << "MYPROW:"<<MYPROW<<" MYPCOL:"<<MYPCOL<<"MYPNUM:"<<MYPNUM
                      << std::endl;
        }
    }

    // local column counts of A, B and C as computed by numroc_ (at least 1)
    const slpp::int_t one = 1 ;
    const slpp::int_t  LTD_A = std::max(one, numroc_( args.A.DESC.N, args.A.DESC.NB, MYPCOL, /*CSRC_A*/0, NPCOL ));
    const slpp::int_t  LTD_B = std::max(one, numroc_( args.B.DESC.N, args.B.DESC.NB, MYPCOL, /*CSRC_B*/0, NPCOL ));
    const slpp::int_t  LTD_C = std::max(one, numroc_( args.C.DESC.N, args.C.DESC.NB, MYPCOL, /*CSRC_C*/0, NPCOL ));

    if(DBG) {
        std::cerr << "##################################################" << std::endl;
        std::cerr << "####pdgemmSlave##################################" << std::endl;
        std::cerr << "one:" << one << std::endl;
        std::cerr << "args.A.DESC.MB:" << args.A.DESC.MB << std::endl;
        std::cerr << "MYPROW:" << MYPROW << std::endl;
        std::cerr << "NPROW:" << NPROW << std::endl;
    }

    // size check A,B,C -- debugs first
    slpp::int_t SIZE_A = args.A.DESC.LLD * LTD_A ;
    slpp::int_t SIZE_B = args.B.DESC.LLD * LTD_B ;
    slpp::int_t SIZE_C = args.C.DESC.LLD * LTD_C ;
    if(DBG) {
        if(sizes[BUF_A] != SIZE_A *sizeof(double)) {
            std::cerr << "sizes[BUF_A]:" << sizes[BUF_A]
                      << " != args.A.DESC.LLD:" << args.A.DESC.LLD
                      << "* LTD_A" << LTD_A << "*" << sizeof(double) << std::endl;
        }
        if(sizes[BUF_B] != SIZE_B *sizeof(double)) {
            std::cerr << "sizes[BUF_B]:" << sizes[BUF_B]
                      << " != args.B.DESC.LLD:" << args.B.DESC.LLD
                      << "* LTD_B" << LTD_B << "*" << sizeof(double) << std::endl;
        }
        if(sizes[BUF_C] != SIZE_C *sizeof(double)) {
            std::cerr << "sizes[BUF_C]:" << sizes[BUF_C]
                      << " != args.C.DESC.LLD:" << args.C.DESC.LLD
                      << "* LTD_C" << LTD_C << "*" << sizeof(double) << std::endl;
        }
    }
    SLAVE_ASSERT_ALWAYS(sizes[BUF_A] >= SIZE_A * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_B] >= SIZE_B * sizeof(double));
    SLAVE_ASSERT_ALWAYS(sizes[BUF_C] >= SIZE_C * sizeof(double));

    // sizes are correct, give the pointers their names
    double* A = reinterpret_cast<double*>(bufs[BUF_A]) ;
    double* B = reinterpret_cast<double*>(bufs[BUF_B]) ;
    double* C = reinterpret_cast<double*>(bufs[BUF_C]) ;

    // debug that the input is readable and show its contents
    // (indices use the same type as the SIZE_* bounds)
    if(DBG) {
        for(slpp::int_t ii=0; ii < SIZE_A; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") A["<<ii<<"] = " << A[ii] << std::endl;
        }
        for(slpp::int_t ii=0; ii < SIZE_B; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") B["<<ii<<"] = " << B[ii] << std::endl;
        }
        for(slpp::int_t ii=0; ii < SIZE_C; ii++) {
            std::cerr << "Pgrid("<< MYPROW << "," << MYPCOL << ") C["<<ii<<"] = " << C[ii] << std::endl;
        }
    }


    // ScaLAPACK: the DESCS are complete except for the correct context
    args.A.DESC.CTXT= ICTXT ;
    // (no DESC for S)
    args.B.DESC.CTXT= ICTXT ;
    args.C.DESC.CTXT= ICTXT ;

    if(true || DBG) {    // we'll leave this on in Cheshire.0 and re-evaluate later
        std::cerr << "pdgemmSlave: argsBuf is: {" << std::endl;
        std::cerr << args << std::endl;
        std::cerr << "}" << std::endl << std::endl;

        std::cerr << "pdgemmSlave: calling pdgemm_ for computation, with args:" << std::endl ;
        std::cerr << "TRANSA: " << args.TRANSA
                  << ", TRANSB: " << args.TRANSB
                  << ", M: " << args.M
                  << ", N: " << args.N
                  << ", K: " << args.K << std::endl;

        std::cerr << "ALPHA: " << args.ALPHA << std::endl;

        std::cerr << "A: " <<  (void*)(A)
                  << ", A.I: " << args.A.I
                  << ", A.J: " << args.A.J << std::endl;
        std::cerr << ", A.DESC: " << args.A.DESC << std::endl;

        std::cerr << "B: " <<  (void*)(B)
                  << ", B.I: " << args.B.I
                  << ", B.J: " << args.B.J << std::endl;
        std::cerr << ", B.DESC: " << args.B.DESC << std::endl;

        std::cerr << "BETA: " << args.BETA << std::endl;

        std::cerr << "C: " <<  (void*)(C)
                  << ", C.I: " << args.C.I
                  << ", C.J: " << args.C.J << std::endl;
        std::cerr << ", C.DESC: " << args.C.DESC << std::endl;
    }

    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
    //////////////////////////////////////////////////////////////////////
    pdgemm_( args.TRANSA, args.TRANSB, args.M, args.N, args.K,
             &args.ALPHA,
             A,  args.A.I,  args.A.J,  args.A.DESC,
             B,  args.B.I,  args.B.J,  args.B.DESC,
             &args.BETA,
             C, args.C.I, args.C.J, args.C.DESC);

    if(true || DBG) {    // we'll leave this on in Cheshire.0 and re-evaluate later
        std::cerr << "pdgemmSlave: pdgemm_ complete (pdgemm_ has no result INFO)" << std::endl;
    }

    if (DBG) {
        std::cerr << "pdgemmSlave outputs: {" << std::endl;
        // debug prints of the outputs:
        for(slpp::int_t ii=0; ii < SIZE_C; ii++) {
            std::cerr << " C["<<ii<<"] = " << C[ii] << std::endl;
        }
        std::cerr << "}" << std::endl;
    }

    // TODO: what is the check on the pdgemm_ (pblas call) for successful completion?
    if (DBG) std::cerr << "pdgemmSlave returning successfully:" << std::endl;
    slpp::int_t INFO = 0 ;
    return INFO ;
}