コード例 #1
0
ファイル: base_mpi_blacs.c プロジェクト: shinichi81/pbdBASE
SEXP R_blacs_init(SEXP NPROW_in, SEXP NPCOL_in, SEXP ICTXT_in)
{
    R_INIT;
    SEXP NPROW, NPCOL, ICTXT, MYROW, MYCOL, RET, RET_NAMES;

    newRvec(NPROW, 1, "int");
    newRvec(NPCOL, 1, "int");
    newRvec(ICTXT, 1, "int");
    newRvec(MYROW, 1, "int");
    newRvec(MYCOL, 1, "int");

    INT(NPROW) = INT(NPROW_in);
    INT(NPCOL) = INT(NPCOL_in);
    INT(ICTXT) = INT(ICTXT_in);

    char order = 'R';

    /*  sl_init_(INTP(ICTXT), INTP(NPROW), INTP(NPCOL));*/
    Cblacs_get(INT(ICTXT_in), 0, INTP(ICTXT));
    Cblacs_gridinit(INTP(ICTXT), &order, INT(NPROW), INT(NPCOL));
    Cblacs_gridinfo(INT(ICTXT), INTP(NPROW), INTP(NPCOL), INTP(MYROW), INTP(MYCOL));

    RET_NAMES = make_list_names(5, "NPROW", "NPCOL", "ICTXT", "MYROW", "MYCOL");
    RET = make_list(RET_NAMES, 5, NPROW, NPCOL, ICTXT, MYROW, MYCOL);

    R_END;
    return(RET);
}
コード例 #2
0
ファイル: base_mpi_blacs.c プロジェクト: snoweye/pbdBASE
SEXP R_blacs_gridinit(SEXP NPROW_in, SEXP NPCOL_in, SEXP SHANDLE)
{
  R_INIT;
  SEXP NPROW, NPCOL, MYROW, MYCOL, RET, RET_NAMES, ICTXT;
  newRvec(NPROW, 1, "int");
  newRvec(NPCOL, 1, "int");
  newRvec(MYROW, 1, "int");
  newRvec(MYCOL, 1, "int");
  newRvec(ICTXT, 1, "int");
  
  INT(NPROW) = INT(NPROW_in);
  INT(NPCOL) = INT(NPCOL_in);
  INT(ICTXT) = INT(SHANDLE);
  
  char order = 'R';
  
  Cblacs_gridinit(INTP(ICTXT), &order, INT(NPROW), INT(NPCOL));
  
  Cblacs_gridinfo(INT(ICTXT), INTP(NPROW), INTP(NPCOL), INTP(MYROW), INTP(MYCOL));
  
  make_list_names(RET_NAMES, 5, "NPROW", "NPCOL", "ICTXT", "MYROW", "MYCOL");
  make_list(RET, RET_NAMES, 5, NPROW, NPCOL, ICTXT, MYROW, MYCOL);
  R_END;
  return(RET);
}
コード例 #3
0
ファイル: blacs.cpp プロジェクト: nooperpudd/Elemental
int GridInit( int bhandle, bool colMajor, int gridHeight, int gridWidth )
{ 
    int context = bhandle;
    const char* order = ( colMajor ? "Col" : "Row" );
    Cblacs_gridinit( &context, order, gridHeight, gridWidth ); 
    return context;
}
コード例 #4
0
/* Test program 
 * created 23/09/2014
 * author Alex Bombrun
 * 
 * icc -O1  -o eigen.exe lapackReadStore.c mpiutil.c normals.c matrixBlockStore.c -mkl
 * ./eigen.exe 4 4
 *
 */
int main(int argc, char **argv) {
  
    FILE* store;
    FILE* scaStore;
    
    int N , M;
    int i, j;
    
    int n_blocks;
    int scalapack_size;
    int NB, MB;
    int i_block, j_block;
    int dim[4];
    double * mat;  // local matrix block use for reading
        
    int t, t_block;
    
    const char* profileG_file_name= "./data/NormalsG/profile.txt";
    const char* store_location = "./data/ReducedNormals";
    const char* scaStore_location ="./data/DiagCholeskyReducedNormals";
    
    int mp;	 // number of rows in the processor grid
    int mla;   // number of rows in the local array
    int mb;    // number of rows in a block
    int np;	 // number of columns in the processor grid
    int nla;   // number of columns in the local array
    int nb;    // number of columns in a block
    
    int mype,npe; // rank and total number of process
    
    int idescal[9]; // matrix descriptors
    double *la; // matrix values: al is the local array
    
    int idescbl[9];
    double *lb;
    double normb;
    
    int idescxl[9];
    double *lx;
    double normx;
    
    int idesczl[9]; // matrix descriptors
    double *lz; // matrix values: al is the local array
    
    double *w;
   
    int ierr; // error output 
    int mp_ret, np_ret, myrow, mycol; // to store grid info
    
    int zero=0; // value used for the descriptor initialization
    int one=1; // value used for the descriptor initialization
    
    int  m,n; // matrix A dimensions
    double norm, cond;
    double *work = NULL;
    double * work2 = NULL;
    int *iwork = NULL;
    int lwork, liwork;


     float ll,mm,cr,cc;
      int ii,jj,pr,pc,h,g; // ii,jj coordinates of local array element
      int rsrc=0,csrc=0; // assume that 0,0 element should be stored in the 0,0 process
      int n_b = 1;
      int index;
    int icon; // scalapack cblacs context
    char normJob, jobz, uplo, trans, diag;
    
    double MPIt1, MPIt2, MPIelapsed;
    
    jobz= 'N'; uplo='U';
    Cblacs_pinfo( &mype, &npe );
    
     if (argc == 3) {
	//printf("%s %s %s\n", argv[0], argv[1], argv[2]);
	n_blocks= (int) strtol(argv[1], NULL, 10);
	scalapack_size= (int) strtol(argv[2], NULL, 10);
     } else {
	printf("Usage: expect 2 integers \n");
	printf(" 1 : the number of diagonal blocks \n");
	printf(" 2 : scalapack number to define block size (assume n is divisible by sqrt(p) and that n/sqrt(p) is divisible by this number)\n");
	exit( -1);
     }
    
  

    printf("%d/%d: read store\n",mype,npe);
   
    N = getNumberOfLine(profileG_file_name); // the dimension of the matrix;
    M = N; // square matrix
    
    m=M; //mla*mp;
    n=N; //nla*np;
   
    np = isqrt(npe); // assume that the number of process is a square
    mp = np; // square grid
    
    mla = m/mp; // assume that the matrix dimension if a multiple of the process grid dimension
    nla = n/np;
    
    mb = mla/scalapack_size; // assume that the dimension of the matrix is a multiple of the number of the number of diagonal blocks
    nb = nla/scalapack_size;
    
    // init CBLACS
    Cblacs_get( -1, 0, &icon );
    Cblacs_gridinit( &icon,"c", mp, np ); 
    Cblacs_gridinfo( icon, &mp_ret, &np_ret, &myrow, &mycol);
    
   

    // allocate local matrix
    la=malloc(sizeof(double)*mla*nla);
    printf("%d/%d: full matrix (%d,%d), local matrix (%d,%d), processor grid (%d,%d), block (%d,%d) \n", mype, npe, m, n, mla, nla, np, mp, mb, nb);

    // set identity matrix
    for(i = 0;i<M;i++){
	for(j = i;j<i+1;j++){
	    cr = (float)( i/mb );
	    h = rsrc+(int)(cr);
	    pr = h%np;
	    cc = (float)( j/mb );
	    g = csrc+(int)(cc);
	    pc = g%mp;
	    // check if process should get this element
	    if (myrow == pr && mycol==pc){
		// ii = x + l*mb
		// jj = y + m*nb
		ll = (float)( ( i/(np*mb) ) );  // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric
		mm = (float)( ( j/(mp*nb) ) );
		ii = i%mb + (int)(ll)*mb;
		jj = j%nb + (int)(mm)*nb;
		index=jj*mla+ii;   // seems to be the transpose !?
		//if(index<0) printf("%d/%d: negative index (%d,%d) \n",mype,npe,i,j);
		//if(index>=mla*nla) printf("%d/%d: too large index (%d,%d) \n",mype,npe,i,j);
		la[index] = 1;
	    }
	}
    }



/*
    for(i_block=0;i_block<n_blocks;i_block++){
      printf("%d/%d: process store block %d \n", mype, npe, i_block);
      readStore(&store,i_block,store_location);
      t_block = 0;
      while(readNextBlockDimension(dim,store)!=-1) { // loop B over all block tasks
	j_block = mpi_get_diag_block_id(i_block, t_block, n_blocks);
	mat = malloc((dim[1]-dim[0])*(dim[3]-dim[2]) * sizeof(double));         
	    
	readNextBlock(dim[0],dim[1],dim[2],dim[3],mat,store);
	if (dim[0]==dim[2]){ // process only the diagonal blocks

//	printf("%d/%d: read block (%d,%d) with global indices (%d,%d,%d,%d) \n",mype, npe, i_block,j_block,dim[0],dim[1],dim[2],dim[3]);
	
	NB = dim[1]-dim[0];
	MB = dim[3]-dim[2];
	for(i = dim[0];i<dim[1];i++){
	  for(j = dim[2];j<dim[3];j++){
	      //matA[i*M+j] = mat[(i-dim[0])*MB+(j-dim[2])];
	     // finding out which pe gets this i,j element
              cr = (float)( i/mb );
              h = rsrc+(int)(cr);
              pr = h%np;
              cc = (float)( j/mb );
              g = csrc+(int)(cc);
              pc = g%mp;
	      // check if process should get this element
              if (myrow == pr && mycol==pc){
		  // ii = x + l*mb
		  // jj = y + m*nb
                  ll = (float)( ( i/(np*mb) ) );  // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric
                  mm = (float)( ( j/(mp*nb) ) );
                  ii = i%mb + (int)(ll)*mb;
                  jj = j%nb + (int)(mm)*nb;
                  index=jj*mla+ii;   // seems to be the transpose !?
		  //if(index<0) printf("%d/%d: negative index (%d,%d) \n",mype,npe,i,j);
		  //if(index>=mla*nla) printf("%d/%d: too large index (%d,%d) \n",mype,npe,i,j);
                  la[index] = mat[(i-dim[0])*MB+(j-dim[2])];
              }
	  }
	}
	// transpose
	if(j_block != i_block){
	  for(i = dim[0];i<dim[1];i++){
	    for(j = dim[2];j<dim[3];j++){
	      //matA[j*M+i] = mat[(i-dim[0])*MB+(j-dim[2])];
	       // finding out which pe gets this j,i element
              cr = (float)( j/mb );
              h = rsrc+(int)(cr);
              pr = h%np;
              cc = (float)( i/mb );
              g = csrc+(int)(cc);
              pc = g%mp;
	      // check if process should get this element
              if (myrow == pr && mycol==pc){
		  // ii = x + l*mb
		  // jj = y + m*nb
                  ll = (float)( ( j/(np*mb) ) );  // thinks seems to be mixed up does not matter as long as the matrix, the block and the grid is symmetric
                  mm = (float)( ( i/(mp*nb) ) );
                  ii = j%mb + (int)(ll)*mb;
                  jj = i%nb + (int)(mm)*nb;
                  index=jj*mla+ii;   // seems to be the transpose !?
		  //if(index<0) printf("%d/%d: negative index (%d,%d) \n",mype,npe,i,j);
		  //if(index>=mla*nla) printf("%d/%d: too large index (%d,%d) \n",mype,npe,i,j);

                  la[index] = mat[(i-dim[0])*MB+(j-dim[2])];
              }
	    }
	  } 
	}

	}
	
	free(mat);
	t_block++;
      }


      closeStore(store);
    }
*/    
    
    printf("%d/%d: finished scaterring the matrix \n",mype,npe);
    
    printf("%d/%d: start computing \n",mype,npe);
       // set the matrix descriptor
    ierr=0;
    descinit_(idescal, &m, &n  , &mb, &nb , &zero, &zero, &icon, &mla, &ierr); // processor grip id start at 0
    if (mype==0) saveMatrixDescriptor(idescal, scaStore_location);
    
    
    ierr=0;
    descinit_(idescbl, &m, &one  , &mb, &nb , &zero, &zero, &icon, &nla, &ierr); // processor grip id start at 0
    lb = calloc(sizeof(double),mla);
    
    ierr=0;
    // set x
    descinit_(idescxl, &n, &one  , &mb, &nb , &zero, &zero, &icon, &nla, &ierr); // processor grip id start at 0
    lx = calloc(sizeof(double),mla);
    for(i=0;i<mla;i++){
      lx[i] = 1.0/m;
    }
    pddot_(&n,&normx,lx,&one,&one,idescxl,&one,lx,&one,&one,idescxl,&one); // normx <- x'x
    if (mype==0) printf("%d/%d: normx2 %E \n",mype,npe,normx);  
    
    
    ierr=0;
    // set b
    double alpha =1.0;
    double beta =0.0;
    trans = 'N';
    pdgemv_(&trans,&m,&n,&alpha,la,&one,&one,idescal,lx,&one,&one,idescxl,&one,&beta,lb,&one,&one,idescbl,&one); // b <- A x
    pddot_(&n,&normb,lb,&one,&one,idescbl,&one,lb,&one,&one,idescbl,&one); // norm <- b'b
    if (mype==0) printf("%d/%d: normb2 %E \n",mype,npe,normb);  
    
    
    ierr = 0;
    // compute norm 1 of the reduced normal matrix
    /* DO NOT WORK
    lwork = 2*mla+2*nla;
    work = malloc(sizeof(double)*lwork);
    normJob = '1';
    norm = pdlansy_(&normJob, &uplo, &n, la, &one, &one, idescal, work);  // matrix index start at one 
    printf("%d/%d: norm %f \n",mype,npe,norm);
    free(work);
    */
    
    ierr = 0;
    // compute the cholesky decomposition 
    printf("%d/%d: start computing cholesky factor\n",mype,npe);  
    pdpotrf_(&uplo,&n,la,&one,&one,idescal,&ierr);
    printf("%d/%d: finish computing cholesky factor\n",mype,npe);
    openScalapackStore(&scaStore,myrow,mycol,scaStore_location);
    saveLocalMatrix(la,nla,mla,scaStore);
    
    double test=0.0;
    for(i=0;i<nla*mla;i++){
	test += la[i]*la[i];
    }
    printf("%d/%d: finished computing cholesky, test=%f \n",mype,npe,test);
    
    ierr =0;
    // assume x and b set
    // assume cholesky decomposition
    // compute the soluation A x = b
    diag = 'N';
    printf("%d/%d: start solving\n",mype,npe);  
    //pdpptrs_(&uplo, &trans , &diag , &n , &one , la , &one , &one , idescal , lb , &one , &one , idescbl , &ierr); // solve triangular system
    //pdtrtrs (&uplo, &trans , &diag , &n , &n , la , &one , &one , idescal , lb , &one , &one , idescbl , &ierr);
    pdpotrs_(&uplo, &n , &one , la , &one , &one , idescal , lb , &one , &one , idescbl , &ierr); // b<- A-1 b
    
    alpha = -1.0;
    normb=0;
    pdaxpy_(&n,&alpha,lx,&one,&one,idescxl,&one,lb,&one,&one,idescbl,&one); // b<-b-x
    pddot_(&n,&normb,lb,&one,&one,idescbl,&one,lb,&one,&one,idescbl,&one); // norm <- b'b
    if (mype==0) printf("%d/%d: finish solving, norm2(sol-true) %E \n",mype,npe,normb);  
    
    

    ierr = 0;
    /*
    // compute the eigen values
    jobz= 'N'; uplo='U'; // with N z is ignored
    descinit_(idesczl, &m, &n  , &mb, &nb , &zero, &zero, &icon, &mla, &ierr);
    lz = malloc(sizeof(double)*mla*nla);
    w = malloc(sizeof(double)*m);
    lwork = -1;
    work = malloc(sizeof(double)*2);
    pdsyev_( &jobz, &uplo, &n, la, &one, &one, idescal, w, lz, &one, &one, idesczl, work, &lwork, &ierr);   // only compute lwork
    //pdsyev_( &jobz, &uplo, &n, A, &ione, &ione, descA, W, Z, &ione, &ione, descZ, work, &lwork, &info );
    lwork= (int) work[0];
    free(work);
    work = (double *)calloc(lwork,sizeof(double)) ;
    //MPIt1 = MPI_Wtime();
    pdsyev_( &jobz, &uplo, &n, la, &one, &one, idescal, w, lz, &one, &one, idesczl, work, &lwork, &ierr);   // compute the eigen values
    //MPIt2 = MPI_Wtime();
    //MPIelapsed=MPIt2-MPIt1;
    
    if (mype == 0) {
	saveMatrix(n,w,"eigenvalues.txt");
	//printf("%d/%d: finished job in %8.2fs\n",mype,npe,MPIelapsed); // not working
    }
    */
    
    ierr = 0;
    // compute the conditioner number assume that the norm and the cholesky decomposition have been computed
    /* DO NOT WORK
    lwork = 2*mla+3*nla;
    printf("%d/%d: lwork=%d @%p\n",mype,npe,lwork,&lwork);
    work2 = malloc(sizeof(double)*lwork);
    liwork = 2*mla+3*nla;
    iwork = malloc(sizeof(int)*liwork);
    pdpocon_(&uplo,&n,la,&one,&one,idescal,&norm,&cond,work2,&lwork,iwork,&liwork,&ierr);
    printf("%d/%d: condition number %f \n",mype,npe,cond);
    */
    
    free(la);
    Cblacs_gridexit(icon);
    Cblacs_exit( 0 );
    return 0;
}
コード例 #5
0
ファイル: ft_main_v1.c プロジェクト: suifengls/ee-pdgemm
int main (int argc, char **argv)
{
  //   double *A_local;
   int A_descrip[DESC_SIZE];
   //   double *B_local;
   int B_descrip[DESC_SIZE];
   //   double *C_local;
   int C_descrip[DESC_SIZE];

   int nproc_rows;
   int nproc_cols;
   int m, n, k;
   int blacs_grid;

   int myproc, nprocs;
   char myname[MPI_MAX_PROCESSOR_NAME];
   double *a, *b, *c;

   /* Get input parameters */
   m = GLOBAL_M;
   n = GLOBAL_N;
   k = GLOBAL_K; // 32  

   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
   MPI_Comm_rank(MPI_COMM_WORLD, &myproc);

   /* Ensure we have at least two processors */
   if (nprocs < 2) 
   {
       printf("Too few processors!\n");
       exit (1);
   }

   if(gethostname (myname, MPI_MAX_PROCESSOR_NAME) != 0 )
      printf("Error: gethostname failed!\n");
   else if(HELLO)
      printf("Hello from %2d of %2d on %s\n", myproc, nprocs, myname);

   /* Set to HIGH frequency */
   mapping(myproc%7, DVFS_HIGH);

   Cblacs_get(0, 0, &blacs_grid);

   int ldumap=PROC_NODE;
   nproc_rows=PROC_NODE;
   nproc_cols=PROC_NODE;
   /* ROW MAJOR TILING */
   if(MAJOR==1)
   {
      int usermap[64]=  {0,  1,  8,  9,  16, 17, 24, 25,
                         2,  3,  10, 11, 18, 19, 26, 27,
                         4,  5,  12, 13, 20, 21, 28, 29,
                         6,  7,  14, 15, 22, 23, 30, 31,
                         32, 33, 40, 41, 48, 49, 56, 57,
                         34, 35, 42, 43, 50, 51, 58, 59,
                         36, 37, 44, 45, 52, 53, 60, 61,
                         38, 39, 46, 47, 54, 55, 62, 63};

      Cblacs_gridmap(&blacs_grid, usermap, ldumap, nproc_rows, nproc_cols);
   }
   else if (MAJOR==2)	
    /* COLUMN MAJOR TILING*/
   {
      int usermap[64]={0,  1,  2,  3,  8,  9,  10, 11,
                       4,  5,  6,  7,  12, 13, 14, 15,
                       16, 17, 18, 19, 24, 25, 26, 27,
                       20, 21, 22, 23, 28, 29, 30, 31,
                       32, 33, 34, 35, 40, 41, 42, 43,
                       36, 37, 38, 39, 44, 45, 46, 47,
                       48, 49, 50, 51, 56, 57, 58, 59,
                       52, 53, 54, 55, 60, 61, 62, 63};
    

      Cblacs_gridmap(&blacs_grid, usermap, ldumap, nproc_rows, nproc_cols);
   }
   else if(MAJOR==0)
      Cblacs_gridinit(&blacs_grid, "R", nproc_rows, nproc_cols);
   //   Cblacs_pcoord(blacs_grid, myproc, &my_process_row, &my_process_col);

   int local_m = m/nproc_rows;
   int local_n = n/nproc_cols;
   int local_k = k/nproc_cols;

   if(myproc==SHOW1)
     printf("local m n k = %d %d %d\n",local_m, local_n, local_k);

   a = (double *) malloc (local_m*local_k * sizeof(double));
   b = (double *) malloc (local_k*local_n * sizeof(double));
   c = (double *) malloc (local_m*local_n * sizeof(double));

   //   A_local = (double *) malloc (local_m*local_k * sizeof(double));
   //   B_local = (double *) malloc (local_k*local_n * sizeof(double));
   //   C_local = (double *) malloc (local_m*local_n * sizeof(double));

   if(!a||!b||!c)//||!A_local||!B_local||!C_local)
   {
     printf("out of memory!\n");
     exit(-1);
   }

   Build_descrip(myproc, "A", A_descrip, m, k, local_m, local_k, blacs_grid, local_m);//MAX(local_m, local_k));
   Build_descrip(myproc, "B", B_descrip, k, n, local_k, local_n, blacs_grid, local_k);//MAX(local_k, local_n));
   Build_descrip(myproc, "C", C_descrip, m, n, local_m, local_n, blacs_grid, local_m);//MAX(local_m, local_n));

   if(myproc==SHOW1)
   {
     printf("\nA_descrip = [ %d, %d, %d, %d, %d, %d, %d, %d, %d]\n", 
     A_descrip[0], A_descrip[1], A_descrip[2], A_descrip[3], A_descrip[4], A_descrip[5], A_descrip[6], A_descrip[7], A_descrip[8]);
     printf("\nB_descrip = [ %d, %d, %d, %d, %d, %d, %d, %d, %d]\n", 
     B_descrip[0], B_descrip[1], B_descrip[2], B_descrip[3], B_descrip[4], B_descrip[5], B_descrip[6], B_descrip[7], B_descrip[8]);
     printf("\nC_descrip = [ %d, %d, %d, %d, %d, %d, %d, %d, %d]\n\n", 
     C_descrip[0], C_descrip[1], C_descrip[2], C_descrip[3], C_descrip[4], C_descrip[5], C_descrip[6], C_descrip[7], C_descrip[8]);
   }

       
   int ij = 1;
   char tran = 'N';
   double alpha = 1.0, beta = 1.0;
   double exetime=0;

   MPI_Barrier(MPI_COMM_WORLD);
  
   if(MEASURE && myproc==0)
   {
      system("/apps/power-bench/mclient -H 10.1.255.100 -d /tmp");
      system("/apps/power-bench/mclient -H 10.1.255.100 -l pdgemm.ptr");
      system("/apps/power-bench/mclient -H 10.1.255.100 -s pdgemm");
   }

    
   //   Zeros(A_local, local_m, local_k); 
   //   Zeros(B_local, local_k, local_n); 
   //   Zeros(C_local, local_m, local_n); 
   //   if(myproc%8==0)
   //   RndMatrix(A_local, local_m, local_k, myproc);
      //  if(myproc<8) 
   //   RndMatrix(B_local, local_k, local_n, myproc); 

   MPI_Barrier(MPI_COMM_WORLD);
   //   exetime0 = -MPI_Wtime();
   // ScaLAPACK pdgemm 

   if(!myproc)
     printf("\nM = %d, N = %d, K = %d\n", m, n, k); 
   /*
   pdgemm_(&tran, &tran, &m, &n, &k,
           &alpha, A_local, &ij, &ij, A_descrip,
                   B_local, &ij, &ij, B_descrip,
           &beta,  C_local, &ij, &ij, C_descrip);

  
   MPI_Barrier(MPI_COMM_WORLD);
   exetime0 += MPI_Wtime();
   
   CpyMatrix(A_local, a, local_m, local_k);
   CpyMatrix(B_local, b, local_k, local_n);
   Zeros(c, local_m, local_n); 
   */

   //   if(myproc%8==0)
      RndMatrix(a, local_m, local_k, myproc);
      //  if(myproc<8) 
      RndMatrix(b, local_k, local_n, myproc); 
      Zeros(c, local_m, local_n); 

   MPI_Barrier(MPI_COMM_WORLD);
   exetime = -MPI_Wtime();
   // My pdgemm 
   pdgemm(&tran, &tran, &m, &n, &k, &alpha, a, &ij, &ij, A_descrip, b, &ij, &ij, B_descrip, &beta, c, &ij, &ij, C_descrip);    
   //printf("MYPDGEMM finish\n");
   MPI_Barrier(MPI_COMM_WORLD);
   exetime += MPI_Wtime();
   

   if(MEASURE && myproc==0)
   {
      system("/apps/power-bench/mclient -H 10.1.255.100 -e session");
      system("/apps/power-bench/mclient -H 10.1.255.100 -e log");
   }

   mapping(myproc%7, DVFS_LOW);
   mapping(0, DVFS_HIGH);
   

   if(myproc == SHOW1)
   {
       sleep(1);
       //printf("Total execution time of my_pdgemm is %.3f.\n", exetime);
       printf("Total execution time of pdgemm is %.3f.\n", exetime);

       int i, j;
       /*
       printf("My PDGEMM ID AAA = %d :\n",myproc);   
       for(i=0;i<DISP_SIZE;i++)
       {
         for(j=0;j<DISP_SIZE;j++)
         	 printf("%8.5lf   ", a[i*DISP_SIZE+j]);
           printf("\n");
        }
      
       printf("My PDGEMM ID BBB = %d :\n",myproc);   
       for(i=0;i<DISP_SIZE;i++)
       {
         for(j=0;j<DISP_SIZE;j++)
         	 printf("%8.5lf   ", b[i*DISP_SIZE+j]);
           printf("\n");
        }
       */
       /*
       printf("My PDGEMM ID CCC = %d :\n",myproc);   
       for(i=0;i<DISP_SIZE;i++)
       {
         for(j=0;j<DISP_SIZE;j++)
         	 printf("%10.5lf\t", c[i*DISP_SIZE+j]);
           printf("\n");
        }
       */
       /*      
   }
   if(myproc == SHOW2)
   {
     sleep(3);
       printf("Total execution time of my_pdgemm is %.3f.\n", exetime);
       printf("Total execution time of pdgemm is %.3f.\n", exetime0);

       int i, j;
            
       printf("PDGEMM ID AAA = %d :\n",myproc);   
       for(i=0;i<DISP_SIZE;i++)
       {
         for(j=0;j<DISP_SIZE;j++)
         	 printf("%8.5lf   ", A_local[i*DISP_SIZE+j]);
           printf("\n");
        }
       
      
       printf("PDGEMM ID BBB = %d :\n",myproc);   
       for(i=0;i<DISP_SIZE;i++)
       {
         for(j=0;j<DISP_SIZE;j++)
         	 printf("%8.5lf   ", B_local[i*DISP_SIZE+j]);
           printf("\n");
        }
       */
       printf("PDGEMM ID CCC = %d :\n",myproc);   
       for(i=0;i<DISP_SIZE;i++)
       {
         for(j=0;j<DISP_SIZE;j++)
         	 printf("%10.5lf\t", c[i*DISP_SIZE+j]);
           printf("\n");
        }
   }

   //   double diffa, diffb, diffc, diff_total=0.0;

   //diffa=diff_norm(A_local, a, local_m, local_k);
   //diffb=diff_norm(B_local, b, local_k, local_n);
   //diffc=diff_norm(C_local, c, local_m, local_n);
   //MPI_Reduce(&diffa, &diff_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   //   sleep(1);
   /*
   if(!myproc)
      printf("The total normal difference between my pdgemm A and ScaLAPACK pdgemm A is %e.\n", diff_total);
   MPI_Reduce(&diffb, &diff_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   sleep(1);
   if(!myproc)
      printf("The total normal difference between my pdgemm B and ScaLAPACK pdgemm B is %e.\n", diff_total);
   MPI_Reduce(&diffc, &diff_total, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   sleep(1);
   if(!myproc)
      printf("The total normal difference between my pdgemm C and ScaLAPACK pdgemm C is %e.\n", diff_total);
   */

   free(a); free(b); free(c);
   //free(A_local);free(B_local);free(C_local);
   Cblacs_exit(1);
   /* Clean-up and close down */ 
   MPI_Barrier(MPI_COMM_WORLD);
   //MPI_Comm_free(&my_row_comm); 
   //MPI_Comm_free(&my_column_comm);  
   MPI_Finalize();
   return 0;
}
コード例 #6
0
ファイル: test_pzgemr2d.cpp プロジェクト: toxa81/testing
void test_gemr2d(int M, int N)
{
    int repeat = 10;
    
    int32_t one = 1;
    int32_t isrc = 0;

    int32_t num_ranks;
    MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
    int32_t rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int32_t bs_row_A = M / num_ranks + std::min(1, M % num_ranks);
    int32_t bs_col_A = 1;

    int32_t bs_row_B = 1;
    int32_t bs_col_B = 1;

    int32_t blacs_handler = Csys2blacs_handle(MPI_COMM_WORLD);
    int32_t context1 = blacs_handler;
    int32_t context2 = blacs_handler;

    /* create BLACS context */
    Cblacs_gridinit(&context1, "C", num_ranks, 1);
    /* get row and column ranks */
    int32_t rank_row1, rank_col1;
    Cblacs_gridinfo(context1, &num_ranks, &one, &rank_row1, &rank_col1);
    /* get local number of rows and columns of a matrix */
    int32_t num_rows_local1, num_cols_local1;
    num_rows_local1 = numroc_(&M, &bs_row_A, &rank_row1, &isrc, &num_ranks);
    num_cols_local1 = numroc_(&N, &bs_col_A, &rank_col1, &isrc, &one);


    Cblacs_gridinit(&context2, "C", 1, num_ranks);
    int32_t rank_row2, rank_col2;
    Cblacs_gridinfo(context2, &one, &num_ranks, &rank_row2, &rank_col2);
    int32_t num_rows_local2, num_cols_local2;
    num_rows_local2 = numroc_(&M, &bs_row_B, &rank_row2, &isrc, &one);
    num_cols_local2 = numroc_(&N, &bs_col_B, &rank_col2, &isrc, &num_ranks);

    if (rank == 0)
    {
        printf("local dimensions of A: %i x %i\n", num_rows_local1, num_cols_local1);
        printf("local dimensions of B: %i x %i\n", num_rows_local2, num_cols_local2);
    } 

    int32_t descA[9], descB[9], info;
    descinit_(descA, &M, &N, &bs_row_A, &bs_col_A, &isrc, &isrc, &context1, &num_rows_local1, &info);
    descinit_(descB, &M, &N, &bs_row_B, &bs_col_B, &isrc, &isrc, &context2, &num_rows_local2, &info);

    std::vector<double_complex> A(num_rows_local1 * num_cols_local1);
    std::vector<double_complex> B(num_rows_local2 * num_cols_local2, double_complex(0, 0));
    std::vector<double_complex> C(num_rows_local1 * num_cols_local1);

    for (int i = 0; i < num_rows_local1 * num_cols_local1; i++)
    {
        A[i] = double_complex(double(rand()) / RAND_MAX, double(rand()) / RAND_MAX);
        C[i] = A[i];
    }

    double time = -MPI_Wtime();
    for (int i = 0; i < repeat; i++)
    {
        pzgemr2d_(&M, &N, &A[0], &one, &one, descA, &B[0], &one, &one, descB, &context1);
    }
    time += MPI_Wtime();

    if (rank == 0)
    {
        printf("average time %.4f sec, swap speed: %.4f GB/sec\n", time / repeat,
               sizeof(double_complex) * repeat * M * N / double(1 << 30) / time);
    }

    /* check correctness */
    pzgemr2d_(&M, &N, &B[0], &one, &one, descB, &A[0], &one, &one, descA, &context1);
    for (int i = 0; i < num_rows_local1 * num_cols_local1; i++)
    {
        if (std::abs(A[i] - C[i]) > 1e-14)
        {
            printf("Fail.\n");
            exit(0);
        }
    }

    Cblacs_gridexit(context1);
    Cblacs_gridexit(context2);
    Cfree_blacs_system_handle(blacs_handler);
}
コード例 #7
0
ファイル: test_psgesvd.c プロジェクト: SCM-NV/primme
int main(int argc, char **argv) {
        int iam, nprocs;
        int myrank_mpi, nprocs_mpi;
        int ictxt, nprow, npcol, myrow, mycol;
        int nb, m, n;
        int mpA, nqA, mpU, nqU, mpVT, nqVT;
        int i, j, k, itemp, min_mn;
        int descA[9], descU[9], descVT[9];
        float *A=NULL;
        int info, infoNN, infoVV, infoNV, infoVN;
        float *U_NN=NULL,  *U_VV=NULL,  *U_NV=NULL,  *U_VN=NULL;
        float *VT_NN=NULL, *VT_VV=NULL, *VT_NV=NULL, *VT_VN=NULL;
        float *S_NN=NULL,  *S_VV=NULL, *S_NV=NULL, *S_VN=NULL;
        float *S_res_NN=NULL;
        float orthU_VV, residF, orthVT_VV;
        float orthU_VN, orthVT_NV;
        float  residS_NN, eps;
        float  res_repres_NV, res_repres_VN;
/**/
        int izero=0,ione=1;
        float rtmone=-1.0e+00;
/**/
        double MPIelapsedVV, MPIelapsedNN, MPIelapsedVN, MPIelapsedNV;
        char jobU, jobVT;
        int nbfailure=0, nbtestcase=0,inputfromfile, nbhetereogeneity=0;
        float threshold=100e+00;
        char buf[1024];
        FILE *fd;       
        char *c;
        char *t_jobU, *t_jobVT;
        int *t_m, *t_n, *t_nb, *t_nprow, *t_npcol;
        int nb_expe, expe;
        char hetereogeneityVV, hetereogeneityNN, hetereogeneityVN, hetereogeneityNV;
        int iseed[4], idist;
/**/
        MPI_Init( &argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
        MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
/**/
        m = 100; n = 100; nprow = 1; npcol = 1; nb = 64; jobU='A'; jobVT='A'; inputfromfile = 0;
        for( i = 1; i < argc; i++ ) {
                if( strcmp( argv[i], "-f" ) == 0 ) {
                        inputfromfile = 1;
                }
                if( strcmp( argv[i], "-jobvt" ) == 0 ) {
                        if (i+1<argc) {
                                if( strcmp( argv[i+1], "V" ) == 0 ){ jobVT = 'V'; i++; }
                                else if( strcmp( argv[i+1], "N" ) == 0 ){ jobVT = 'N'; i++; }
                                else if( strcmp( argv[i+1], "A" ) == 0 ){ jobVT = 'A'; i++; }
                                else printf(" ** warning: jobvt should be set to V, N or A in the command line ** \n");
                        }
                        else    
                                printf(" ** warning: jobvt should be set to V, N or A in the command line ** \n");
                }
                if( strcmp( argv[i], "-jobu" ) == 0 ) {
                        if (i+1<argc) {
                                if( strcmp( argv[i+1], "V" ) == 0 ){ jobU = 'V'; i++; }
                                else if( strcmp( argv[i+1], "N" ) == 0 ){ jobU = 'N'; i++; }
                                else if( strcmp( argv[i+1], "A" ) == 0 ){ jobU = 'A'; i++; }
                                else printf(" ** warning: jobu should be set to V, N or A in the command line ** \n");
                        }
                        else    
                                printf(" ** warning: jobu should be set to V, N or A in the command line ** \n");
                }
                if( strcmp( argv[i], "-m" ) == 0 ) {
                        m      = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-n" ) == 0 ) {
                        n      = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-p" ) == 0 ) {
                        nprow  = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-q" ) == 0 ) {
                        npcol  = atoi(argv[i+1]);
                        i++;
                }
                if( strcmp( argv[i], "-nb" ) == 0 ) {
                        nb     = atoi(argv[i+1]);
                        i++;
                }
        }
/**/
        if (inputfromfile){
                nb_expe = 0;
                fd = fopen("svd.dat", "r");
                if (fd == NULL) { printf("File failed to open svd.dat from processor mpirank(%d/%d): \n",myrank_mpi,nprocs_mpi); exit(-1); }
                do {    
                        c = fgets(buf, 1024, fd);  /* get one line from the file */
                        if (c != NULL)
                                if (c[0] != '#')
                                        nb_expe++;
                } while (c != NULL);              /* repeat until NULL          */
                fclose(fd);
                t_jobU  = (char *)calloc(nb_expe,sizeof(char)) ;
                t_jobVT = (char *)calloc(nb_expe,sizeof(char)) ;
                t_m     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_n     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nb    = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nprow = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_npcol = (int  *)calloc(nb_expe,sizeof(int )) ;
                fd = fopen("svd.dat", "r");
                expe=0;
                do {    
                        c = fgets(buf, 1024, fd);  /* get one line from the file */
                        if (c != NULL)
                                if (c[0] != '#'){
                                        //printf("NBEXPE = %d\n",expe);
                                        sscanf(c,"%c %c %d %d %d %d %d",
                                                &(t_jobU[expe]),&(t_jobVT[expe]),&(t_m[expe]),&(t_n[expe]),
                                                &(t_nb[expe]),(&t_nprow[expe]),&(t_npcol[expe]));
                                        expe++;
                                }
                } while (c != NULL);              /* repeat until NULL          */
                fclose(fd);
        }
        else {
                nb_expe = 1;
                t_jobU  = (char *)calloc(nb_expe,sizeof(char)) ;
                t_jobVT = (char *)calloc(nb_expe,sizeof(char)) ;
                t_m     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_n     = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nb    = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_nprow = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_npcol = (int  *)calloc(nb_expe,sizeof(int )) ;
                t_jobU[0]  = jobU;
                t_jobVT[0] = jobVT;
                t_m[0]     = m;
                t_n[0]     = n;
                t_nb[0]    = nb;
                t_nprow[0] = nprow;
                t_npcol[0] = npcol;
        }

        if (myrank_mpi==0){
                printf("\n");
                printf("--------------------------------------------------------------------------------------------------------------------\n");
                                printf("                            Testing psgsevd -- float precision SVD ScaLAPACK routine                \n");
                printf("jobU jobVT    m     n     nb   p   q   || info   heter   resid     orthU    orthVT   |SNN-SVV|    time(s)   cond(A) \n");
                printf("--------------------------------------------------------------------------------------------------------------------\n");
        }
/**/
        for (expe = 0; expe<nb_expe; expe++){

        jobU  = t_jobU[expe]  ; 
        jobVT = t_jobVT[expe] ; 
        m     = t_m[expe]     ; 
        n     = t_n[expe]     ; 
        nb    = t_nb[expe]    ; 
        nprow = t_nprow[expe] ; 
        npcol = t_npcol[expe] ; 

        if (nb>n)
                nb = n;
        if (nprow*npcol>nprocs_mpi){
                if (myrank_mpi==0)
                        printf(" **** ERROR : we do not have enough processes available to make a p-by-q process grid ***\n");
                        printf(" **** Bye-bye                                                                         ***\n");
                MPI_Finalize(); exit(1);
        }
/**/
        Cblacs_pinfo( &iam, &nprocs ) ;
        Cblacs_get( -1, 0, &ictxt );
        Cblacs_gridinit( &ictxt, "Row", nprow, npcol );
        Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
/**/
        min_mn = min(m,n);
/**/
        //if (iam==0)
                //printf("\tm=%d\tn = %d\t\t(%d,%d)\t%dx%d\n",m,n,nprow,npcol,nb,nb);
        //printf("Hello World, I am proc %d over %d for MPI, proc %d over %d for BLACS in position (%d,%d) in the process grid\n", 
                        //myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol);
/*
*
*     Work only the process in the process grid
*
*/
        //if ((myrow < nprow)&(mycol < npcol)){
        if ((myrow>-1)&(mycol>-1)&(myrow<nprow)&(mycol<npcol)){

/*
*
*     Compute the size of the local matrices (thanks to numroc)
*
*/ 
                mpA    = numroc_( &m     , &nb, &myrow, &izero, &nprow );
                nqA    = numroc_( &n     , &nb, &mycol, &izero, &npcol );
                mpU    = numroc_( &m     , &nb, &myrow, &izero, &nprow );
                nqU    = numroc_( &min_mn, &nb, &mycol, &izero, &npcol );
                mpVT   = numroc_( &min_mn, &nb, &myrow, &izero, &nprow );
                nqVT   = numroc_( &n     , &nb, &mycol, &izero, &npcol );
/*
*
*     Allocate and fill the matrices A and B
*
*/ 
                A = (float *)calloc(mpA*nqA,sizeof(float)) ;
                if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
/**/            
//              seed = iam*(mpA*nqA*2); srand(seed);
                idist = 2;
                iseed[0] = mpA%4096;
                iseed[1] = iam%4096;
                iseed[2] = nqA%4096;
                iseed[3] = 23;
/**/            
                k = 0;
                for (i = 0; i < mpA; i++) {
                        for (j = 0; j < nqA; j++) {
                                slarnv_( &idist, iseed, &ione, &(A[k]) );
                                k++;    
                        }
                }
/*
*
*     Initialize the array descriptor for the distributed matrices xA, U and VT
*
*/ 
                itemp = max( 1, mpA );
                descinit_( descA,  &m, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
                itemp = max( 1, mpA );
                descinit_( descU,  &m, &min_mn, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
                itemp = max( 1, mpVT );
                descinit_( descVT, &min_mn, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
/**/
                eps = pslamch_( &ictxt, "Epsilon" );
/**/
                if ( ((jobU=='V')&(jobVT=='N')) ||(jobU == 'A' )||(jobVT=='A')){
                nbtestcase++;   
                U_VN = (float *)calloc(mpU*nqU,sizeof(float)) ;
                if (U_VN==NULL){ printf("error of memory allocation U_VN on proc %dx%d\n",myrow,mycol); exit(0); }
                S_VN = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_VN==NULL){ printf("error of memory allocation S_VN on proc %dx%d\n",myrow,mycol); exit(0); }
                infoVN = driver_psgesvd( 'V', 'N', m, n, A, 1, 1, descA,
                        S_VN, U_VN, 1, 1, descU, VT_VN, 1, 1, descVT,
                        &MPIelapsedVN);
                orthU_VN  = verif_orthogonality(m,min_mn,U_VN , 1, 1, descU);
                res_repres_VN = verif_repres_VN( m, n, A, 1, 1, descA, U_VN, 1, 1, descU, S_VN);
                if (infoVN==min_mn+1) hetereogeneityVN = 'H'; else hetereogeneityVN = 'N';
                if ( iam==0 )
                        printf(" V    N   %6d %6d  %3d  %3d %3d  ||  %3d     %c   %7.1e   %7.1e                        %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoVN,hetereogeneityVN,res_repres_VN/(S_VN[0]/S_VN[min_mn-1]),
                                orthU_VN,MPIelapsedVN,S_VN[0]/S_VN[min_mn-1]);
                if (infoVN==min_mn+1) nbhetereogeneity++ ;
                else if ((res_repres_VN/eps/(S_VN[0]/S_VN[min_mn-1])>threshold)||(orthU_VN/eps>threshold)||(infoVN!=0)) nbfailure++;
                }
/**/
                if (((jobU=='N')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){
                nbtestcase++;   
                VT_NV = (float *)calloc(mpVT*nqVT,sizeof(float)) ;
                if (VT_NV==NULL){ printf("error of memory allocation VT_NV on proc %dx%d\n",myrow,mycol); exit(0); }
                S_NV = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_NV==NULL){ printf("error of memory allocation S_NV on proc %dx%d\n",myrow,mycol); exit(0); }
                infoNV = driver_psgesvd( 'N', 'V', m, n, A, 1, 1, descA,
                        S_NV, U_NV, 1, 1, descU, VT_NV, 1, 1, descVT,
                        &MPIelapsedNV);
                orthVT_NV = verif_orthogonality(min_mn,n,VT_NV, 1, 1, descVT);
                res_repres_NV = verif_repres_NV( m, n, A, 1, 1, descA, VT_NV, 1, 1, descVT, S_NV);
                if (infoNV==min_mn+1) hetereogeneityNV = 'H'; else hetereogeneityNV = 'N';
                if ( iam==0 )
                        printf(" N    V   %6d %6d  %3d  %3d %3d  ||  %3d     %c   %7.1e             %7.1e              %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoNV,hetereogeneityNV,res_repres_NV/(S_NV[0]/S_NV[min_mn-1]),
                                orthVT_NV,MPIelapsedNV,S_NV[0]/S_NV[min_mn-1]);
                if (infoNV==min_mn+1) nbhetereogeneity++ ;
                else if ((res_repres_NV/eps/(S_NV[0]/S_NV[min_mn-1])>threshold)||(orthVT_NV/eps>threshold)||(infoNV!=0)) nbfailure++;
                }
/**/
                if ( ((jobU=='N')&(jobVT=='N')) || ((jobU=='V')&(jobVT=='V')) || (jobU == 'A' ) || (jobVT=='A') ) {
                nbtestcase++;   
                U_VV = (float *)calloc(mpU*nqU,sizeof(float)) ;
                if (U_VV==NULL){ printf("error of memory allocation U_VV on proc %dx%d\n",myrow,mycol); exit(0); }
                VT_VV = (float *)calloc(mpVT*nqVT,sizeof(float)) ;
                if (VT_VV==NULL){ printf("error of memory allocation VT_VV on proc %dx%d\n",myrow,mycol); exit(0); }
                S_VV = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_VV==NULL){ printf("error of memory allocation S_VV on proc %dx%d\n",myrow,mycol); exit(0); }
                infoVV = driver_psgesvd( 'V', 'V', m, n, A, 1, 1, descA,
                        S_VV, U_VV, 1, 1, descU, VT_VV, 1, 1, descVT,
                        &MPIelapsedVV);
                orthU_VV  = verif_orthogonality(m,min_mn,U_VV , 1, 1, descU);
                orthVT_VV = verif_orthogonality(min_mn,n,VT_VV, 1, 1, descVT);
                residF =  verif_representativity( m, n,     A, 1, 1, descA,
                                                         U_VV, 1, 1, descU,
                                                        VT_VV, 1, 1, descVT,
                                                         S_VV);
                if (infoVV==min_mn+1) hetereogeneityVV = 'H'; else hetereogeneityVV = 'N';
                if ( iam==0 )
                        printf(" V    V   %6d %6d  %3d  %3d %3d  ||  %3d     %c   %7.1e   %7.1e   %7.1e              %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoVV,hetereogeneityVV,residF,orthU_VV,orthVT_VV,MPIelapsedVV,S_VV[0]/S_VV[min_mn-1]);
                if (infoVV==min_mn+1) nbhetereogeneity++ ;
                else if ((residF/eps>threshold)||(orthU_VV/eps>threshold)||(orthVT_VV/eps>threshold)||(infoVV!=0)) nbfailure++;
                }
/**/
                if (((jobU=='N')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){
                nbtestcase++;   
                S_NN = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_NN==NULL){ printf("error of memory allocation S_NN on proc %dx%d\n",myrow,mycol); exit(0); }
                infoNN = driver_psgesvd( 'N', 'N', m, n, A, 1, 1, descA,
                        S_NN, U_NN, 1, 1, descU, VT_NN, 1, 1, descVT,
                        &MPIelapsedNN);
                S_res_NN = (float *)calloc(min_mn,sizeof(float)) ;
                if (S_res_NN==NULL){ printf("error of memory allocation S on proc %dx%d\n",myrow,mycol); exit(0); }
                scopy_(&min_mn,S_VV,&ione,S_res_NN,&ione);
                saxpy_ (&min_mn,&rtmone,S_NN,&ione,S_res_NN,&ione);
                residS_NN = snrm2_(&min_mn,S_res_NN,&ione) / snrm2_(&min_mn,S_VV,&ione);
                free(S_res_NN);
                if (infoNN==min_mn+1) hetereogeneityNN = 'H'; else hetereogeneityNN = 'N';
                if ( iam==0 )
                        printf(" N    N   %6d %6d  %3d  %3d %3d  ||  %3d     %c                                  %7.1e   %8.2f    %7.1e\n",
                                m,n,nb,nprow,npcol,infoNN,hetereogeneityNN,residS_NN,MPIelapsedNN,S_NN[0]/S_NN[min_mn-1]);
                if (infoNN==min_mn+1) nbhetereogeneity++ ;
                else if ((residS_NN/eps>threshold)||(infoNN!=0)) nbfailure++;
                }
/**/
                if (((jobU=='V')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ free(S_VN); free(U_VN); }
                if (((jobU=='N')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ free(VT_NV); free(S_NV); }
                if (((jobU=='N')&(jobVT=='N'))||(jobU == 'A' )||(jobVT=='A')){ free(S_NN); }
                if (((jobU=='N')&(jobVT=='N'))||((jobU=='V')&(jobVT=='V'))||(jobU == 'A' )||(jobVT=='A')){ free(U_VV); free(S_VV); free(VT_VV);}
                free(A);
                Cblacs_gridexit( 0 );
        }
/*
*     Print ending messages
*/
        }
        if ( iam==0 ){
                printf("--------------------------------------------------------------------------------------------------------------------\n");
                printf("               [ nbhetereogeneity = %d / %d ]\n",nbhetereogeneity, nbtestcase);
                printf("               [ nbfailure        = %d / %d ]\n",nbfailure, nbtestcase-nbhetereogeneity);
                printf("--------------------------------------------------------------------------------------------------------------------\n");
                printf("\n");
        }
/**/
        free(t_jobU  );
        free(t_jobVT );
        free(t_m     );
        free(t_n     );
        free(t_nb    );
        free(t_nprow );
        free(t_npcol );
        MPI_Finalize();
        exit(0);
}
コード例 #8
0
ファイル: cosdist.c プロジェクト: kouamano/utility
int MAIN__(int argc, char** argv) {
    int num;  // number of data
    int dim;  // dimension of each data
    int nprow=4; // number of row
    int npcol=1;  // number of columnn
    int zero=0, one=1; // constant value
    int ictxt,myrow,mycol,pnum,pdim,info;
    char ifilename[LEN_FILENAME];
    char ofilename[LEN_FILENAME];

    int myproc, nprocs;
    Cblacs_pinfo(&myproc, &nprocs);
    Cblacs_setup(&myproc, &nprocs);
    Cblacs_get(-1,0,&ictxt);
    nprow = nprocs;
    npcol = 1; // fixed

    char order[] = "Row";
    Cblacs_gridinit(&ictxt, order, nprow, npcol);
    Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);

    if (DEBUG_MODE) {
        printf("ConTxt = %d\n", ictxt);
        printf("nprocs=%d, nprow=%d, npcol=%d\n", nprocs, nprow, npcol);
        printf("nprocs=%d, myrow=%d, mycol=%d\n", nprocs, myrow, mycol);
    }

    get_option(argc, argv, ifilename, ofilename, &num, &dim);

    // 0. cosinedist(ij) = 1 - V(i)V(j)/(Length(V(i))*Length(V(j)))

    // 1. calculate submatrix size
    int bsize = num / nprow; // blocking factor
    pnum = num / nprow;
    pdim = dim;
    if ( myrow < (num/bsize)%nprow) {
        pnum += bsize;
    }
    else if ( myrow == (num/bsize)%nprow) {
        pnum += (num % bsize);
    }
    else {
    }
    if(DEBUG_MODE)
        printf("myproc=%d: pnum=%d, pdim=%d, bsize=%d\n", myproc, pnum, pdim, bsize);

    int desc_input[9], desc_v[9], desc_ip[9], desc_n[9], desc_result[9];
    descinit_(desc_input,  &num, &dim, &num,   &dim,  &zero, &zero, &ictxt, &num,  &info);
    descinit_(desc_v,      &num, &dim, &bsize, &pdim, &zero, &zero, &ictxt, &pnum, &info);
    descinit_(desc_ip,     &num, &num, &bsize, &num,  &zero, &zero, &ictxt, &pnum, &info);
    descinit_(desc_n,      &num, &one, &bsize, &one,  &zero, &zero, &ictxt, &pnum, &info);
    descinit_(desc_result, &num, &num, &num,   &num,  &zero, &zero, &ictxt, &num,  &info);

    // 2. read input data
    double* input;
    if (myproc == 0) {
        input = (double*)malloc(sizeof(double)*num*dim);
        memset(input, 0, sizeof(double)*num*dim);
        read_data(ifilename, num, dim, input);
        printArray("input", myproc, input, num, dim);
    }

    // 3. distribute input data array
    double* V = (double*)malloc(sizeof(double)*pnum*pdim);
    memset(V, 0, sizeof(double)*pnum*pdim);
    Cpdgemr2d(num, dim, input, 1, 1, desc_input, V, 1, 1, desc_v, ictxt);
    printArray("V", myproc, V, pnum, pdim);

    // 4. InnerProduct = VV'
    double* InnerProduct = (double*)malloc(sizeof(double)*pnum*num);
    memset(InnerProduct, 0, sizeof(double)*pnum*num);
    char transa = 'N', transb = 'T';
    int m = num, n = num, k = dim;
    int lda = num, ldb = num, ldc = num;
    double alpha = 1.0f, beta = 0.0f;
    pdgemm_(&transa, &transb, &m, &n, &k, &alpha, V, &one, &one, desc_v, V, &one, &one, desc_v, &beta, InnerProduct, &one, &one, desc_ip);
    printArray("InnerProduct", myproc, InnerProduct, pnum, num);

    // 5. Norm of each vector
    double* Norm = (double*)malloc(sizeof(double)*pnum);
    for (int i = 0; i < pnum; i++) {
        int n = ((myproc*bsize)+(i/bsize)*(nprocs-1)*bsize+i)*pnum + i;
        Norm[i] = sqrt(InnerProduct[n]);
    }
    printArray("Norm", myproc, Norm, 1, pnum);

    // 6. Norm product matrix
    double* NormProduct = (double*)malloc(sizeof(double)*pnum*num);
    memset(NormProduct, 0, sizeof(double)*pnum*num);
    char uplo = 'U';
    n = num;
    alpha = 1.0f;
    int incx = 1;
    lda = num;
    pdsyr_(&uplo, &n, &alpha, Norm, &one, &one, desc_n, &incx, NormProduct, &one, &one, desc_ip);
    printArray("NormProduct", myproc, NormProduct, pnum, num);

    // 7. CosineDistance(ij) = 1-InnerProduct(ij)/NormProduct(ij)
    double* CosineDistance = (double*)malloc(sizeof(double)*pnum*num);
    memset(CosineDistance, 0, sizeof(double)*pnum*num);
    for (int j = 0; j < num; j++) {
        for (int i = 0; i < pnum; i++) {
            int n = ((myproc*bsize)+i+(i/bsize)*(nprocs-1)*bsize)*pnum+i;
            int p = i+j*pnum;
            if (p<=n) {
                CosineDistance[p] = 0.0;
            }
            else {
                CosineDistance[p] = 1 - InnerProduct[p]/NormProduct[p];
            }
        }
    }
    printArray("CosineDistance", myproc, CosineDistance, pnum, num);

    // 8. gather result
    double* result;
    if ( myproc == 0 ) {
        result = (double*)malloc(sizeof(double)*num*num);
        memset(result, 0, sizeof(double)*num*num);
    }
    Cpdgemr2d(num, num, CosineDistance, 1, 1, desc_ip, result, 1, 1, desc_result, ictxt);

    // 9. output to file
    if ( myproc == 0 ) {
        output_results(ofilename, result, num, num);
    }

    // a. cleanup memory
    free(V);
    free(InnerProduct);
    free(Norm);
    free(NormProduct);
    free(CosineDistance);
    if ( myproc == 0 ) {
        free(input);
        free(result);
    }

    blacs_exit_(&zero);

    return 0;
}
コード例 #9
0
ファイル: pdgemm_bench.c プロジェクト: jrs65/scalapy
int main(int argc, char **argv) {

  int ictxt, nside, ngrid, nblock, nthread;
  int rank, size;
  int ic, ir, nc, nr;

  int i, j;

  char *fname;
  
  int info, ZERO=0, ONE=1;

  struct timeval st, et;

  double dtnn, dtnt, dttn, dttt;
  double gfpc_nn, gfpc_nt, gfpc_tn, gfpc_tt;

  /* Initialising MPI stuff */
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  printf("Process %i of %i.\n", rank, size);


  /* Parsing arguments */
  if(argc < 6) {
    exit(-3);
  }

  nside = atoi(argv[1]);
  ngrid = atoi(argv[2]);
  nblock = atoi(argv[3]);
  nthread = atoi(argv[4]);
  fname = argv[5];

  if(rank == 0) {
    printf("Multiplying matrices of size %i x %i\n", nside, nside);
    printf("Process grid size %i x %i\n", ngrid, ngrid);
    printf("Block size %i x %i\n", nblock, nblock);
    printf("Using %i OpenMP threads\n", nthread);
  }



  #ifdef _OPENMP
  if(rank == 0) printf("Setting OMP_NUM_THREADS=%i\n", nthread);
  omp_set_num_threads(nthread);
  #endif

  /* Setting up BLACS */
  Cblacs_pinfo( &rank, &size ) ;
  Cblacs_get(-1, 0, &ictxt );
  Cblacs_gridinit(&ictxt, "Row", ngrid, ngrid);
  Cblacs_gridinfo(ictxt, &nr, &nc, &ir, &ic);

  int descA[9], descB[9], descC[9];

  /* Fetch local array sizes */
  int Ar, Ac, Br, Bc, Cr, Cc;

  Ar = numroc_( &nside, &nblock, &ir, &ZERO, &nr);
  Ac = numroc_( &nside, &nblock, &ic, &ZERO, &nc);

  Br = numroc_( &nside, &nblock, &ir, &ZERO, &nr);
  Bc = numroc_( &nside, &nblock, &ic, &ZERO, &nc);

  Cr = numroc_( &nside, &nblock, &ir, &ZERO, &nr);
  Cc = numroc_( &nside, &nblock, &ic, &ZERO, &nc);

  printf("Local array section %i x %i\n", Ar, Ac);

  /* Set descriptors */
  descinit_(descA, &nside, &nside, &nblock, &nblock, &ZERO, &ZERO, &ictxt, &Ar, &info);
  descinit_(descB, &nside, &nside, &nblock, &nblock, &ZERO, &ZERO, &ictxt, &Br, &info);
  descinit_(descC, &nside, &nside, &nblock, &nblock, &ZERO, &ZERO, &ictxt, &Cr, &info);

  /* Initialise and fill arrays */
  double *A = (double *)malloc(Ar*Ac*sizeof(double));
  double *B = (double *)malloc(Br*Bc*sizeof(double));
  double *C = (double *)malloc(Cr*Cc*sizeof(double));


  for(i = 0; i < Ar; i++) {
    for(j = 0; j < Ac; j++) {
      A[j*Ar + i] = drand48();
      B[j*Br + i] = drand48();
      C[j*Cr + i] = 0.0;
    }
  }

  double alpha = 1.0, beta = 0.0;

  //========================
  
  if(rank == 0) printf("Starting multiplication (NN).\n");

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&st, NULL);

  pdgemm_("N", "N", &nside, &nside, &nside,
	  &alpha,
	  A, &ONE, &ONE, descA,
	  B, &ONE, &ONE, descB,
	  &beta,
	  C, &ONE, &ONE, descC );

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&et, NULL);
  dtnn = (double)((et.tv_sec-st.tv_sec) + (et.tv_usec-st.tv_usec)*1e-6);
  gfpc_nn = 2.0*pow(nside, 3) / (dtnn * 1e9 * ngrid * ngrid * nthread);

  if(rank == 0) printf("Done.\n=========\nTime taken: %g s\nGFlops per core: %g\n=========\n", dtnn, gfpc_nn);

  //========================



  //========================

  if(rank == 0) printf("Starting multiplication (NT).\n");

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&st, NULL);

  pdgemm_("N", "T", &nside, &nside, &nside,
	  &alpha,
	  A, &ONE, &ONE, descA,
	  B, &ONE, &ONE, descB,
	  &beta,
	  C, &ONE, &ONE, descC );

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&et, NULL);
  dtnt = (double)((et.tv_sec-st.tv_sec) + (et.tv_usec-st.tv_usec)*1e-6);
  gfpc_nt = 2.0*pow(nside, 3) / (dtnt * 1e9 * ngrid * ngrid * nthread);

  if(rank == 0) printf("Done.\n=========\nTime taken: %g s\nGFlops per core: %g\n=========\n", dtnt, gfpc_nt);

  //========================



  //========================

  if(rank == 0) printf("Starting multiplication (TN).\n");

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&st, NULL);

  pdgemm_("T", "N", &nside, &nside, &nside,
	  &alpha,
	  A, &ONE, &ONE, descA,
	  B, &ONE, &ONE, descB,
	  &beta,
	  C, &ONE, &ONE, descC );

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&et, NULL);
  dttn = (double)((et.tv_sec-st.tv_sec) + (et.tv_usec-st.tv_usec)*1e-6);
  gfpc_tn = 2.0*pow(nside, 3) / (dttn * 1e9 * ngrid * ngrid * nthread);

  if(rank == 0) printf("Done.\n=========\nTime taken: %g s\nGFlops per core: %g\n=========\n", dttn, gfpc_tn);

  //========================



  //========================

  if(rank == 0) printf("Starting multiplication (TT).\n");

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&st, NULL);

  pdgemm_("T", "T", &nside, &nside, &nside,
	  &alpha,
	  A, &ONE, &ONE, descA,
	  B, &ONE, &ONE, descB,
	  &beta,
	  C, &ONE, &ONE, descC );

  Cblacs_barrier(ictxt,"A");
  gettimeofday(&et, NULL);
  dttt = (double)((et.tv_sec-st.tv_sec) + (et.tv_usec-st.tv_usec)*1e-6);
  gfpc_tt = 2.0*pow(nside, 3) / (dttt * 1e9 * ngrid * ngrid * nthread);

  if(rank == 0) printf("Done.\n=========\nTime taken: %g s\nGFlops per core: %g\n=========\n", dttt, gfpc_tt);

  //========================




  if(rank == 0) {
    FILE * fd;
    fd = fopen(fname, "w");
    fprintf(fd, "%g %g %g %g %i %i %i %i %g %g %g %g\n", gfpc_nn, gfpc_nt, gfpc_tn, gfpc_tt, nside, ngrid, nblock, nthread, dtnn, dtnt, dttn, dttt);
    fclose(fd);
  }

  Cblacs_gridexit( 0 );
  MPI_Finalize();

}