Example #1
void FORTRAN_API mpi_file_write_all_(MPI_Fint *fh,void *buf,int *count,
                       MPI_Datatype *datatype,MPI_Status *status, int *ierr ){
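    /* Fortran binding shim: convert the Fortran file handle to its C
       counterpart, call the C routine, and return the result through ierr. */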
    MPI_File fh_c;
    
    fh_c = MPI_File_f2c(*fh);
    *ierr = MPI_File_write_all(fh_c,buf,*count,*datatype,status);
}
Example #2
void seissol::checkpoint::mpio::Wavefield::write(double time, int timestepWaveField)
{
	EPIK_TRACER("CheckPoint_write");
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Writing check point.";

	// Write the header
	writeHeader(time, timestepWaveField);

	// Save data
	EPIK_USER_REG(r_write_wavefield, "checkpoint_write_wavefield");
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	EPIK_USER_START(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);

	checkMPIErr(setDataView(file()));

	checkMPIErr(MPI_File_write_all(file(), dofs(), numDofs(), MPI_DOUBLE, MPI_STATUS_IGNORE));

	EPIK_USER_END(r_write_wavefield);
	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Writing check point. Done.";
}
Example #3
void mpi_file_write_all_(MPI_Fint *fh,void *buf,int *count,
                       MPI_Fint *datatype,MPI_Status *status, int *ierr ){
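    /* Fortran binding shim: both the file handle and the datatype arrive as
       Fortran handles and are converted with MPI_File_f2c / MPI_Type_f2c
       before calling the C routine. */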
    MPI_File fh_c;
    MPI_Datatype datatype_c;
    
    fh_c = MPI_File_f2c(*fh);
    datatype_c = MPI_Type_f2c(*datatype);

    *ierr = MPI_File_write_all(fh_c,buf,*count,datatype_c,status);
}
Example #4
PetscErrorCode MPIU_File_write_all(MPI_File fd,void *data,PetscMPIInt cnt,MPI_Datatype dtype,MPI_Status *status)
{
  PetscErrorCode ierr;
  PetscDataType  pdtype;

  PetscFunctionBegin;
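  /* Byte-swap the buffer to the file byte order, write it collectively,
     then swap it back so the caller's in-memory data is left unchanged. */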
  ierr = PetscMPIDataTypeToPetscDataType(dtype,&pdtype);CHKERRQ(ierr);
  ierr = PetscByteSwap(data,pdtype,cnt);CHKERRQ(ierr);
  ierr = MPI_File_write_all(fd,data,cnt,dtype,status);CHKERRQ(ierr);
  ierr = PetscByteSwap(data,pdtype,cnt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Example #5
JNIEXPORT void JNICALL Java_mpi_File_writeAll(
        JNIEnv *env, jobject jthis, jlong fh, jobject buf, jboolean db,
        jint off, jint count, jlong jType, jint bType, jlongArray stat)
{
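    // JNI binding: pin/copy the Java buffer, run the collective write,
    // then release the buffer and publish the MPI status back to Java.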
    MPI_Datatype type = (MPI_Datatype)jType;
    void *ptr;
    ompi_java_buffer_t *item;
    ompi_java_getReadPtr(&ptr, &item, env, buf, db, off, count, type, bType);
    MPI_Status status;
    int rc = MPI_File_write_all((MPI_File)fh, ptr, count, type, &status);
    ompi_java_exceptionCheck(env, rc);
    ompi_java_releaseReadPtr(ptr, item, buf, db);
    ompi_java_status_set(env, stat, &status);
}
Example #6
void seissol::checkpoint::mpio::Wavefield::write(const void* header, size_t headerSize)
{
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Checkpoint backend: Writing.";

	// Write the header
	writeHeader(header, headerSize);

	// Save data
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);
	checkMPIErr(setDataView(file()));

	unsigned int totalIter = totalIterations();
	unsigned int iter = iterations();
	unsigned int count = dofsPerIteration();
	if (m_useLargeBuffer) {
		totalIter = (totalIter + sizeof(real) - 1) / sizeof(real);
		iter = (iter + sizeof(real) - 1) / sizeof(real);
		count *= sizeof(real);
	}
	unsigned long offset = 0;
	for (unsigned int i = 0; i < totalIter; i++) {
		if (i == iter-1)
			// Last iteration
			count = numDofs() - (iter-1) * count;

		checkMPIErr(MPI_File_write_all(file(), const_cast<real*>(&dofs()[offset]), count, MPI_DOUBLE, MPI_STATUS_IGNORE));

		if (i < iter-1)
			offset += count;
		// otherwise we just continue writing the last chunk over and over
		else if (i != totalIter-1)
			checkMPIErr(MPI_File_seek(file(), -count * sizeof(real), MPI_SEEK_CUR));
	}

	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Checkpoint backend: Writing. Done.";
}
Example #7
int savematrix_rows(MPI_File *fh, float *data, int numrows, int rank, int numtasks, int m, int n)
{
	MPI_Datatype darray;
	MPI_Status status;
	int gsizes[2] = {m, n};
	int distribs[2] = {MPI_DISTRIBUTE_BLOCK, MPI_DISTRIBUTE_BLOCK};
	int dargs[2] = {MPI_DISTRIBUTE_DFLT_DARG, MPI_DISTRIBUTE_DFLT_DARG};
	int psizes[2] = {4, 1};
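	/* NOTE: the process grid is hard-coded to 4x1, so this routine assumes
	   exactly four ranks, each owning a block of whole rows. */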
	
	MPI_Type_create_darray(numtasks, rank, 2, gsizes, distribs, dargs, psizes, MPI_ORDER_C, MPI_FLOAT, &darray);
	MPI_Type_commit(&darray);
	
	MPI_File_set_view(*fh, 0, MPI_FLOAT, darray, "native", MPI_INFO_NULL);
	
	MPI_File_write_all(*fh, data, numrows*n, MPI_FLOAT, &status);
	
	MPI_Type_free(&darray);
	
	return 0;
}
Example #8
int main(int argc, char **argv) {
    int ierr, rank, size;
    MPI_Offset offset;
    MPI_File   file;
    MPI_Status status;
    int npts=200;
    int start;
    int locnpts;
    float *data;
    MPI_Datatype viewtype;

    ierr = MPI_Init(&argc, &argv);
    ierr|= MPI_Comm_size(MPI_COMM_WORLD, &size);
    ierr|= MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    
    locnpts = npts/size;
    start = locnpts * rank;
    if (rank == size-1)
        locnpts = (npts-start);
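    /* Each rank now allocates and fills its locnpts-element block of the sine wave. */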

    data = malloc(locnpts * sizeof(float));

    for (int i=0; i<locnpts; i++)
        data[i] = sin((start+i)*1.0*8*atan(1.)/npts);

    MPI_File_open(MPI_COMM_WORLD, "sine.dat", 
                  MPI_MODE_CREATE|MPI_MODE_WRONLY,
                  MPI_INFO_NULL, &file);

    /*  Other MPI-IO stuff */

    MPI_File_write_all(file, data, locnpts, MPI_FLOAT, &status);

    MPI_File_close(&file);
   
    free(data);
    MPI_Finalize();
    return 0;
}
Example #9
void MPIIO_WriteData(simulation_data *sim, char *Filename)
{
  int dimuids[3]={sim->global_dims[2], sim->global_dims[1], sim->global_dims[0]};
  int f, rc, ustart[3], ucount[3];
  MPI_Offset disp = 0;
  long offset;
  MPI_File      filehandle;
  MPI_Datatype  filetype;

  rc = MPI_File_open(sim->comm_cart, Filename,
                     MPI_MODE_CREATE | MPI_MODE_WRONLY,
                     MPI_INFO_NULL, &filehandle);

  ustart[2] = sim->grid.Ncolumns * sim->coords[0];
  ustart[1] = sim->grid.Nrows * sim->coords[1];
  ustart[0] = 0;
  ucount[2] = sim->grid.Ncolumns;
  ucount[1] = sim->grid.Nrows;
  ucount[0] = sim->grid.Nlevels;

  // Create the subarray representing the local block
  MPI_Type_create_subarray(3, dimuids, ucount, ustart,
                           MPI_ORDER_C, MPI_FLOAT, &filetype);
  MPI_Type_commit(&filetype);

	
  for(f=0; f < NFIELDS; f++)
    {
    MPI_File_set_view(filehandle, disp, MPI_FLOAT, filetype, "native", MPI_INFO_NULL);

    MPI_File_write_all(filehandle, sim->grid.data[f],
                       ucount[0]*ucount[1]*ucount[2],
                       MPI_FLOAT, MPI_STATUS_IGNORE);
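    /* Advance the displacement by one full global field (in bytes) so the
       next field starts right after this one in the file. */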
    disp += sim->global_dims[2] * sim->global_dims[1] * sim->global_dims[0] * sizeof(float);
    }
  MPI_File_close(&filehandle);
  MPI_Type_free(&filetype);
}
Example #10
int main(int argc, char *argv[])
{
    int iarrayOfSizes[2], iarrayOfSubsizes[2], iarrayOfStarts[2], ilocal_size;
    int nproc[2], periods[2], icoord[2];
    int m, n, i, j, wsize, wrank, crank, ndims, lrows, lcols, grow, gcol, err;
    MPI_Datatype filetype;
    MPI_File     fh;
    MPI_Comm     cartcomm;
    MPI_Info     info0, info3;
    double       t, topen, twrite, tclose, wrate;
    double       *local_array;
    char         nstripesStr[12], stripeUnitStr[12];
    int          nstripes = -1;
    int          stripeUnit = -1;
    MPI_Offset   headerSize = 0;

    MPI_Init(0,0);

    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);

    /* Get global array size */
    m = n = 128;      /* Set default size */

    /* ioda [ n ] [ m ] [ nstripes ] [ stripeunit ] [ headersize ] */
    if (argc > 0) {
	if (argc > 1) m = atoi(argv[1]);
	if (argc > 2) n = atoi(argv[2]);
	if (argc > 3) nstripes = atoi(argv[3]);
	if (argc > 4) stripeUnit = atoi(argv[4]);
        if (argc > 5) headerSize = atoi(argv[5]);
	if (argc > 6) {
	    if (wrank == 0)
		fprintf(stderr,"Unrecognized argument %s\n", argv[6]);
	    MPI_Abort(MPI_COMM_WORLD,1);
	}
    }
    if (wrank == 0) printf("Matrix is [%d,%d]; file dir = %s\n", m, n, MYSCRATCHDIR );

    /* The default number of stripes = totalsize/1M */
    if (nstripes < 0) {
	nstripes = n * m * sizeof(double) / (1024*1024);
	if (nstripes < 1) nstripes = 1;
    }
    if (wrank == 0) printf("nstripes = %d, stripeUnit = %d, header size = %d\n",
                           nstripes, stripeUnit, (int)headerSize);

    /* Use topology routines to get decomposition and coordinates */
    MPI_Comm_size(MPI_COMM_WORLD, &wsize);
    nproc[0] = 0; nproc[1] = 0;
    ndims = 2;
    MPI_Dims_create(wsize, ndims, nproc);
    periods[0] = 0; periods[1] = 0;
    MPI_Cart_create(MPI_COMM_WORLD, ndims, nproc, periods, 1, &cartcomm);
    MPI_Comm_rank(cartcomm, &crank);
    MPI_Cart_coords(cartcomm, crank, ndims, icoord);

    iarrayOfSizes[0]    = m;
    iarrayOfSizes[1]    = n;
    iarrayOfSubsizes[0] = m/nproc[0];
    iarrayOfSubsizes[1] = n/nproc[1];
    iarrayOfStarts[0]   = icoord[0] * iarrayOfSubsizes[0];
    iarrayOfStarts[1]   = icoord[1] * iarrayOfSubsizes[1];

    /* Initialize my block of the data */
    ilocal_size = iarrayOfSubsizes[0] * iarrayOfSubsizes[1];
    lrows = iarrayOfSubsizes[0];
    lcols = iarrayOfSubsizes[1];
    local_array = (double *)malloc(lrows*lcols*sizeof(double));
    gcol  = iarrayOfStarts[1];
    grow = iarrayOfStarts[0];
    for (i=0; i<lrows; i++) {
	for (j=0; j<lcols; j++) {
	    local_array[j*lrows+i] = (grow+i) + (gcol+j)*m;
	}
    }

    /* Fortran order simply means the data is stored by columns */
    MPI_Type_create_subarray(ndims, iarrayOfSizes, iarrayOfSubsizes,
			     iarrayOfStarts, MPI_ORDER_FORTRAN, MPI_DOUBLE,
			     &filetype);
    MPI_Type_commit(&filetype);

    info0 = MPI_INFO_NULL;
    info3 = MPI_INFO_NULL;
    if (nstripes > 0 || stripeUnit > 0) {
	MPI_Info_create(&info0);
	if (nstripes > 0) {
	    snprintf(nstripesStr, sizeof(nstripesStr), "%d", nstripes);
	    MPI_Info_set(info0, "striping_factor", nstripesStr);
	    MPI_Info_set(info0, "cb_nodes", nstripesStr);
	}
	if (stripeUnit > 0) {
	    snprintf(stripeUnitStr, sizeof(stripeUnitStr), "%d", stripeUnit);
	    MPI_Info_set(info0, "striping_unit", stripeUnitStr);
	}
	MPI_Info_dup(info0, &info3);
	MPI_Info_set(info3, "romio_no_indep_rw", "true");

	/* Other hints to consider:
	   direct_io=true

	   The default cb_buffer_size is 16777216 , but is overridden by the
	   striping unit, which is smaller by default.
	*/
    }

    /* level - 3 */
    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-3.out",
			MPI_MODE_CREATE | MPI_MODE_RDWR, info3, &fh);
    topen = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "open testfile-3.out");

    if (headerSize > 0) {
        /* Simulate writing a header */
        if (wrank == 0) {
	    char *header;
            header = (char *)calloc(1,(size_t)headerSize);
            MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
            free(header);
        }
        MPI_Barrier(cartcomm);
    }

    MPI_File_set_view(fh, headerSize, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);
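    /* Level-3 access: one collective MPI_File_write_all moves each rank's
       whole local block through the subarray file view. */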

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_write_all(fh, local_array, ilocal_size, MPI_DOUBLE,
			     MPI_STATUS_IGNORE);
    twrite = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "collective write");

    err = MPI_File_close(&fh);
    tclose = MPI_Wtime() - t;
    /* tclose is the time for the write(s) + the close, in case the
       implementation delays (some of) the writes until the close */
    if (err != MPI_SUCCESS) myAbort(err, "close testfile-3.out");

    MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    if (twrite > 0)
	wrate = (double)m * (double)n * sizeof(double)/twrite;
    if (wrank == 0)
	printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen,
	       twrite, tclose, wrate);

    /* level - 0 */
    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-0.out",
			MPI_MODE_CREATE | MPI_MODE_RDWR, info0, &fh);
    topen = MPI_Wtime() - t;
    if (err != MPI_SUCCESS) myAbort(err, "open testfile-0.out");

    if (headerSize > 0) {
        /* Simulate writing a header */
        if (wrank == 0) {
	    char *header;
            header = (char *)calloc(1,(size_t)headerSize);
            MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE);
            free(header);
        }
        MPI_Barrier(cartcomm);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    t = MPI_Wtime();
    gcol = iarrayOfStarts[1];
    grow = iarrayOfStarts[0];
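    /* Level-0 access: independent MPI_File_write_at calls, one per local
       column, each with an explicitly computed byte offset. */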
    for (j=0; j<lcols; j++) {
	MPI_Offset offset = headerSize +
	    ((MPI_Offset)(grow) + (MPI_Offset)(gcol+j)*m) * sizeof(double);
	err = MPI_File_write_at(fh, offset, local_array+j*lrows, lrows, MPI_DOUBLE,
				MPI_STATUS_IGNORE);
	if (err != MPI_SUCCESS) myAbort(err, "write at");
    }
    twrite = MPI_Wtime() - t;

    err = MPI_File_close(&fh);
    tclose = MPI_Wtime() - t;
    /* tclose is the time for the write(s) + the close, in case the
       implementation delays (some of) the writes until the close */
    if (err != MPI_SUCCESS) myAbort(err, "close testfile-0");

    MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    if (twrite > 0)
	wrate = (double)m * (double)n * sizeof(double)/twrite;
    if (wrank == 0)
	printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen,
	       twrite, tclose, wrate);

    if (info0 != MPI_INFO_NULL) {
	MPI_Info_free(&info0);
	MPI_Info_free(&info3);
    }
    free(local_array);
    MPI_Finalize();
    return 0;
}
Example #11
int
main(int argc, char* argv[])
{
  int  i, rank, npes, bug=0;
  int buf[ng];
  MPI_File     thefile;
  MPI_Status   status;
  MPI_Datatype filetype;
  MPI_Comm     new_comm;
  MPI_Offset   offset=0;
  MPI_Info     info=MPI_INFO_NULL;
  int gsize[D],distrib[D],dargs[D],psize[D];
  int dims[D],periods[D],reorder;
  double t1,t2,mbs;
  double to1,to2,tc1,tc2;
  double et,eto,etc;
  double max_mbs,min_mbs,avg_mbs;
  double max_et,min_et,avg_et;
  double max_eto,min_eto,avg_eto;
  double max_etc,min_etc,avg_etc;
  char process_name[MPI_MAX_PROCESSOR_NAME + 1];
  char rr_blank[] = {"       "};
  char rr_empty[] = {"???????"};
  int  count;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &npes);
  if ( rank == 0 )
    {
     if ( argc < 2 )
       {
        printf(" ERROR: no filename given\n");
        bug++;
       }
     if ( npes == np )
       {
        printf(" file name: %s\n",argv[1]);
        printf(" total number of PE's: %3d\n",np);
        printf(" number of PE's in x direction: %4d\n",npx);
        printf(" number of PE's in y direction: %4d\n",npy);
        printf(" number of PE's in z direction: %4d\n",npz);
        printf(" global grid size: %dx%dx%d 4 byte integers (total %lld)\n",X,Y,Z,(unsigned long)X*Y*Z);
        printf("  local grid size: %dx%dx%d 4 byte integers (total %d)\n",nx,ny,nz,ng);
       }
     else
       {
        printf(" ERROR: total number of PE's must be %d\n",np);
        printf("        actual number of PE's was %d\n",npes);
        bug++;
       }
     if ( bug )
       {
        MPI_Abort(MPI_COMM_WORLD,-1);
       }
    }
 if ( MPI_Get_processor_name(process_name, &count) != MPI_SUCCESS)
   {
    sprintf(process_name, rr_empty);
   }
 else
   {
    if (count < MAX_RR_NAME) strncat(&process_name[count],rr_blank,MAX_RR_NAME-count);
    process_name[MAX_RR_NAME] = '\0';
   }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Info_create(&info);

/* allow multiple writers to write to the file concurrently */

/*MPI_Info_set(info,"panfs_concurrent_write","1");*/

/* use data aggregation */

/*MPI_Info_set(info,"romio_cb_write","enable"); */
/*MPI_Info_set(info,"romio_cb_write","disable");*/
/*MPI_Info_set(info,"romio_cb_read","enable"); */
/*MPI_Info_set(info,"romio_cb_read","disable");*/

/* use one aggregator/writer per node */

/*MPI_Info_set(info,"cb_config_list","*:1");*/

/* aggregators/writers per allocation: use this or the above (both work) */

/*i = ((npes-1)/8) + 1;
  sprintf(awpa,"%d",i);
  MPI_Info_set (info,"cb_nodes",awpa);*/

    
  for ( i=0; i<ng; i++ ) buf[i] = rank*10000 + (i+1)%1024;

  for ( i=0; i<D; i++ )
    {
     periods[i] = 1;  /* true */
    }

  reorder = 1;        /* true */

  dims[0] = npx;
  dims[1] = npy;
  dims[2] = npz;
     
  MPI_Cart_create(MPI_COMM_WORLD, D, dims, periods, reorder, &new_comm);

  for ( i=0; i<D; i++ )
    {
     distrib[i] = MPI_DISTRIBUTE_BLOCK;
     dargs[i]   = MPI_DISTRIBUTE_DFLT_DARG;
/*   psize[i]   = 0; */
    }

  gsize[0] = X;
  gsize[1] = Y;
  gsize[2] = Z;

  psize[0] = npx;
  psize[1] = npy;
  psize[2] = npz;

/*
  MPI_Dims_create(npes, D, psize);  

  printf("psize %d %d %d\n",psize[0],psize[1],psize[2]);
*/

  MPI_Type_create_darray(npes, rank, D, gsize, distrib, dargs, psize, MPI_ORDER_FORTRAN, MPI_INT, &filetype);
/*MPI_Type_create_darray(npes, rank, D, gsize, distrib, dargs, psize, MPI_ORDER_C, MPI_INT, &filetype);              don't do this */

  MPI_Type_commit(&filetype);

  to1 = MPI_Wtime();
  MPI_File_open(new_comm, argv[1], MPI_MODE_WRONLY | MPI_MODE_CREATE, info, &thefile);
  to2 = MPI_Wtime();

  MPI_File_set_size(thefile, offset);

  MPI_File_set_view(thefile, offset, MPI_INT, filetype, "native", MPI_INFO_NULL);
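  /* Each rank writes its block-distributed piece (ng ints) LOOP times through
     the darray view; the loop is timed to average the collective write cost. */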

  t1 = MPI_Wtime();
  for ( i=0; i<LOOP; i++)
    {
     MPI_File_write_all(thefile, buf, ng, MPI_INT, &status);
    }
  t2 = MPI_Wtime();

  tc1 = MPI_Wtime();
  MPI_File_close(&thefile);
  tc2 = MPI_Wtime();

  et  = (t2  - t1)/LOOP;
  eto = (to2 - to1)/LOOP;
  etc = (tc2 - tc1)/LOOP;

  mbs = (((double)(LOOP*X*Y*Z)*sizeof(int)))/(1000000.0*(t2-t1));

/*printf(" %s[%3d]    ET  %8.2f  %8.2f  %8.2f         %8.1f mbs\n", process_name, rank, t1, t2, t2-t1, mbs);*/

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Reduce(&mbs, &avg_mbs, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&mbs, &min_mbs, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&mbs, &max_mbs, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  MPI_Reduce(&et, &avg_et, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&et, &min_et, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&et, &max_et, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  MPI_Reduce(&eto, &avg_eto, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&eto, &min_eto, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&eto, &max_eto, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  MPI_Reduce(&etc, &avg_etc, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
  MPI_Reduce(&etc, &min_etc, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
  MPI_Reduce(&etc, &max_etc, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

  fflush(stdout);

  if ( rank == 0 )
    {
     mbs = avg_mbs/npes;
     printf("\n     average write rate: %9.1f mbs\n", mbs);
     printf("     minimum write rate: %9.1f mbs\n", min_mbs);
     printf("     maximum write rate: %9.1f mbs\n\n", max_mbs);
     avg_eto = avg_eto/npes;
     avg_et  = avg_et/npes;
     avg_etc = avg_etc/npes;
     printf("     open time:  %9.3f min %9.3f avg %9.3f max\n",min_eto,avg_eto,max_eto);  
     printf("     write time: %9.3f min %9.3f avg %9.3f max\n",min_et,avg_et,max_et);  
     printf("     close time: %9.3f min %9.3f avg %9.3f max\n\n",min_etc,avg_etc,max_etc);  
     fflush(stdout);
    }

  MPI_Finalize();
  
  return 0;
}
Example #12
void step4(inst i, int r, int s)
{
    inst instance = i;
    int rank = r;
    int size = s;

    // Creation of the 2D torus we will then use
    MPI_Comm comm;
    int dim[2] = {instance.p, instance.q};
    int period[2] = {1, 1};
    int reorder = 0;
    int coord[2];
    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &comm);
    MPI_Cart_coords(comm, rank, 2, coord);


    grid global_grid;

    char type = 0;
    MPI_File input_file;

    // We start by reading the header of the file
    MPI_File_open(comm, instance.input_path, MPI_MODE_RDONLY, MPI_INFO_NULL, &input_file);
    MPI_File_read_all(input_file, &type, 1, MPI_CHAR, MPI_STATUS_IGNORE);

    if(type == 1)
    {
	if (rank == 0) fprintf(stderr, "Error: type 1 files are not supported in step 4\n");
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
	exit(EXIT_FAILURE);
    }
	
    // we needed to swap the next 2 lines
    MPI_File_read_all(input_file, &(global_grid.n), 1, MPI_UINT64_T, MPI_STATUS_IGNORE);
    MPI_File_read_all(input_file, &(global_grid.m), 1, MPI_UINT64_T, MPI_STATUS_IGNORE);

#ifdef DEBUG
    if(rank == 0)
	printf("n, m = %zu %zu\n", global_grid.n, global_grid.m);
#endif


    if(!(global_grid.n % instance.p == 0 && global_grid.m % instance.q == 0))
    {
	if(rank == 0)
	    fprintf(stderr, "Error: please choose the grid parameters so they divide the grid of the cellular automaton. For example %zu %zu, but you need to move from %d procs to %zu\n", instance.p + (global_grid.n % instance.p), instance.q + (global_grid.m % instance.q), size, (instance.p + (global_grid.n % instance.p))*(instance.q + (global_grid.m % instance.q)));
	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();
	exit(EXIT_FAILURE);
    }

    size_t local_nrows = global_grid.n/instance.p;
    size_t local_ncols = global_grid.m/instance.q;
    
    // Now we create the data structures.
    int blocks[2] = {1, 2};
    MPI_Datatype types[2] = {MPI_BYTE, MPI_DOUBLE};
    MPI_Aint a_size = sizeof(cell2);
    MPI_Aint a_disp[3] = {offsetof(cell2, type), offsetof(cell2, u), offsetof(cell2, s)};

    MPI_Aint p_size = 17;
    MPI_Aint p_disp[3] = {0, 1, 9};

    MPI_Datatype p_tmp, a_tmp, p_cell, a_cell;

    // Aligned struct, memory representation
    MPI_Type_create_struct(2, blocks, a_disp, types, &a_tmp);
    MPI_Type_create_resized(a_tmp, 0, a_size, &a_cell);
    MPI_Type_commit(&a_cell);
	    
    // Packed struct, file-based representation
    MPI_Type_create_struct(2, blocks, p_disp, types, &p_tmp);
    MPI_Type_create_resized(p_tmp, 0, p_size, &p_cell);
    MPI_Type_commit(&p_cell);

    // Now, we create our matrix
    MPI_Datatype matrix;
    int sizes[2] = {global_grid.n, global_grid.m};
    int subsizes[2] = {local_nrows, local_ncols};
    int starts[2] = {0, 0};
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, p_cell, &matrix);
    MPI_Type_commit(&matrix);

    // We extend this matrix
    MPI_Datatype ematrix;
    int e_subsizes[2] = {2 + subsizes[0], 2 + subsizes[1]};
    int e_start[2] = {1, 1};
    MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, a_cell, &ematrix);
    MPI_Type_commit(&ematrix);
	

    // The next 3 types are for the export of the grid
    MPI_Datatype d_type;
    MPI_Type_create_resized(MPI_DOUBLE, 0, sizeof(cell2), &d_type);
    MPI_Type_commit(&d_type);
	

    MPI_Datatype d_matrix;
    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &d_matrix);
    MPI_Type_commit(&d_matrix);

    MPI_Datatype d_rmatrix; // to go from the extended matrix with ghost zones to the other one
    MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, d_type, &d_rmatrix);
    MPI_Type_commit(&d_rmatrix);



    // Set file view for each element
    MPI_Offset grid_start;
    MPI_File_get_position(input_file, &grid_start);

	
    MPI_File_set_view(input_file, grid_start + global_grid.m*local_nrows*p_size*coord[0] + local_ncols*p_size*coord[1], p_cell, matrix, "native", MPI_INFO_NULL);
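    // The subarray filetype has zero starts, so each rank's block origin is
    // encoded in the byte displacement of the view instead.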

    // allocate the cell array we will use
    cell2 **cells;
    cells = malloc(2*sizeof(cell2 *));
    double *sensors;
	
    cells[1] = calloc((2+local_nrows)*(2+local_ncols),sizeof(cell2));
    cells[0] = calloc((2+local_nrows)*(2+local_ncols),sizeof(cell2));
    sensors = calloc(local_nrows*local_ncols, sizeof(double));
	
    MPI_File_read_all(input_file, cells[0], 1, ematrix, MPI_STATUS_IGNORE);

    MPI_File_close(&input_file);

#ifdef DEBUG
    for(size_t i = 1; i < 1+local_nrows; i++)
	for(size_t j = 1; j < 1+local_ncols; j++)
	    fprintf(stderr, "%d - %d %f\n", rank, cells[0][i*(2+local_ncols)+j].type, cells[0][i*(2+local_ncols)+j].u);
#endif

    MPI_Datatype l_row; // local row
    MPI_Type_contiguous(local_ncols, d_type, &l_row);
    MPI_Type_commit(&l_row);

    MPI_Datatype l_col; // local column. A bit trickier, we need a type_vector.
    MPI_Type_vector(local_nrows, 1, local_ncols+2, d_type, &l_col);
    MPI_Type_commit(&l_col);

	
    int top, bot, left, right;
    double sqspeed = 0;

    int curr = 0, next = 0;
    char *alldump = malloc(256);

    for(int s = 0; s < instance.iteration; s++)
    {
	// We will update cell[next], and use the data of cell[curr]
	curr = s % 2;
	next = (s+1) % 2;
	    
	// We copy the edges of the grid.
	// We first need the ranks of the neighbours

	MPI_Cart_shift(comm, 0, 1, &top, &bot);
	MPI_Cart_shift(comm, 1, 1, &left, &right);
	    

	// Then we need to update the edges of our local grid
	// Update top and bottom rows
	MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u),               1, l_row, top, 0,
		     &(cells[curr][(local_ncols+2)*(local_nrows+1)+1].u), 1, l_row, bot, 0,
		     comm, MPI_STATUS_IGNORE);
	
	MPI_Sendrecv(&(cells[curr][(local_ncols+2)*(local_nrows)+1].u),   1, l_row, bot, 0,
		     &(cells[curr][1].u),                                 1, l_row, top, 0,
		     comm, MPI_STATUS_IGNORE);
	
	// Update left and right
	MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u),             1, l_col, left,  0,
		     &(cells[curr][1*(local_ncols+2)+local_ncols+1].u), 1, l_col, right, 0,
		     comm, MPI_STATUS_IGNORE);

	MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+local_ncols].u),   1, l_col, right, 0,
		     &(cells[curr][1*(local_ncols+2)].u),               1, l_col, left,  0,
		     comm, MPI_STATUS_IGNORE);



	// We compute the update of the grid
	for(size_t i = 1; i < 1+local_nrows; i++)
	{
	    for(size_t j = 1; j < 1+local_ncols; j++)
	    {
		if(instance.step < 2 || cells[next][j+i*(2+local_ncols)].type != 1)
		{
		    // If walls we do not do anything
		    sqspeed = cells[0][j+i*(2+local_ncols)].s * cells[0][j+i*(2+local_ncols)].s;
		    cells[next][j+i*(2+local_ncols)].u = cells[curr][j+i*(2+local_ncols)].u + (cells[curr][j+i*(2+local_ncols)].v * instance.dt);
		    cells[next][j+i*(2+local_ncols)].v = cells[curr][j+i*(2+local_ncols)].v + sqspeed * (cells[curr][j+(i+1)*(2+local_ncols)].u + cells[curr][j+(i-1)*(2+local_ncols)].u + cells[curr][(j+1) + i*(2+local_ncols)].u + cells[curr][(j-1) + i*(2+local_ncols)].u - (4 * cells[curr][j+i*(2+local_ncols)].u)) * instance.dt;

		    if(instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2)
		    {
			// Case of sensors
			sensors[(j-1)+(i-1)*local_ncols] += cells[next][j+i*(2+local_ncols)].u * cells[next][j+i*(2+local_ncols)].u;
		    }
		}
		    
	    }
	}

	if(instance.alldump != NULL && s % instance.frequency == 0)
	{
	    MPI_File dump_file;

	    sprintf(alldump, instance.alldump, (s / instance.frequency));
	    MPI_File_open(comm, alldump, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &dump_file);
		
	    MPI_File_set_view(dump_file, global_grid.m*local_nrows*sizeof(double)*coord[0] + local_ncols*sizeof(double)*coord[1], MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL);
		
	    MPI_File_write_all(dump_file, &(cells[curr][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
	    MPI_File_close(&dump_file);


	}
    }

	
    if(instance.lastdump != NULL)
    {
	// well, how do we do this? maybe it works if we do a resize?
	MPI_File last_file;
	MPI_File_open(comm, instance.lastdump, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &last_file);
	MPI_File_set_view(last_file, global_grid.m*local_nrows*sizeof(double)*coord[0] + local_ncols*sizeof(double)*coord[1], MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL); // for one thing, there is one grid_start too many; d_type or MPI_DOUBLE?

	MPI_File_write_all(last_file, &(cells[next][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
	MPI_File_close(&last_file);
    }

    if(instance.step == 3 && instance.sensors != NULL)
    {
	MPI_File sensor_file;
	MPI_File_open(comm, instance.sensors, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &sensor_file);


	MPI_Datatype string;
	MPI_Type_contiguous(1024, MPI_CHAR, &string);
	MPI_Type_commit(&string);
	
	char text[1024];
	for(size_t i = 1; i < 1+local_nrows; i++)
	{
	    for(size_t j = 1; j < 1+local_ncols; j++)
	    {
		if(instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2)
		{
		    memset(text,0,sizeof(text));
		    sprintf(text, "%zu %zu %f\r\n", (i-1)+coord[0]*local_nrows, (j-1)+coord[1]*local_ncols, sensors[(j-1)+(i-1)*local_ncols]);
		    MPI_File_write(sensor_file, text, 1, string, MPI_STATUS_IGNORE);
		}
		    
	    }
	}
	    
	MPI_Type_free(&string);
	MPI_File_close(&sensor_file);
    }
	

    // Some cleaning
    free(cells);
    free(alldump);
    MPI_Type_free(&a_cell);
    MPI_Type_free(&p_cell);
    MPI_Type_free(&matrix);
    MPI_Type_free(&ematrix);
    MPI_Type_free(&d_type);
    MPI_Type_free(&d_matrix);
    MPI_Type_free(&d_rmatrix);
    MPI_Type_free(&l_row);
    MPI_Type_free(&l_col);
}
Example #13
int main(int argc, char *argv[]) {

  int my_id, nprocs;
  int mpi_dims[4]; 
  int period[4] = {0, 0, 0, 0};
  int coords[4];

  int dimsf[4] = {nbands, gpts, gpts, gpts};
  int count[4];
  int offset[4];
  int ndims = 4;

  double t0, t1;
#ifdef PAPI
  PAPI_dmem_info_t dmem;
  double mem1, mem2, mem1_max, mem2_max, mem1_ave, mem2_ave;
  int papi_err;
#endif

  double *my_data;

  MPI_Comm cart_comm;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

  assert(argc == 5);
  for (int i=1; i < argc; i++)
    mpi_dims[i-1] = atoi(argv[i]);

  assert(mpi_dims[0] * mpi_dims[1] * mpi_dims[2] * mpi_dims[3] == nprocs);
  MPI_Cart_create(MPI_COMM_WORLD, 4, mpi_dims, period, 0, &cart_comm);
  MPI_Comm_rank(cart_comm, &my_id);

  MPI_Cart_coords(cart_comm, my_id, 4, coords);

  assert(nbands % mpi_dims[0] == 0);
  for (int i=1; i < 4; i++)
    assert(gpts % mpi_dims[i] == 0);

  int total_size = nbands*gpts*gpts*gpts;
  count[0] = nbands / mpi_dims[0];
  offset[0] = coords[0] * count[0];
  int data_size = count[0];
  for (int i=1; i < 4; i++)
    {
      count[i] = gpts/mpi_dims[i];
      offset[i] = coords[i] * count[i];
      data_size *= count[i];
    }

  my_data = (double *) malloc(data_size * sizeof(double));
  for (int i=0; i < data_size; i++)
    my_data[i] = my_id;

  MPI_Info info;
  MPI_File fp;
  MPI_Datatype filetype;
  // MPI_Info_set(info, "cb_nodes", "64");

  MPI_Barrier(MPI_COMM_WORLD);
#ifdef PAPI
  papi_err = PAPI_get_dmem_info(&dmem);
  if (papi_err != PAPI_OK)
    printf("PAPI_ERR\n");
  mem1 = (double)dmem.size / 1024.0;
  MPI_Reduce(&mem1, &mem1_max, 1, MPI_DOUBLE, MPI_MAX, 0, cart_comm);
  MPI_Reduce(&mem1, &mem1_ave, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm);
  mem1_ave /= nprocs;
#endif
  t0 = MPI_Wtime();
  MPI_File_open(MPI_COMM_WORLD, "test.dat",
                  MPI_MODE_CREATE|MPI_MODE_WRONLY,
                  MPI_INFO_NULL, &fp);

  MPI_Type_create_subarray(ndims, dimsf, count, offset, MPI_ORDER_C, 
			   MPI_DOUBLE, &filetype);
  MPI_Type_commit(&filetype);
  MPI_File_set_view(fp, 0, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);
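  // Collective write of this rank's 4-D hyperslab (data_size doubles) through the subarray view.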

  MPI_File_write_all(fp, my_data, data_size, MPI_DOUBLE, MPI_STATUS_IGNORE);

  MPI_Type_free(&filetype);
  MPI_File_close(&fp);
  MPI_Barrier(MPI_COMM_WORLD);
  t1 = MPI_Wtime();
#ifdef PAPI
  papi_err = PAPI_get_dmem_info(&dmem);
  if (papi_err != PAPI_OK)
    printf("PAPI_ERR\n");
  mem2 = (double)dmem.size/ 1024.0;
  MPI_Reduce(&mem2, &mem2_max, 1, MPI_DOUBLE, MPI_MAX, 0, cart_comm);
  MPI_Reduce(&mem2, &mem2_ave, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm);
  mem2_ave /= nprocs;
#endif
  if (my_id == 0)
    {
      printf("IO time %f (%f) MB %f s\n", 
             total_size * 8/(1024.0*1024.0), 
             data_size * 8/(1024.0*1024.0), t1-t0);
#ifdef PAPI
      printf("Memory usage max (ave): %f (%f) %f (%f) \n", 
              mem1_max, mem1_ave, mem2_max, mem2_ave);
#endif
    }
      
  MPI_Finalize();
}
Example #14
int
main (int argc, char **argv)
{
  MPI_Request request;
  MPI_File fh;
  MPI_Datatype ftype;
  MPI_Offset offset;
  MPI_Status status;
  int rank, wsize, fsize, i;
  char file_name[128];
  int buf[BUF_SIZE * TEST_OPS];
  int count;

  MPI_Init (&argc, &argv);

  MPI_Comm_size (MPI_COMM_WORLD, &wsize);
  MPI_Comm_rank (MPI_COMM_WORLD, &rank);

  strcpy (file_name, argv[0]);
  strcat (file_name, ".tmp");

  MPI_File_open (MPI_COMM_WORLD, file_name, MPI_MODE_RDWR | MPI_MODE_CREATE,
		 MPI_INFO_NULL, &fh);

  fsize = wsize * BUF_SIZE * TEST_OPS;
  MPI_File_preallocate (fh, fsize);

  memset (buf, 0, sizeof (buf));	/* zero the whole buffer (BUF_SIZE * TEST_OPS ints) */
  offset = 0;
  count = BLOCK_SIZE;

  for (i = 0; i < TEST_OPS; i++)
    {
      offset = i * BLOCK_SIZE + (rank * BLOCK_SIZE * TEST_OPS);

      MPI_File_seek (fh, offset, MPI_SEEK_SET);
      MPI_File_write (fh, buf, count, MPI_INT, &status);

      MPI_File_seek (fh, offset, MPI_SEEK_SET);
      MPI_File_read (fh, buf, count, MPI_INT, &status);
    }

  for (i = 0; i < TEST_OPS; i++)
    {
      offset = i * BLOCK_SIZE + (rank * BLOCK_SIZE * TEST_OPS);
      MPI_File_write_at (fh, offset, buf, count, MPI_INT, &status);
      MPI_File_read_at (fh, offset, buf, count, MPI_INT, &status);
    }

  MPI_Type_vector (fsize / BLOCK_SIZE, BLOCK_SIZE, BLOCK_SIZE * wsize,
		   MPI_INT, &ftype);
  MPI_Type_commit (&ftype);

  offset = rank * BLOCK_SIZE * TEST_OPS;
  count = BLOCK_SIZE * TEST_OPS;

  MPI_File_set_view (fh, offset, MPI_INT, ftype, "native", MPI_INFO_NULL);
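  /* With the vector file view in place, each rank's data is scattered into
     its own interleaved BLOCK_SIZE-int chunks of the shared file. */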
  MPI_File_write_all (fh, buf, count, MPI_INT, &status);
  MPI_File_read_all (fh, buf, count, MPI_INT, &status);

  MPI_File_close (&fh);

  MPI_Finalize ();
}
Example #15
int main (int argc, char *argv[]) {

	double all_start, all_end, all_total;
	double comm_start, comm_end, comm_total;
	double ior_start, ior_end, ior_total, iow_start, iow_end, iow_total, io_total;
	double compute_start, compute_end, compute_total;

	all_total = 0;
	comm_total = 0;
	io_total = 0;
	ior_total = 0;
	iow_total = 0;
	compute_total = 0;



	int i, j, k;
	//	Initial MPI environment
	/*
		rank: the ID of each process
		size: # total available process
	*/
	int rank, size;

	MPI_Init(&argc, &argv);

	all_start = MPI_Wtime();


	MPI_Comm_size(MPI_COMM_WORLD, &size);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);

	//	Argument exception handle
	if(argc < 4) {
		if (rank == ROOT) {
			fprintf(stderr, "Insufficient args\n");
			fprintf(stderr, "Usage: %s N input_file", argv[0]);
		}
		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Finalize();
		return 0;
	}

	//	Argument assigned
	const int N = atoi(argv[1]);
	const char *input = argv[2];
	const char *output = argv[3];

	//	Part 1: Determine arguments of the I/O, and do MPI I/O
	/*
		num_per_node:	# number stored in each process
		rank_first_add:	the first process which need to add MAX_INT
		num_first_add:	# number that the first process which need to add
		read_file:		indicate the process if it need to read file
		*node_arr:		local number array of each process
	*/
	MPI_File ifh;
	MPI_Status istatus;	
	int num_per_node, rank_first_add, num_first_add, read_file, *node_arr;
	read_file=0;
	
	compute_start = MPI_Wtime();
	if(N<size){ //	N < #process
		num_per_node = 1;
		num_first_add = 1;
		rank_first_add = N;
		if(rank<rank_first_add)
			read_file = 1;
	}
	else{ //	N >= #process
		if(N%size){ //	If N can't be divided into the # process
			num_per_node = (N/size) + 1;
			rank_first_add = N/num_per_node; // first rank that needs to pad its block with MAX_INT
			num_first_add = num_per_node - (N%num_per_node); 
			if(rank<=rank_first_add)
				read_file = 1;
		}
		else{ //	If N can be divided into # process
			num_per_node = N/size;
			rank_first_add = size; // no need to add
			num_first_add = 0;
			read_file = 1;
		}
	}
	compute_end = MPI_Wtime();
	compute_total += (compute_end - compute_start);

	node_arr = (int*) malloc(num_per_node * sizeof(int)); // store the N/P numbers in each node
	
	ior_start = MPI_Wtime();
	MPI_File_open(MPI_COMM_WORLD, input, MPI_MODE_RDONLY, MPI_INFO_NULL, &ifh);
	if(read_file)
		MPI_File_set_view(ifh, rank*num_per_node*sizeof(int), MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
	else
		MPI_File_set_view(ifh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
	if(read_file && rank!=rank_first_add)
		MPI_File_read_all(ifh, node_arr, num_per_node, MPI_INT, &istatus);
	else if(read_file && rank==rank_first_add){
		MPI_File_read_all(ifh, node_arr, (num_per_node-num_first_add), MPI_INT, &istatus);
		for(i=(num_per_node-num_first_add); i<num_per_node; ++i)
			node_arr[i] = MAX_INT;
	}
	else{
		MPI_File_read_all(ifh, node_arr, 0, MPI_INT, &istatus);
		for(i=0; i<num_per_node; ++i)
			node_arr[i] = MAX_INT;
	}
	MPI_File_close(&ifh);
	ior_end = MPI_Wtime();
	ior_total = (ior_end - ior_start);
	io_total += ior_total;

	MPI_Barrier(MPI_COMM_WORLD);

	// Part 2: Start odd-even sort algorithm
	MPI_Status status;
	int *next_arr, *merge_arr, *ori_arr;
	next_arr = (int*) malloc(num_per_node * sizeof(int));

	compute_start = MPI_Wtime();
	Mergesort(num_per_node, node_arr);
	compute_end = MPI_Wtime();
	compute_total += (compute_end - compute_start);

	MPI_Barrier(MPI_COMM_WORLD);
	for(i=0; i<size; ++i){
		if(i%2==0){ //	Even-phase
			if(rank%2){ //	Odd-rank process: # 1, 3, 5...(Sender) => P -> P-1
				
				comm_start = MPI_Wtime();
				MPI_Send(node_arr, num_per_node, MPI_INT, rank-1, 8, MPI_COMM_WORLD);
				comm_end = MPI_Wtime();
				comm_total += (comm_end - comm_start);

				comm_start = MPI_Wtime();
				MPI_Recv(node_arr, num_per_node, MPI_INT, rank-1, 8, MPI_COMM_WORLD, &status);
				comm_end = MPI_Wtime();
				comm_total += (comm_end - comm_start);
			}
			else{ // Even-rank process: # 0, 2, 4...(Receiver) => P-1 <- P
				if(rank!=size-1){
					
					comm_start = MPI_Wtime();
					MPI_Recv(next_arr, num_per_node, MPI_INT, rank+1, 8, MPI_COMM_WORLD, &status);
					comm_end = MPI_Wtime();
					comm_total += (comm_end - comm_start);

					compute_start = MPI_Wtime();
					merge_arr = (int*) malloc(num_per_node * 2 * sizeof(int));
					for(j=0; j<num_per_node; ++j){
						merge_arr[j] = node_arr[j];
						merge_arr[j+num_per_node] = next_arr[j];
					}
					Mergesort(num_per_node*2, merge_arr);
					for(j=0; j<num_per_node; ++j){
						node_arr[j] = merge_arr[j];
						next_arr[j] = merge_arr[j+num_per_node];
					}
					free(merge_arr);
					compute_end = MPI_Wtime();
					compute_total += (compute_end - compute_start);


					comm_start = MPI_Wtime();
					MPI_Send(next_arr, num_per_node, MPI_INT, rank+1, 8, MPI_COMM_WORLD);
					comm_end = MPI_Wtime();
					comm_total += (comm_end - comm_start);
				}
			}
		}
		else{ //	Odd-phase
			if(rank%2==0){ //	Even-rank process: # 0, 2, 4... (Sender) => Q -> Q-1
				if(rank!=0){

					comm_start = MPI_Wtime();
					MPI_Send(node_arr, num_per_node, MPI_INT, rank-1, 8, MPI_COMM_WORLD);
					comm_end = MPI_Wtime();
					comm_total += (comm_end - comm_start);


					comm_start = MPI_Wtime();
					MPI_Recv(node_arr, num_per_node, MPI_INT, rank-1, 8, MPI_COMM_WORLD, &status);
					comm_end = MPI_Wtime();
					comm_total += (comm_end - comm_start);
				}
			}
			else{ //	Odd-rank process: # 1, 3, 5... (Receiver) => Q-1 <- Q
				if(rank!=size-1){


					comm_start = MPI_Wtime();
					MPI_Recv(next_arr, num_per_node, MPI_INT, rank+1, 8, MPI_COMM_WORLD, &status);
					comm_end = MPI_Wtime();
					comm_total += (comm_end - comm_start);

					compute_start = MPI_Wtime();
					merge_arr = (int*) malloc(num_per_node * 2 * sizeof(int));
					for(j=0; j<num_per_node; ++j){
						merge_arr[j] = node_arr[j];
						merge_arr[j+num_per_node] = next_arr[j];
					}
					Mergesort(num_per_node*2, merge_arr);
					for(j=0; j<num_per_node; ++j){
						node_arr[j] = merge_arr[j];
						next_arr[j] = merge_arr[j+num_per_node];
					}
					free(merge_arr);
					compute_end = MPI_Wtime();
					compute_total += (compute_end - compute_start);

					comm_start = MPI_Wtime();
					MPI_Send(next_arr, num_per_node, MPI_INT, rank+1, 8, MPI_COMM_WORLD);
					comm_end = MPI_Wtime();
					comm_total += (comm_end - comm_start);
				}
			}
		}
	}
	MPI_Barrier(MPI_COMM_WORLD);
	free(next_arr);

	MPI_File ofh;
	MPI_Status ostatus;
	

	iow_start = MPI_Wtime();
	MPI_File_open(MPI_COMM_WORLD, output, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &ofh);
	if(read_file)
		MPI_File_set_view(ofh, rank*num_per_node*sizeof(int), MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
	else
		MPI_File_set_view(ofh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
	if(read_file && rank!=rank_first_add)
		MPI_File_write_all(ofh, node_arr, num_per_node, MPI_INT, &ostatus);
	else if(read_file && rank==rank_first_add)
		MPI_File_write_all(ofh, node_arr, (num_per_node-num_first_add), MPI_INT, &ostatus);
	else
		MPI_File_write_all(ofh, node_arr, 0, MPI_INT, &ostatus);

	MPI_File_close(&ofh);
	iow_end = MPI_Wtime();
	iow_total = (iow_end - iow_start);
	io_total += iow_total;

	

	all_end = MPI_Wtime();
	all_total = (all_end - all_start);

	printf("Rank:%d (All:%lf, I:%lf, W:%lf, I/O:%lf, Comm:%lf, Compute:%lf)\n", rank, all_total,
		 ior_total, iow_total, io_total, comm_total, compute_total);

	MPI_Barrier(MPI_COMM_WORLD);
	MPI_Finalize();

	return 0;
}
Example #16
int main(int argc, char **argv)
{
    MPI_Datatype newtype;
    int i, ndims, array_of_gsizes[3], array_of_distribs[3];
    int order, nprocs, len, flag, err;
    int array_of_dargs[3], array_of_psizes[3];
    int *readbuf, *writebuf, bufcount, mynod;
    char filename[1024];
    MPI_File fh;
    MPI_Status status;
    MPI_Aint size_with_aint;
    MPI_Offset size_with_offset;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!mynod) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    printf("\n*#  Usage: large_array -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
	printf("This program creates a 4 Gbyte file. Don't run it if you don't have that much disk space!\n");
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

/* create the distributed array filetype */
    ndims = 3;
    order = MPI_ORDER_C;

    array_of_gsizes[0] = 1024;
    array_of_gsizes[1] = 1024;
    array_of_gsizes[2] = 4*1024/sizeof(int);

    array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
    array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
    array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK;

    array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
    array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
    array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG;

    for (i=0; i<ndims; i++) array_of_psizes[i] = 0;
    MPI_Dims_create(nprocs, ndims, array_of_psizes);

/* check if MPI_Aint is large enough for size of global array. 
   if not, complain. */

    size_with_aint = sizeof(int);
    for (i=0; i<ndims; i++) size_with_aint *= array_of_gsizes[i];
    size_with_offset = sizeof(int);
    for (i=0; i<ndims; i++) size_with_offset *= array_of_gsizes[i];
    if (size_with_aint != size_with_offset) {
        printf("Can't use an array of this size unless the MPI implementation defines a 64-bit MPI_Aint\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_Type_create_darray(nprocs, mynod, ndims, array_of_gsizes, 
			   array_of_distribs, array_of_dargs,
			   array_of_psizes, order, MPI_INT, &newtype);
    MPI_Type_commit(&newtype);

/* initialize writebuf */

    MPI_Type_size(newtype, &bufcount);
    bufcount = bufcount/sizeof(int);
    writebuf = (int *) malloc(bufcount * sizeof(int));
    if (!writebuf) printf("Process %d, not enough memory for writebuf\n", mynod);
    for (i=0; i<bufcount; i++) writebuf[i] = mynod*1024 + i;

    /* write the array to the file */
    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, writebuf, bufcount, MPI_INT, &status);
    MPI_File_close(&fh);

    free(writebuf);

    /* now read it back */
    readbuf = (int *) calloc(bufcount, sizeof(int));
    if (!readbuf) printf("Process %d, not enough memory for readbuf\n", mynod);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, 
                  MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL);
    MPI_File_read_all(fh, readbuf, bufcount, MPI_INT, &status);
    MPI_File_close(&fh);

    /* check the data read */
    flag = 0;
    for (i=0; i<bufcount; i++) 
	if (readbuf[i] != mynod*1024 + i) {
	    printf("Process %d, readbuf=%d, writebuf=%d\n", mynod, readbuf[i], mynod*1024 + i);
            flag = 1;
	}
    if (!flag) printf("Process %d: data read back is correct\n", mynod);

    MPI_Type_free(&newtype);
    free(readbuf);

    MPI_Barrier(MPI_COMM_WORLD);
    if (!mynod) {
	err = MPI_File_delete(filename, MPI_INFO_NULL);
	if (err == MPI_SUCCESS) printf("file deleted\n");
    }

    MPI_Finalize();
    return 0;
}
Example #17
void parallel_readwrite(char *file_name, void *dump_buffer,
                            int type_of_file, int is_write, long long offset)
{
#if MPI && DO_PARALLEL_WRITE
  MPI_File fh;
  MPI_Status status;
  MPI_Datatype mpi_elementary_type, mpi_file_type;
  int file_open_error, file_write_error ;
  int error_string_length;
  char error_string[BUFSIZ];
  MPI_Offset file_size;
  int count;
  void *mpi_buffer;
  size_t mpi_buffer_size;
  int mode;
  MPI_Offset mpi_offset;
    
  MPI_Barrier(MPI_COMM_WORLD);
  
  if (is_write) {
    mode = MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND;
  }
  else {
    mode = MPI_MODE_RDONLY;
  }
  file_open_error = MPI_File_open(MPI_COMM_WORLD, file_name,
                                  mode,
                                  MPI_INFO_NULL, &fh);
  if (file_open_error != MPI_SUCCESS) {
    MPI_Error_string(file_open_error, error_string,
                     &error_string_length);
    fprintf(stderr, "parallel_readwrite(): error opening file: %3d: %s\n", mpi_rank, error_string);
    MPI_Abort(MPI_COMM_WORLD, file_open_error);
    
    /* It is still OK to abort, because we have failed to
     open the file. */
    
  }
  else {
    
//    if (i_am_the_master)
//      chmod(file_name, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (offset < 0L) {
      if(is_write) {
        MPI_File_get_position(fh, &mpi_offset);
        offset = mpi_offset;
      }
      else {
        offset = 0L;
      }
    }
    MPI_Barrier(MPI_COMM_WORLD);
    //differentiate data type and buffers involved based on file type
    if( DUMP_FILE == type_of_file ) {
        mpi_elementary_type = MPI_DUMP_TYPE;
        mpi_file_type = dump_file_type;
        mpi_buffer = (void*)dump_buffer;
        mpi_buffer_size = dump_buffer_size;
    }
    else if( GDUMP_FILE == type_of_file){
        mpi_elementary_type = MPI_GDUMP_TYPE;
        mpi_file_type = gdump_file_type;
        mpi_buffer = (void*)gdump_buffer;
        mpi_buffer_size = gdump_buffer_size;
    }
    else if( GDUMP2_FILE == type_of_file){
      mpi_elementary_type = MPI_GDUMP2_TYPE;
      mpi_file_type = gdump2_file_type;
      mpi_buffer = (void*)gdump2_buffer;
      mpi_buffer_size = gdump2_buffer_size;
    }
    else if( RDUMP_FILE == type_of_file){
        mpi_elementary_type = MPI_RDUMP_TYPE;
        mpi_file_type = rdump_file_type;
        mpi_buffer = (void*)rdump_buffer;
        mpi_buffer_size = rdump_buffer_size;
    }
    else if( FDUMP_FILE == type_of_file){
      mpi_elementary_type = MPI_FDUMP_TYPE;
      mpi_file_type = fdump_file_type;
      mpi_buffer = (void*)fdump_buffer;
      mpi_buffer_size = fdump_buffer_size;
    }
    else {
        if(i_am_the_master)
            fprintf(stderr, "Unknown file type %d\n", type_of_file);
        MPI_File_close(&fh);
        MPI_Finalize();
        exit(2);
    }
    MPI_File_set_view(fh, offset, mpi_elementary_type, mpi_file_type, "native", MPI_INFO_NULL);
    if (is_write) {
      file_write_error =
      MPI_File_write_all(fh, mpi_buffer, mpi_buffer_size, mpi_elementary_type,
                         &status);
    }
    else {
      file_write_error =
      MPI_File_read_all(fh, mpi_buffer, mpi_buffer_size, mpi_elementary_type,
                         &status);
    }
    if (file_write_error != MPI_SUCCESS) {
      MPI_Error_string(file_write_error, error_string,
                       &error_string_length);
      fprintf(stderr, "parallel_readwrite(): error %s file: %3d: %s\n",
              (is_write)?("writing"):("reading"), mpi_rank, error_string);
      MPI_File_close(&fh);
      //if (i_am_the_master) MPI_File_delete(file_name, MPI_INFO_NULL);
      MPI_Finalize();
      exit(1);
    }
//    MPI_Get_count(&status, MPI_FLOAT, &count);
//    MPI_File_get_size(fh, &file_size);
//    if(1) {
//      printf("%3d: wrote %d floats, expected to write %lld floats\n", mpi_rank, count, (long long int)dump_buffer_size);
//      printf("%3d: file size is %lld bytes, header-related offset is %lld\n", mpi_rank, file_size, offset);
//    }

    MPI_File_close(&fh);
  }
#endif
}
Example #18
void
qpb_write_spinor(qpb_spinor_field spinor_field, char fname[])
{
  MPI_Datatype mpi_dtype_spinor_float, filetype;
  MPI_Type_contiguous(2*NC*NS, MPI_FLOAT, &mpi_dtype_spinor_float);
  MPI_Type_commit(&mpi_dtype_spinor_float);

  int starts[ND], l_dim[ND], g_dim[ND];
  for(int i=0; i<ND; i++)
    {
      starts[i] = problem_params.coords[i]*problem_params.l_dim[i];
      l_dim[i] = problem_params.l_dim[i];
      g_dim[i] = problem_params.g_dim[i];
    };

  int ierr = MPI_Type_create_subarray(ND, g_dim, l_dim, starts, MPI_ORDER_C, 
				      mpi_dtype_spinor_float, &filetype);
  MPI_Type_commit(&filetype);

  MPI_File fhandle;
  ierr = MPI_File_open(MPI_COMM_WORLD, fname, MPI_MODE_WRONLY | MPI_MODE_CREATE,
		       MPI_INFO_NULL, &fhandle);
  if(ierr != MPI_SUCCESS)
    {
      if(am_master)
	{
	  fprintf(stderr, "%s: MPI_File_open() returned in error\n", fname);
	  exit(QPB_FILE_ERROR);
	}
    }

  ierr = MPI_File_set_view(fhandle, 0, mpi_dtype_spinor_float, filetype, 
			   "native", MPI_INFO_NULL);
  if(ierr != MPI_SUCCESS)
    {
      if(am_master)
	{
	  fprintf(stderr, "%s: MPI_File_set_view() returned in error\n", fname);
	  exit(QPB_FILE_ERROR);
	}
    }

  void *buffer = qpb_alloc(problem_params.l_vol*sizeof(qpb_spinor_float));
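  /* Pack the (re, im) float pairs of every local site into a contiguous
     buffer; it is byte-swapped below when the host is little-endian. */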

  for(int v=0; v<problem_params.l_vol; v++)
    for(int sp=0; sp<NC*NS; sp++)
	  {
	    ((float *) buffer)[v*NC*NS*2 + sp*2] = spinor_field.bulk[v][sp].re;
	    ((float *) buffer)[v*NC*NS*2 + sp*2 + 1] = spinor_field.bulk[v][sp].im;
	  }

  if(!qpb_is_bigendian())
    qpb_byte_swap_float(buffer, problem_params.l_vol*NC*NS*2);

  MPI_Status status;
  ierr = MPI_File_write_all(fhandle, buffer, problem_params.l_vol, 
			    mpi_dtype_spinor_float, &status);
  if(ierr != MPI_SUCCESS)
    {
      if(am_master)
       	{
	  fprintf(stderr, "%s: MPI_File_read() returned in error\n", fname);
	  exit(QPB_FILE_ERROR);
	}
    }

    
  ierr = MPI_File_close(&fhandle);
  if(ierr != MPI_SUCCESS)
    {
      if(am_master)
       	{
	  fprintf(stderr, "%s: MPI_File_close() returned in error\n", fname);
	  exit(QPB_FILE_ERROR);
	}
    }
  free(buffer);

  return;
}
Example #19
int main(int argc, char** argv)
{
  int my_rank, p;
  int i, dest;
  mpz_t currentPrime;
  unsigned long int product;
  sscanf(argv[1], "%lu", &product);
  unsigned long int secondFactor = 0;   /* received/sent below as MPI_UNSIGNED_LONG */
  int bcastStatus;
  int equals;

  /** GMP library variables **/
  mpz_t nextPrimeNumber;
  mpz_t testFactor;
  mpz_init(nextPrimeNumber);
  mpz_init_set_str (nextPrimeNumber, argv[1], 10);
  mpz_init(testFactor);
  mpz_init_set_ui(currentPrime, 2);
  mpz_nextprime(nextPrimeNumber, nextPrimeNumber);
  mpz_t testProduct;
  mpz_init(testProduct);

  /** MPI Initialization **/
  MPI_Request finalValue;
  MPI_File out;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &p);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  MPI_Status status;

  /** Get Ready to receive a factor if another process finds one */
  MPI_Irecv(&secondFactor, 1, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &finalValue);
  
  /** Prepare initial offset for each process **/
  for (i=0 ; i < my_rank ; i++) {
    mpz_nextprime(currentPrime, currentPrime);
  }
  /** Start Timing **/
  double start = MPI_Wtime(), diff;
  while (!secondFactor) {
    /** Check if another process has found the factors **/
    MPI_Test (&finalValue, &bcastStatus, &status);
    if(bcastStatus) {
      /** Somebody else has found the factors, we are done **/
      MPI_Wait(&finalValue, &status);
      break;
    }
      /** Skip P primes before checking again **/
    for (i=0 ; i < p ; i++) {
      mpz_nextprime(currentPrime, currentPrime);
    }
    
    /** Brute force check if the current working prime is a factor of the input number **/
    for (mpz_set_ui(testFactor , 2) ; mpz_get_ui(testFactor) <= mpz_get_ui(currentPrime); mpz_nextprime(testFactor, testFactor)) {
      /** Check if another process has found the factors **/
      MPI_Test (&finalValue, &bcastStatus, &status);
      if(bcastStatus) {
        MPI_Wait(&finalValue, &status);
        break;
      }
      mpz_mul_ui(testProduct, currentPrime, mpz_get_ui(testFactor));
      equals = mpz_cmp_ui(testProduct, product);
      if (equals == 0){
        /** We've found the factor; compute the second one and send it to the other processes **/
        secondFactor = mpz_get_ui(testFactor);
        printf("done by process %d, factors are %lu and %lu \n", my_rank, mpz_get_ui(currentPrime), secondFactor);
        fflush(stdout);
        for (dest = 0 ; dest < p ; dest++) {
          if (dest != my_rank) {
            MPI_Send(&secondFactor, 1, MPI_UNSIGNED_LONG, dest, 0, MPI_COMM_WORLD);
          }
        }
      }
    }
  }

  diff = MPI_Wtime() - start;
  /** End Timing **/

  /** Prepare file contents **/
  char fileName[200], fileContents[200];
  sprintf(fileName, "time_%lu", product);
  sprintf(fileContents, "%d\t%f\n", my_rank, diff);

  /** Write File **/
  MPI_File_open( MPI_COMM_WORLD, fileName, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &out );
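  /* Each rank seeks to rank*strlen(fileContents); this assumes every rank's
     record string has exactly the same length. */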
  MPI_File_seek(out, my_rank*strlen ( fileContents ) , MPI_SEEK_SET);
  MPI_File_write_all(out , &fileContents, strlen ( fileContents ), MPI_CHAR, &status );
  MPI_File_close(&out);

  /** Fin **/
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();
  return(0);
}
Example #20
int main( int argc, char *argv[] )
{
  int opt;
  extern char   *optarg;
  extern int     optind;
  int is_output_timing=0, is_print_usage = 0;
  int _debug=0, use_gen_file = 0, use_actsto = 0, use_normalsto=0;
  char *token;

  MPI_Offset disp, offset, file_size;
  MPI_Datatype etype, ftype, buftype;

  int errs = 0;
  int size, rank, i, count;
  char *fname = NULL;
  double *buf;
  MPI_File fh;
  MPI_Comm comm;
  MPI_Status status;
  int64_t nitem = 0;
  int fsize = 0, type_size;
  double stime, etime, iotime, comptime, elapsed_time;
  double max_iotime, max_comptime;

  double max, min, sum=0.0, global_sum;

  MPI_Init( &argc, &argv );
 
  comm = MPI_COMM_WORLD;

  MPI_Comm_size( comm, &size );
  MPI_Comm_rank( comm, &rank );
 
  while ( (opt=getopt(argc,argv,"i:s:godhxt"))!= EOF) {
    switch (opt) {
    case 'i': fname = optarg;
      break;
    case 'o': is_output_timing = 1;
      break;
    case 'g': use_gen_file = 1;
      break;
    case 'd': _debug = 1;
      break;
    case 'h': is_print_usage = 1;
      break;
    case 's': 
        token = strtok(optarg, ":");
        //if (rank == 0) printf("token=%s\n", token);
        if(token == NULL) {
            if (rank == 0) printf("1: Wrong file size format!\n");
            MPI_Finalize();
            exit(1);
        }

        fsize = atoi(token);
        token = strtok(NULL, ":");
        //if (rank == 0) printf("token=%s\n", token);
        if(token == NULL) {
            if (rank == 0) printf("2: Wrong file size format!\n");
            MPI_Finalize();
            exit(1);
        }
        if(*token != 'm' && *token != 'g') {
            if (rank == 0) printf("3: Wrong file size format!\n");
            MPI_Finalize();
            exit(1);
        }
        if (rank ==0) printf("fsize = %d (%s)\n", fsize, (*token=='m'?"MB":"GB"));
      if (fsize == 0)
	nitem = 0;
      else {
	MPI_Type_size(MPI_DOUBLE, &type_size);
	nitem = fsize*1024; /* KB */
	nitem = nitem*1024; /* MB */
        if(*token == 'g') {
            //if(rank == 0) printf("data in GB\n");
            nitem = nitem*1024; /* GB */
        }
	nitem = nitem/type_size;
	//printf("nitem=%lld\n", nitem);
	nitem = nitem/size; /* size means comm size */
      }
      if (rank == 0) printf("nitem = %d\n", nitem);
      break;
    case 'x': use_actsto = 1;
      break;
    case 't': use_normalsto = 1;
      break;
    default: is_print_usage = 1;
      break;
    }
  }

  if (fname == NULL || is_print_usage == 1 || nitem == 0) {
    if (rank == 0) usage(argv[0]);
    MPI_Finalize();
    exit(1);
  }

  int sizeof_mpi_offset;
  sizeof_mpi_offset = (int)(sizeof(MPI_Offset)); // 8 
  //if (rank == 0) printf ("size_of_mpi_offset=%d\n", sizeof_mpi_offset);

  if(use_normalsto == 1 && use_actsto == 1) {
      if(rank == 0)
          printf("Can't test both: either normalsto or actsto\n");
      MPI_Finalize();
      exit(1);
  }
#if 0
  if(use_actsto == 1) {
      if (size != 1) {
          if(rank == 0)
              printf("active storage should be run with only 1 process!!!\n");
          MPI_Finalize();
          exit(1);
      }
  }
#endif
  /* initialize random seed: */
  srand(time(NULL));

  if(use_gen_file == 1) {
    int t, result;

    MPI_File_open( comm, fname, MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh );

    /* Set the file view */
    disp = rank * nitem * type_size;
    printf("%d: disp = %lld\n", rank, disp);
    etype = MPI_DOUBLE;
    ftype = MPI_DOUBLE;

    result = MPI_File_set_view(fh, disp, etype, ftype, "native", MPI_INFO_NULL);

    if(result != MPI_SUCCESS) 
      sample_error(result, "MPI_File_set_view");

    buf = (double *)malloc( nitem * sizeof(double) );

    if (buf == NULL) {
        if(rank == 0) printf("malloc() failed\n");
        MPI_Finalize();
        exit(1);
    }

    buf[0] = rand()%4096;
    if(rank==0) printf("%lf\n", buf[0]);
    max = min = sum = buf[0];

    for(i=1; i<nitem; i++) {
      t = rand()%4096;
      if (t>max) max = t;
      if (t<min) min = t;
      sum += t;
      buf[i] = t;
      if (i<10 && rank == 0) printf("%lf\n", buf[i]);
    }
    
    if(rank == 0) {
      printf("MPI_Type_size(MPI_DOUBLE)=%d\n", type_size);
      printf ("max=%lf, min=%lf, sum=%lf\n", max, min, sum);
    }

    stime = MPI_Wtime();
    /* Write to file */
    MPI_File_write_all( fh, buf, nitem, MPI_DOUBLE, &status );
    etime = MPI_Wtime();
    iotime = etime - stime;
      
    printf("%d: iotime (write) = %10.4f\n", rank, iotime);

    MPI_Get_count( &status, MPI_DOUBLE, &count );
    //printf("count = %lld\n", count);

    if (count != nitem) {
      fprintf( stderr, "%d: Wrong count (%d) on write\n", rank, count );
      fflush(stderr);
      /* exit */
      MPI_Finalize();
      exit(1);
    }

    MPI_File_close(&fh);
    MPI_Barrier(MPI_COMM_WORLD);
    if(rank == 0) printf("File is written\n\n");
  }

  double *tmp = (double *)malloc( nitem * sizeof(double) );
  memset (tmp, 0, nitem*sizeof(double));

  if(use_normalsto == 1) {
      MPI_File_open( comm, fname, MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
      /* Read nothing (check status) */
      memset( &status, 0xff, sizeof(MPI_Status) );
      
      offset = rank * nitem * type_size;

      /* start I/O */
      stime = MPI_Wtime();
      MPI_File_read_at(fh, offset, tmp, nitem, MPI_DOUBLE, &status);
      etime = MPI_Wtime();
      /* end I/O */
      iotime = etime - stime;
      
      if(_debug==1) printf("%d: iotime = %10.4f\n", rank, iotime);
      MPI_Reduce(&iotime, &max_iotime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
      
      sum = 0.0; /* reset sum */
      
      /* start computation */
      stime = MPI_Wtime();
      
      for(i=0; i<nitem; i++) {
          sum += tmp[i];
      }
      
      MPI_Reduce(&sum, &global_sum, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
      etime = MPI_Wtime();
      /* end computation */

      comptime = etime - stime;

      if(_debug==1) printf("%d: comptime = %10.4f\n", rank, comptime);
      
      MPI_Reduce(&comptime, &max_comptime, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);

      if(rank == 0) {
          elapsed_time = max_comptime + max_iotime;
          printf("<<Result (SUM) with normal read>>\n"
                 "SUM              = %10.4f \n"
                 "Computation time = %10.4f sec\n"
                 "I/O time         = %10.4f sec\n"
                 "total time       = %10.4f sec\n\n", 
                 global_sum, max_comptime, max_iotime, elapsed_time);
      }
      
      MPI_File_close(&fh);
  }
#if 0
  if(use_actsto == 1) {
#if 0
    /* MPI_MAX */
    MPI_File_open( comm, fname, MPI_MODE_RDWR, MPI_INFO_NULL, &fh );

    stime = MPI_Wtime();
    MPI_File_read_at_ex( fh, offset, tmp, nitem, MPI_DOUBLE, MPI_MAX, &status );
    etime = MPI_Wtime();
    elapsed_time = etime-stime;
    printf ("<<Result with active storage>>\n"
	    "max=%lf (in %10.4f sec)\n", tmp[0], elapsed_time);
    
    MPI_File_close(&fh);
    
    /* MPI_MIN */
    MPI_File_open( comm, fname, MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
    
    stime = MPI_Wtime();
    MPI_File_read_at_ex( fh, offset, tmp, nitem, MPI_DOUBLE, MPI_MIN, &status );
    etime = MPI_Wtime();
    elapsed_time = etime - stime;
    printf ("min=%lf (in %10.4f sec)\n", tmp[0], elapsed_time); 
    
    MPI_File_close(&fh);
#endif

    /* MPI_SUM */
    MPI_File_open( comm, fname, MPI_MODE_RDWR, MPI_INFO_NULL, &fh );
    memset(&status, 0xff, sizeof(MPI_Status));
    offset = rank * nitem * type_size;
    
    stime = MPI_Wtime();
    MPI_File_read_at_ex( fh, offset, tmp, nitem, MPI_DOUBLE, MPI_SUM, &status );
    etime = MPI_Wtime();
    elapsed_time = etime - stime;
    printf ("<<Result with active storage>>\n"
            "sum=%lf (in %10.4f sec)\n", tmp[0], elapsed_time); 
    
    MPI_File_close( &fh );
  }
#endif
  MPI_Barrier(MPI_COMM_WORLD);
  if (use_gen_file == 1) free( buf );
  free( tmp );
 
  MPI_Finalize();
  return errs;
}
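For reference, the -s parsing above turns a size such as 1:g into a per-rank element count by converting to bytes, dividing by the size of MPI_DOUBLE and then by the communicator size. A worked example of that arithmetic, assuming 4 ranks; the numbers are illustrative only.

#include <stdio.h>
#include <stdint.h>

/* Worked example of the nitem arithmetic above for "-s 1:g" on 4 ranks. */
int main(void)
{
    int     fsize = 1;          /* the "1" before the ':'           */
    int     size = 4;           /* communicator size                */
    int     type_size = 8;      /* MPI_Type_size(MPI_DOUBLE, ...)   */
    int64_t nitem = (int64_t)fsize * 1024 * 1024 * 1024;  /* 1 GB in bytes */

    nitem = nitem / type_size;  /* 134217728 doubles in total       */
    nitem = nitem / size;       /*  33554432 doubles per rank       */
    printf("nitem per rank = %lld\n", (long long)nitem);
    return 0;
}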
void stamp_matrix(Lattice *grid, double *matrix, string filename) {

#ifdef HAVE_MPI
    // Set variables for mpi output
    char *data_as_txt;
    int count;

    MPI_File   file;
    MPI_Status status;

    // each number is represented by charspernum chars
    const int charspernum = 14;
    MPI_Datatype num_as_string;
    MPI_Type_contiguous(charspernum, MPI_CHAR, &num_as_string);
    MPI_Type_commit(&num_as_string);

    // create a type describing our piece of the array
    int globalsizes[2] = {grid->global_dim_y - 2 * grid->periods[0] * grid->halo_y, grid->global_dim_x - 2 * grid->periods[1] * grid->halo_x};
    int localsizes [2] = {grid->inner_end_y - grid->inner_start_y, grid->inner_end_x - grid->inner_start_x};
    int starts[2]      = {grid->inner_start_y, grid->inner_start_x};
    int order          = MPI_ORDER_C;

    MPI_Datatype localarray;
    MPI_Type_create_subarray(2, globalsizes, localsizes, starts, order, num_as_string, &localarray);
    MPI_Type_commit(&localarray);

    // output real matrix
    //conversion
    data_as_txt = new char[(grid->inner_end_x - grid->inner_start_x) * (grid->inner_end_y - grid->inner_start_y) * charspernum];
    count = 0;
    for (int j = grid->inner_start_y - grid->start_y; j < grid->inner_end_y - grid->start_y; j++) {
        for (int k = grid->inner_start_x - grid->start_x; k < grid->inner_end_x - grid->start_x - 1; k++) {
            sprintf(&data_as_txt[count * charspernum], "%+.5e  ", matrix[j * grid->dim_x + k]);
            count++;
        }
        if(grid->mpi_coords[1] == grid->mpi_dims[1] - 1) {
            sprintf(&data_as_txt[count * charspernum], "%+.5e\n ", matrix[j * grid->dim_x + (grid->inner_end_x - grid->start_x) - 1]);
            count++;
        }
        else {
            sprintf(&data_as_txt[count * charspernum], "%+.5e  ", matrix[j * grid->dim_x + (grid->inner_end_x - grid->start_x) - 1]);
            count++;
        }
    }

    // open the file, and set the view
    MPI_File_open(grid->cartcomm, const_cast<char*>(filename.c_str()),
                  MPI_MODE_CREATE | MPI_MODE_WRONLY,
                  MPI_INFO_NULL, &file);

    MPI_File_set_view(file, 0,  MPI_CHAR, localarray, (char *)"native", MPI_INFO_NULL);

    MPI_File_write_all(file, data_as_txt, (grid->inner_end_x - grid->inner_start_x) * (grid->inner_end_y - grid->inner_start_y), num_as_string, &status);
    MPI_File_close(&file);
    delete [] data_as_txt;
#else
    print_matrix(filename.c_str(), &(matrix[grid->global_dim_x * (grid->inner_start_y - grid->start_y) + grid->inner_start_x - grid->start_x]), grid->global_dim_x,
                 grid->global_dim_x - 2 * grid->periods[1]*grid->halo_x, grid->global_dim_y - 2 * grid->periods[0]*grid->halo_y);
#endif
    return;
}
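stamp_matrix relies on every formatted number occupying exactly charspernum (14) bytes, so the num_as_string datatype and the subarray view map one-to-one onto the text file. A minimal check of that width assumption; the value 1.23456 is arbitrary, and platforms that print three exponent digits would break the 14-byte layout.

#include <stdio.h>
#include <string.h>

/* "%+.5e" yields 12 characters such as "+1.23456e+00"; two trailing
 * spaces (or a newline plus a space at a row end) bring each cell to
 * 14 bytes, matching charspernum above. */
int main(void)
{
    char cell[32];
    sprintf(cell, "%+.5e  ", 1.23456);
    printf("'%s' -> %zu bytes\n", cell, strlen(cell));   /* prints 14 */
    return 0;
}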
Ejemplo n.º 22
0
int main (int argc, char **argv)
{
    struct arguments arguments;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments); 

    int run_type;
    run_type = 0; //default is serial
    if (sscanf (arguments.args[0], "%i", &run_type)!=1) {}

    int iterations;
    iterations = 0; //default is serial
    if (sscanf (arguments.args[1], "%i", &iterations)!=1) {}

    int count_when;
    count_when = 1000;
    if (sscanf (arguments.args[2], "%i", &count_when)!=1) {}

    char print_list[200]; //used for input list
    if (sscanf (arguments.args[3], "%s", print_list)!=1) {}

    // printf("Print list = %s\n", print_list);

    //Extract animation list from arguments
    char char_array[20][12] = { {0} };   //separated input list
    int animation_list[20][2] = { {0} }; //integer input list start,range
    char *tok = strtok(print_list, ",");

    //counters
    int i,j,k,x,y,ii,jj;
    ii = 0;
    jj = 0;

    //Loop over tokens, parsing out commas
    int tok_len = 0;
    while (tok != NULL)
    {
        //first loop parses out commas
        tok_len = strlen(tok);
        for (jj=0;jj<tok_len;jj++)
        {
            char_array[ii][jj] = tok[jj];
        }

        // printf("Tok = %s\n", char_array[ii]);
        tok = strtok(NULL, ",");
        ii++;
    }

    //looking for a range input, convert to ints
    int stop;
    for (ii=0;ii<20;ii++)
    {
        //convert first number to int
        tok = strtok(char_array[ii], "-");
        if (tok != NULL)
        {
            animation_list[ii][0] = atoi(tok);
            tok = strtok(NULL, ",");
        }
        
        //look for second number, add to range
        if (tok != NULL)
        {
            stop = atoi(tok);
            animation_list[ii][1] = stop - animation_list[ii][0];
        }

        // if (rank == 0)
        // {
        //     printf("Animation_list = %i, %i\n", 
        //         animation_list[ii][0], animation_list[ii][1]);

        // }
    }
    
    
    

    //should an animation be generated
    //prints a bunch of .pgm files, have to hand
    //make the gif...
    int animation;
    animation = arguments.animation;

    //verbose?
    int verbose;
    verbose = arguments.verbose;
    // printf("VERBOSE = %i",verbose);
    if (verbose>0 && verbose<=10)
    {
        verbose = 1;
    }

    

    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get the name of the processor
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);

    //Print run information, exit on bad command line input
    if (rank == 0 && verbose == 1)
    {
        printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n",
            verbose,run_type,iterations,count_when, animation);
    }
    if (world_size>1 && run_type ==0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (world_size==1 && run_type>0)
    {
        printf("Runtype and processors count not consistant\n");
        MPI_Finalize();
        exit(0);
    }
    if (count_when <= 0)
    {
        if (rank == 0)
        {
            printf("Invalid count interval, positive integers only\n");
        }
        MPI_Finalize();
        exit(0);
    }

     //serial
    if (world_size == 1 && run_type == 0)
    {

        ncols=1;
        nrows=1;
    }
    //Blocked
    else if (world_size>1 && run_type == 1)
    {
        ncols = 1;
        nrows = world_size;
        my_col = 0;
        my_row = rank;
    }
    //Checker
    else if (world_size>1 && run_type == 2)
    {
        ncols = (int)sqrt(world_size);
        nrows = (int)sqrt(world_size);

        my_row = rank/nrows;
        my_col = rank-my_row*nrows;

        if (ncols*nrows!=world_size)
        {
            if (rank == 0)
            {
                printf("Number of processors must be square, Exiting\n");
            }
            MPI_Finalize();
            exit(0);
        }
    }

    // if (verbose == 1)
    // {
    //     printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col);
    // }

    
    //////////////////////READ IN INITIAL PGM////////////////////////////////
    if(!readpgm("life.pgm"))
    {
        // printf("WR=%d,HERE2\n",rank);
        if( rank==0 )
        {
            pprintf( "An error occured while reading the pgm file\n" );
        }
        MPI_Finalize();
        return 1;
    }

    // Count the life forms. Note that we count from [1,1] - [height+1,width+1];
    // we need to ignore the ghost row!
    i = 0;
    for(y=1; y<local_height+1; y++ )
    {
        for(x=1; x<local_width+1; x++ )
        {
            if( field_a[ y * field_width + x ] )
            {
                i++;
            }
        }
    }
    // pprintf( "%i local buggies\n", i );

    int total;
    MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if( rank==0  && verbose == 1 )
    {
        pprintf( "%i total buggies\n", total );
    }
    

    
    // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col);

    //Row and column size per processor
    int rsize, csize; 
    rsize = local_width;
    csize = local_height;


    if (rank == 0 && verbose == 1)
    {
        printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size);
    }
    
    //Create new derived datatype for writing to files
    MPI_Datatype submatrix;

    int array_of_gsizes[2];
    int array_of_distribs[2];
    int array_of_dargs[2];
    int array_of_psize[2];

    if (run_type == 1)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }
    else if (run_type == 2)
    {
        if (rank == 0)
        {
            printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows);
            printf("p0,p1 = %i,%i\n", nrows, ncols);
        }
        array_of_gsizes[0] = local_height*ncols;
        array_of_gsizes[1] = local_width*nrows;
        array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;
        array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
        array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
        array_of_psize[0] = nrows;
        array_of_psize[1] = ncols;
        // int order = MPI_ORDER_C;

        //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes
        //order,oldtype,*newtype
        MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs,
                array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix);
        MPI_Type_commit(&submatrix);
    }



    MPI_Barrier(MPI_COMM_WORLD);

    //////////////////ALLOCATE ARRAYS, CREATE DATATYPES/////////////////////

    //Create new column derived datatype
    MPI_Datatype column;
    //count, blocklength, stride, oldtype, *newtype
    MPI_Type_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column);
    MPI_Type_commit(&column);

    //Create new row derived datatype
    MPI_Datatype row;
    //count, blocklength, stride, oldtype, *newtype
    MPI_Type_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row);
    MPI_Type_commit(&row);

    //allocate arrays and corner storage
    unsigned char *section;
    unsigned char *neighbors;
    //to use
    unsigned char *top;
    unsigned char *bot;
    unsigned char *left;
    unsigned char *right;
    //to send
    unsigned char *ttop;
    unsigned char *tbot;
    unsigned char *tleft;
    unsigned char *tright;
    //MALLOC!!
    section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char));
    top = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    bot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    left = (unsigned char*)malloc(csize*sizeof(unsigned char));
    right = (unsigned char*)malloc(csize*sizeof(unsigned char));
    ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char));
    tleft = (unsigned char*)malloc(csize*sizeof(unsigned char));
    tright = (unsigned char*)malloc(csize*sizeof(unsigned char));

    //corners
    unsigned char topleft,topright,botleft,botright; //used in calculations
    unsigned char ttopleft,ttopright,tbotleft,tbotright; 
    topleft = 255;
    topright = 255;
    botleft = 255;
    botright = 255;

    //used for animation: each process puts its own block into a buffer and
    //MPI_Reduce then sums the buffers onto rank 0, which writes the frame
    unsigned char* full_matrix;
    unsigned char* full_matrix_buffer;
    if (animation == 1)
    {
        int msize1 = rsize*ncols*csize*nrows;
        full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char));
        for (i=0; i<msize1; i++)
        {
            full_matrix[i] = 0;
            full_matrix_buffer[i] = 0;
        }
    }

    
    // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height);

    //Serial initialize vars
    int count = 0;
    if (world_size == 1 && run_type == 0)
    {
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }
        // printf("COUNT 4 = %d\n", count);
    }

    //Blocked/Checkered initializing variables
    else if (world_size > 1 && (run_type == 1 || run_type == 2))
    {
        //initialize
        for (i=0;i<csize;i++)
        {
            for (j=0;j<rsize;j++)
            {
                section[i*rsize + j] = 255;
                
                if (field_a[(i+1)*(2+rsize) + j + 1])
                {
                    section[i*rsize + j] = 0;
                    count += 1;
                }
                else
                {
                    section[i*rsize + j] = 255;
                }

                top[j] = 255;
                bot[j] = 255;
                ttop[j] = 255;
                tbot[j] = 255;
            }
            right[i] = 255;
            left[i] = 255;
            tright[i] = 255;
            tleft[i] = 255;
        }

        // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD );
        // if (rank == 0)
        // {
        //     printf("COUNT 4 = %d\n", total);
        // }
        
    }


    //header/footer for mpio writes: the bytes below spell the ASCII string
    //"P5\n512 512\n255\n", i.e. a binary PGM header for a 512x512 image
    char header1[15];
    header1[0] = 0x50;
    header1[1] = 0x35;
    header1[2] = 0x0a;
    header1[3] = 0x35;
    header1[4] = 0x31;
    header1[5] = 0x32;
    header1[6] = 0x20;
    header1[7] = 0x35;
    header1[8] = 0x31;
    header1[9] = 0x32;
    header1[10] = 0x0a;
    header1[11] = 0x32;
    header1[12] = 0x35;
    header1[13] = 0x35;
    header1[14] = 0x0a;

    char footer;
    footer = 0x0a;

    //make a frame or not?
    int create_frame = 0;

    //send to 
    int send_to;
    int receive_from;
    int info[5];
    info[2] = rank;
    info[3] = rsize;
    info[4] = csize;
    unsigned char info2[4];
    info2[0] = topleft;
    info2[1] = topright;
    info2[2] = botleft;
    info2[3] = botright;

    int current_count;
    int location;

    //Gameplay
    for (k=0;k<iterations;k++)
    {
        //Count buggies
        if (k%count_when==0)
        {
            if (verbose == 1)
            {
                current_count = rsize*csize-count_buggies(rsize,csize,section);
                MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
                if (rank == 0)
                {
                    printf("Iteration=%5d,  Count=%6d\n", k,total);
                }
                ////corner debug
                // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright);
            }
        }

        
        //Write to file serially for comparison
        //If animation is requested
        if (animation == 1 && run_type == 0)
        {
            //Put smaller matrix part into larger matrix
            for (i=0; i<csize; i++)
            {
                for (j=0; j<rsize; j++)
                {
                    location = (my_row*csize*rsize*ncols + my_col*rsize + 
                                    i*rsize*ncols + j);

                    full_matrix_buffer[location] = section[i*rsize+j];
                }
                // if (rank == 0)
                // {
                //     printf("Location = %d\n", location);
                // }
            }

            //Gather matrix
            MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows, 
                MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD);

            
            if (rank == 0 && run_type == 0)
            {
                write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix);
            }
        }
        //mpio write pgm
        else if (animation == 1 && (run_type == 1 || run_type == 2))
        {
            //default is no frame
            create_frame = 0;
            for (ii=0;ii<20;ii++)
            {
                for (jj=0;jj<animation_list[ii][1]+1;jj++)
                {
                    // if (rank == 0)
                    // {
                    //     printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n",
                    //         animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0);
                    // }
                    if ((animation_list[ii][0] + jj - k) == 0)
                    {

                        create_frame = 1;
                        break;
                    }
                }
            }

            if (create_frame == 1)
            {
               //dynamic filename with leading zeroes for easy conversion to gif
                char buffer[128];
                snprintf(buffer, sizeof(char)*128, "Animation/frame%04d.pgm", k);

                /* open the file, and set the view */
                MPI_File file;
                MPI_File_open(MPI_COMM_WORLD, buffer, 
                              MPI_MODE_CREATE|MPI_MODE_WRONLY,
                              MPI_INFO_NULL, &file);

                MPI_File_set_view(file, 0,  MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                                       "native", MPI_INFO_NULL);

                //write header
                MPI_File_write(file, &header1, 15, MPI_CHAR, MPI_STATUS_IGNORE);

                //write matrix
                MPI_File_set_view(file, 15,  MPI_UNSIGNED_CHAR, submatrix, 
                                       "native", MPI_INFO_NULL);

                MPI_File_write_all(file, section, rsize*csize, 
                        MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);

                //write footer (trailing newline)
                MPI_File_set_view(file, 15+rsize*ncols*csize*nrows,  
                        MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, 
                        "native", MPI_INFO_NULL);

                MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE); 
            } 
        }


        // BLOCKED COMMUNICATION //
        if (run_type == 1)
        {
            //send my top row to the rank above, receive the bottom halo from the rank below
            //alternate send/receive order by rank parity to avoid deadlock
            send_to = rank - 1;
            receive_from = rank + 1;

            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //send top, receive bot
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //send my bottom row to the rank below, receive the top halo from the rank above
            //alternate send/receive order by rank parity to avoid deadlock
            send_to = rank + 1;
            receive_from = rank - 1;

            //send bot, receive top
            if (rank%2==0)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (receive_from<world_size && receive_from >= 0)
                {
                    //*data,count,type,from,tag,comm,mpi_status
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }

                if (send_to<world_size && send_to>=0)
                {
                    //*data,count,type,to,tag,comm
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
        }

        // CHECKERED COMMUNICATION //
        else if (run_type == 2)
        {
            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }

            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //corners
            ttopleft = tleft[0];
            tbotleft = tleft[csize-1];
            ttopright = tright[0];
            tbotright = tright[csize-1];

            //Send top, receive bot
            send_to = rank - nrows;
            receive_from = rank + nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send bot, receive top
            send_to = rank + nrows;
            receive_from = rank - nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {

                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send left, receive right
            send_to = rank - 1;
            receive_from = rank + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send right, receive left
            send_to = rank + 1;
            receive_from = rank - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topright, receive botleft
            send_to = rank - ncols + 1;
            receive_from = rank + ncols - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send topleft, receive botright
            send_to = rank - ncols - 1;
            receive_from = rank + ncols + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1)
                {
                    MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1)
                {
                    MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botleft, receive topright
            send_to = rank + ncols - 1;
            receive_from = rank - ncols + 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send botright, receive topleft
            send_to = rank + ncols + 1;
            receive_from = rank - ncols - 1;

            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1)
                {
                    MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD,
                        MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1)
                {
                    MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD);
                }
            }


            info2[0] = topleft;
            info2[1] = topright;
            info2[2] = botleft;
            info2[3] = botright;

        }
 
        // if (rank == 1){
        //     print_matrix(rsize, 1, top);
        //     print_matrix(rsize, csize, section);
        //     print_matrix(rsize, 1, bot);
        //     printf("\n");
        // }
        // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, k,(csize-1)*rsize-1+rsize);
        


        /////////// CELL UPDATES /////////////////
        //count neighbor
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                info[0] = i;
                info[1] = j;
                neighbors[i*rsize+j] = count_neighbors(info, info2, section, 
                                    top, bot, left, right);
                // printf("%i",neighbors[i*rsize+j]);
            }
            // printf("\n");
        }

        //update cells
        current_count = 0;
        for (i=0;i<csize;i++)
        {
            for (j=0; j<rsize; j++)
            {
                //cell currently alive
                if (section[i*rsize+j] == 0)
                {
                    //2 or 3 neighbors lives, else die
                    if (neighbors[i*rsize+j] < 2 || 
                        neighbors[i*rsize+j] > 3)
                    {
                        section[i*rsize+j] = 255;
                    }
                }
                else
                {
                    //Exactly 3 neighbors spawns new life
                    if (neighbors[i*rsize+j] == 3)
                    {
                        section[i*rsize+j] = 0;
                    }
                }
            }
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    usleep(500000); /* sleep() takes whole seconds; 0.5 would truncate to 0 */
    //free malloc stuff
    if( field_a != NULL ) free( field_a );
    if( field_b != NULL ) free( field_b );
    free(section);
    free(neighbors);
    free(top);
    free(bot);
    free(left);
    free(right);

    MPI_Finalize();
    exit (0);
}    
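The hex-coded header1 above spells the ASCII string "P5\n512 512\n255\n", a binary PGM header for a 512x512 image with maxval 255, and the later file views hard-code its 15-byte length. A minimal sketch, assuming the global grid really is rsize*ncols by csize*nrows, that builds the same header with snprintf; make_pgm_header is an illustrative helper, not part of the example above.

#include <stdio.h>

/* Sketch: build the PGM header programmatically instead of as hex bytes.
 * For width = height = 512 this produces exactly the 15 bytes used above;
 * for other sizes the view displacement would have to use the returned
 * length instead of the literal 15. */
static int make_pgm_header(char *hdr, size_t cap, int width, int height)
{
    return snprintf(hdr, cap, "P5\n%d %d\n255\n", width, height);
}

int main(void)
{
    char hdr[64];
    int n = make_pgm_header(hdr, sizeof(hdr), 512, 512);
    printf("header is %d bytes\n", n);   /* prints 15 */
    return 0;
}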
Ejemplo n.º 23
0
Archivo: gio.c Proyecto: kento/gio
void do_collective_write()
{
  MPI_Info info;
  MPI_Datatype contig;
  MPI_Comm sub_write_comm;
  MPI_File fh;
  char coll_path[PATH_LEN];
  int sub_comm_size, sub_rank, sub_comm_color;
  int striping_factor_int;
  MPI_Offset disp;
  int rc;
  int *buf;


  ptimes[0].start = MPI_Wtime();
  ptimes[1].start = MPI_Wtime();
  //  if (m_size == 1) {
  //    sub_write_comm = MPI_COMM_WORLD;
  //    sub_comm_color = 0;
  //  } else {
    sub_comm_color = get_sub_collective_io_comm(&sub_write_comm);  
    //  }

  /* Construct a datatype for distributing the input data across all
   * processes. */
  MPI_Type_contiguous(data_size / sizeof(int), MPI_INT, &contig);
  MPI_Type_commit(&contig);
  
  /* Set the stripe_count and stripe_size, that is, the striping_factor
   * and striping_unit. Both keys and values for MPI_Info_set must be
   * in the form of ASCII strings. */
  MPI_Info_create(&info);
  striping_factor_int = max_striping_factor / m_size;
  if (striping_factor_int == 0) striping_factor_int = 1;
  sprintf(striping_factor, "%d", striping_factor_int);
  MPI_Info_set(info, "striping_factor", striping_factor);
  MPI_Info_set(info, "striping_unit", striping_unit);
  //  MPI_Info_set(info, "romio_cb_write", "enable");                                                                                                                                                       
  //  MPI_Info_set(info, "romio_cb_write", "disable");

  /* Get path to the target file of the communicator */
  MPI_Comm_size(sub_write_comm, &sub_comm_size);
  get_coll_io_path(coll_path, sub_comm_color);

  /* Delete the output file if it exists so that striping can be set
   * on the output file. */
  //  rc = MPI_File_delete(coll_path, info);

  /* Create write data*/
  MPI_Comm_rank(sub_write_comm, &sub_rank);
  buf = create_io_data(sub_rank);
  /* if (sub_rank == 0) { */
  /*   rc = MPI_File_delete(coll_path, MPI_INFO_NULL); */
  /*   if (rc != MPI_SUCCESS) { */
  /*     gio_err("MPI_File_delete failed  (%s:%s:%d)", __FILE__, __func__, __LINE__); */
  /*   } */
  /* } */
  ptimes[1].end = MPI_Wtime();

  MPI_Barrier(MPI_COMM_WORLD);

  /* Open the file */
  //  gio_dbg("start ***********************");  
  ptimes[2].start = MPI_Wtime();
  rc = MPI_File_open(sub_write_comm, coll_path, 
		     MPI_MODE_WRONLY | MPI_MODE_CREATE, 
		     info, &fh);

  ptimes[2].end = MPI_Wtime();

  if (rc != MPI_SUCCESS) {
    gio_err("MPI_File_open failed  (%s:%s:%d)", __FILE__, __func__, __LINE__);
  }

  //  gio_dbg("start *********************** %d", sub_rank);  
  ptimes[3].start = MPI_Wtime();
  /* Set the file view for the output file. In this example, we will
   * use the same contiguous datatype as we used for reading the data
   * into local memory. A better example would be to write out just
   * part of the data, say 4 contiguous elements followed by a gap of
   * 4 elements, and repeated. */
  disp = (MPI_Offset)sub_rank * data_size;
#ifdef GIO_LARGE_FILE
  int i;
  for (i = 0; i < sub_rank; i++) {
    MPI_File_seek(fh, data_size, MPI_SEEK_CUR);
  }
#else  
  MPI_File_set_view(fh, disp, contig, contig, "native", info);
#endif
  if (rc != MPI_SUCCESS) {
    gio_err("MPI_File_set_view failed  (%s:%s:%d)", __FILE__, __func__, __LINE__);
  }
  ptimes[3].end = MPI_Wtime();
  //  gio_dbg("end ***********************");  

  /* MPI Collective Write */
  ptimes[4].start = MPI_Wtime();
  rc = MPI_File_write_all(fh, buf, 1, contig, MPI_STATUS_IGNORE);
  if (rc != MPI_SUCCESS) {
    gio_err("MPI_File_set_view failed  (%s:%s:%d)", __FILE__, __func__, __LINE__);
  }
  ptimes[4].end = MPI_Wtime();

  /*Free data*/
  free_io_data(buf);

  /* Close Files */
  ptimes[5].start = MPI_Wtime();
  MPI_File_close(&fh);
  ptimes[5].end = MPI_Wtime();
  ptimes[0].end = MPI_Wtime();

  print_results();

  return;
}
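The striping_factor and striping_unit hints set above are passed as ASCII strings and are purely advisory: whether they take effect depends on the MPI-IO implementation and the underlying file system (ROMIO honours them on Lustre, for instance). A stand-alone sketch of the same pattern with made-up values; demo_hints.out, a factor of 4 and a 1 MiB unit are illustrative choices only.

#include <mpi.h>

/* Minimal sketch: open a file for collective writing with striping hints. */
int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Info info;

    MPI_Init(&argc, &argv);
    MPI_Info_create(&info);
    MPI_Info_set(info, "striping_factor", "4");        /* number of stripes  */
    MPI_Info_set(info, "striping_unit", "1048576");    /* 1 MiB stripe size  */

    MPI_File_open(MPI_COMM_WORLD, "demo_hints.out",
                  MPI_MODE_WRONLY | MPI_MODE_CREATE, info, &fh);

    /* ... MPI_File_set_view / MPI_File_write_all would go here ... */

    MPI_File_close(&fh);
    MPI_Info_free(&info);
    MPI_Finalize();
    return 0;
}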
Ejemplo n.º 24
0
int
main(int argc, char* argv[]) 
{
   int n, my_rank;
   int array_of_subsizes[NDIMS], array_of_starts[NDIMS], array_of_sizes[NDIMS];
   int size = 4;
   int sqrtn;
   int ln;
   MPI_Datatype filetype, memtype;
   MPI_File fh;
   char hdr[128];
   int header_bytes;
   unsigned char *cur;
   char name[128];
   int resultlen;
   int ret;
   int i, j;

   /* Initialize MPI. */
   MPI_Init(&argc, &argv);
   MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN);

   /* Learn my rank and the total number of processors. */
   MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
   MPI_Comm_size(MPI_COMM_WORLD, &n);

   /* Speak! */
   MPI_Get_processor_name(name, &resultlen);
   printf("process %d running on %s\n", my_rank, name);

   /* Set up our values. */
   sqrtn = (int)sqrt(n);
   ln = size/sqrtn;
   printf("n = %d, sqrtn = %d, ln = %d storage = %d\n", n, sqrtn, ln, (ln + 2) * (ln + 2));

   /* Allocation storage. */
   if (!(cur = calloc((ln + 2) * (ln + 2), 1)))
      return ERR;

   /* Initialize data. */
   for (i = 1; i < ln + 1; i++)
      for (j = 1; j < ln + 1; j++)
	 cur[i * (ln + 2) + j] = my_rank;

   /* Create a subarray type for the file. */
   array_of_sizes[0] = array_of_sizes[1] = size;
   array_of_subsizes[0] = array_of_subsizes[1] = ln;
   array_of_starts[0] = my_rank/sqrtn * ln;
   array_of_starts[1] = (my_rank % sqrtn) * ln;
   if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_BYTE, &filetype)))
      MPIERR(ret);
   if ((ret = MPI_Type_commit(&filetype)))
      MPIERR(ret);

   /* Create a subarray type for memory. */
   array_of_sizes[0] = array_of_sizes[1] = ln + 2;
   array_of_subsizes[0] = array_of_subsizes[1] = ln;
   array_of_starts[0] = array_of_starts[1] = 1;
   if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_BYTE, &memtype)))
      MPIERR(ret);
   if ((ret = MPI_Type_commit(&memtype)))
      MPIERR(ret);

   MPI_File_delete(FILE_NAME, MPI_INFO_NULL);
   if ((ret = MPI_File_open(MPI_COMM_WORLD, FILE_NAME, MPI_MODE_CREATE|MPI_MODE_RDWR, 
        MPI_INFO_NULL, &fh)))
      MPIERR(ret);

   /* Create header info; every rank collectively writes the same header at the start of the file. */
   sprintf(hdr, "P5\n%d %d\n255\n", size, size);
   header_bytes = strlen(hdr);
   if ((ret = MPI_File_write_all(fh, hdr, header_bytes, MPI_BYTE, MPI_STATUS_IGNORE)))
      MPIERR(ret);
	 
   /* Set the file view to translate our memory data into the file's data layout. */
   MPI_File_set_view(fh, header_bytes, MPI_BYTE, filetype, "native", MPI_INFO_NULL);

   /* Write the output. */
   MPI_File_write(fh, cur, 1, memtype, MPI_STATUS_IGNORE);

   if ((ret = MPI_File_close(&fh)))
      MPIERR(ret);

   MPI_Finalize();
   return 0;
}
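In the program above every rank collectively writes the same header bytes at the start of the file, which works only because the data is identical on all ranks. The variant where rank 0 alone writes the header, which the original comment hints at, could look like the sketch below; write_header_rank0 is an illustrative helper, and fh, hdr, header_bytes and filetype are assumed to be set up as in the example.

#include <mpi.h>

/* Sketch: only rank 0 writes the PGM header, then every rank sets the
 * same view past it. header_bytes must still be known on every rank,
 * because the view displacement is collective state. */
static void write_header_rank0(MPI_File fh, MPI_Comm comm, char *hdr,
                               int header_bytes, MPI_Datatype filetype)
{
    int my_rank;

    MPI_Comm_rank(comm, &my_rank);
    if (my_rank == 0)
        MPI_File_write_at(fh, 0, hdr, header_bytes, MPI_BYTE, MPI_STATUS_IGNORE);

    /* every rank skips the header in its view */
    MPI_File_set_view(fh, (MPI_Offset)header_bytes, MPI_BYTE, filetype,
                      "native", MPI_INFO_NULL);
}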
Ejemplo n.º 25
0
int test_file(char *filename, int mynod, int nprocs, char * cb_hosts, const char *msg, int verbose)
{
    MPI_Datatype typevec, newtype, t[3];
    int *buf, i, b[3], errcode, errors=0;
    MPI_File fh;
    MPI_Aint d[3];
    MPI_Status status;
    int SIZE = (STARTING_SIZE/nprocs)*nprocs;
    MPI_Info info;

    if (mynod==0 && verbose) fprintf(stderr, "%s\n", msg);

    buf = (int *) malloc(SIZE*sizeof(int));
    if (buf == NULL) {
	    perror("test_file");
	    MPI_Abort(MPI_COMM_WORLD, -1);
    }


    if (cb_hosts != NULL ) {
	    MPI_Info_create(&info);
	    MPI_Info_set(info, "cb_config_list", cb_hosts);
    } else {
	    info = MPI_INFO_NULL;
    }

    MPI_Type_vector(SIZE/nprocs, 1, nprocs, MPI_INT, &typevec);

    b[0] = b[1] = b[2] = 1;
    d[0] = 0;
    d[1] = mynod*sizeof(int);
    d[2] = SIZE*sizeof(int);
    t[0] = MPI_LB;
    t[1] = typevec;
    t[2] = MPI_UB;

    MPI_Type_struct(3, b, d, t, &newtype);
    MPI_Type_commit(&newtype);
    MPI_Type_free(&typevec);

    if (!mynod) {
	if(verbose) fprintf(stderr, "\ntesting noncontiguous in memory, noncontiguous in file using collective I/O\n");
	MPI_File_delete(filename, info);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    errcode = MPI_File_open(MPI_COMM_WORLD, filename,
		    MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh);
    if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "MPI_File_open");
    }

    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);

    for (i=0; i<SIZE; i++) buf[i] = SEEDER(mynod,i,SIZE);
    errcode = MPI_File_write_all(fh, buf, 1, newtype, &status);
    if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "nc mem - nc file: MPI_File_write_all");
    }

    MPI_Barrier(MPI_COMM_WORLD);

    for (i=0; i<SIZE; i++) buf[i] = -1;

    errcode = MPI_File_read_at_all(fh, 0, buf, 1, newtype, &status);
    if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "nc mem - nc file: MPI_File_read_at_all");
    }

    /* the verification for N compute nodes is tricky. Say we have 3
     * processors.
     * process 0 sees: 0 -1 -1 3 -1 -1 ...
     * process 1 sees: -1 34 -1 -1 37 -1 ...
     * process 2 sees: -1 -1 68 -1 -1 71 ... */

    /* verify those leading -1s exist if they should */
    for (i=0; i<mynod; i++ ) {
	    if ( buf[i] != -1 ) {
		    if(verbose) fprintf(stderr, "Process %d: buf is %d, should be -1\n", mynod, buf[i]);
		    errors++;
	    }
    }
    /* now the modulo games are hairy.  processor 0 sees real data in the 0th,
     * 3rd, 6th... elements of the buffer (assuming nprocs==3 ).  proc 1 sees
     * the data in 1st, 4th, 7th..., and proc 2 sees it in 2nd, 5th, 8th */

    for(/* 'i' set in above loop */; i<SIZE; i++) {
	    if ( ((i-mynod)%nprocs) && buf[i] != -1)  {
		    if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be -1\n",
				    mynod, i, buf[i]);
		    errors++;
	    }
	    if ( !((i-mynod)%nprocs) && buf[i] != SEEDER(mynod,i,SIZE) ) {
		    if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be %d\n",
				    mynod, i, buf[i], SEEDER(mynod,i,SIZE));
		    errors++;
	    }
    }
    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
	if(verbose) fprintf(stderr, "\ntesting noncontiguous in memory, contiguous in file using collective I/O\n");
	MPI_File_delete(filename, info);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  info, &fh);

    for (i=0; i<SIZE; i++) buf[i] = SEEDER(mynod,i,SIZE);
    errcode = MPI_File_write_at_all(fh, mynod*(SIZE/nprocs)*sizeof(int),
		    buf, 1, newtype, &status);
    if (errcode != MPI_SUCCESS)
	    handle_error(errcode, "nc mem - c file: MPI_File_write_at_all");

    MPI_Barrier(MPI_COMM_WORLD);

    for (i=0; i<SIZE; i++) buf[i] = -1;

    errcode = MPI_File_read_at_all(fh, mynod*(SIZE/nprocs)*sizeof(int),
		    buf, 1, newtype, &status);
    if (errcode != MPI_SUCCESS)
	    handle_error(errcode, "nc mem - c file: MPI_File_read_at_all");

    /* just like as above */
    for (i=0; i<mynod; i++ ) {
	    if ( buf[i] != -1 ) {
		    if(verbose) fprintf(stderr, "Process %d: buf is %d, should be -1\n", mynod, buf[i]);
		    errors++;
	    }
    }
    for(/* i set in above loop */; i<SIZE; i++) {
	    if ( ((i-mynod)%nprocs) && buf[i] != -1)  {
		    if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be -1\n",
				    mynod, i, buf[i]);
		    errors++;
	    }
	    if ( !((i-mynod)%nprocs) && buf[i] != SEEDER(mynod,i,SIZE)) {
		    if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be %d\n",
				    mynod, i, buf[i], SEEDER(mynod,i,SIZE) );
		    errors++;
	    }
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
	if(verbose) fprintf(stderr, "\ntesting contiguous in memory, noncontiguous in file using collective I/O\n");
	MPI_File_delete(filename, info);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  info, &fh);

    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info);

    for (i=0; i<SIZE; i++) buf[i] = SEEDER(mynod, i, SIZE);
    errcode = MPI_File_write_all(fh, buf, SIZE, MPI_INT, &status);
    if (errcode != MPI_SUCCESS)
	    handle_error(errcode, "c mem - nc file: MPI_File_write_all");

    MPI_Barrier(MPI_COMM_WORLD);

    for (i=0; i<SIZE; i++) buf[i] = -1;

    errcode = MPI_File_read_at_all(fh, 0, buf, SIZE, MPI_INT, &status);
    if (errcode != MPI_SUCCESS)
	    handle_error(errcode, "c mem - nc file: MPI_File_read_at_all");

    /* same crazy checking */
    for (i=0; i<SIZE; i++) {
	    if (buf[i] != SEEDER(mynod, i, SIZE)) {
		if(verbose) fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], SEEDER(mynod, i, SIZE));
		errors++;
	    }
    }

    MPI_File_close(&fh);

    MPI_Type_free(&newtype);
    free(buf);
    if (info != MPI_INFO_NULL) MPI_Info_free(&info);
    return errors;
}
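The newtype built above uses MPI_LB and MPI_UB markers inside MPI_Type_struct; both markers were deprecated in MPI-2 and removed in MPI-3. A minimal sketch of an equivalent construction with MPI_Type_create_struct and MPI_Type_create_resized, keeping the same SIZE, nprocs and mynod meaning as in test_file; make_strided_type is an illustrative helper, not part of the test.

#include <mpi.h>

/* Sketch: build the same resized strided type as in test_file above,
 * but without the removed MPI_LB/MPI_UB markers. */
static MPI_Datatype make_strided_type(int SIZE, int nprocs, int mynod)
{
    MPI_Datatype typevec, placed, newtype;
    int          blocklen = 1;
    MPI_Aint     disp = (MPI_Aint)mynod * sizeof(int);

    MPI_Type_vector(SIZE / nprocs, 1, nprocs, MPI_INT, &typevec);

    /* place the vector at this rank's byte offset ... */
    MPI_Type_create_struct(1, &blocklen, &disp, &typevec, &placed);
    /* ... and force the extent to SIZE ints, as MPI_LB/MPI_UB used to */
    MPI_Type_create_resized(placed, 0, (MPI_Aint)SIZE * sizeof(int), &newtype);
    MPI_Type_commit(&newtype);

    MPI_Type_free(&typevec);
    MPI_Type_free(&placed);
    return newtype;
}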
Ejemplo n.º 26
0
int main(int argc,char* argv[])
{
   char*  params = NULL;
   char   gauge_name[qcd_MAX_STRING_LENGTH];
   char   param_name[qcd_MAX_STRING_LENGTH];
   char   out_name[qcd_MAX_STRING_LENGTH];
   qcd_int_4   x_src[4],lx_src[4],i,nsmear,nsmearAPE;
   qcd_real_8   alpha ,alphaAPE,plaq;
   int params_len;   

   qcd_geometry geo;
   qcd_gaugeField u, uAPE;
   qcd_gaugeField *u_ptr, *uAPE_ptr, *utmp_ptr;
   qcd_vector vec;
   qcd_uint_2 P[4];
   qcd_uint_2 L[4];
   qcd_real_8 theta[4]={M_PI,0.,0.,0.}; // boundary conditions

   int myid,numprocs, namelen;    
   char processor_name[MPI_MAX_PROCESSOR_NAME];


   //set up MPI
   MPI_Init(&argc, &argv);
   MPI_Comm_size(MPI_COMM_WORLD,&numprocs);         // num. of processes taking part in the calculation
   MPI_Comm_rank(MPI_COMM_WORLD,&myid);             // each process gets its ID
   MPI_Get_processor_name(processor_name,&namelen); // 


//////////////////// READ INPUT FILE /////////////////////////////////////////////
      
   if(argc!=2)
   {
      if(myid==0) fprintf(stderr,"No input file specified\n");
      exit(EXIT_FAILURE);
   }

   strcpy(param_name,argv[1]);
   if(myid==0)
   {
      i=0;
      printf("opening input file %s\n",param_name);
      params=qcd_getParams(param_name,&params_len);
      if(params==NULL)
      {
         i=1;
      }
   }
   MPI_Bcast(&i,1,MPI_INT, 0, MPI_COMM_WORLD);
   if(i==1) exit(EXIT_FAILURE);
   MPI_Bcast(&params_len, 1, MPI_INT, 0, MPI_COMM_WORLD);
   if(myid!=0) params = (char*) malloc(params_len*sizeof(char));
   MPI_Bcast(params, params_len, MPI_CHAR, 0, MPI_COMM_WORLD);
   
   sscanf(qcd_getParam("<processors_txyz>",params,params_len),"%hd %hd %hd %hd",&P[0], &P[1], &P[2], &P[3]);
   if(P[0] != 1)
     {
       fprintf(stderr, " Must use only 1 process along t-direction, exiting...\n");
       exit(EXIT_FAILURE);
     }
   sscanf(qcd_getParam("<lattice_txyz>",params,params_len),"%hd %hd %hd %hd",&L[0], &L[1], &L[2], &L[3]);
   if(qcd_initGeometry(&geo,L,P, theta, myid, numprocs)) exit(EXIT_FAILURE);
   
   if(myid==0) printf(" Local lattice: %i x %i x %i x %i\n",geo.lL[0],geo.lL[1],geo.lL[2],geo.lL[3]);  

   sscanf(qcd_getParam("<source_pos_txyz>",params,params_len),"%d %d %d %d",&x_src[0], &x_src[1], &x_src[2], &x_src[3]);
   if(myid==0) printf(" Got source coords: %d %d %d %d\n",x_src[0],x_src[1],x_src[2],x_src[3]);
   
   sscanf(qcd_getParam("<alpha_gauss>",params,params_len),"%lf", &alpha);
   if(myid==0) printf(" Got alpha_gauss: %lf\n",alpha);
   sscanf(qcd_getParam("<nsmear_gauss>",params,params_len),"%d",&nsmear);
   if(myid==0) printf(" Got nsmear_gauss: %d\n",nsmear);
   sscanf(qcd_getParam("<alpha_APE>",params,params_len),"%lf",&alphaAPE);
   if(myid==0) printf(" Got alpha_APE: %lf\n",alphaAPE);
   sscanf(qcd_getParam("<nsmear_APE>",params,params_len),"%d",&nsmearAPE);
   if(myid==0) printf(" Got nsmear_APE: %d\n",nsmearAPE);   
   strcpy(gauge_name,qcd_getParam("<cfg_name>",params,params_len));
   if(myid==0) printf(" Got conf name: %s\n",gauge_name);
   strcpy(out_name,qcd_getParam("<src_block_name>",params,params_len));
   if(myid==0) printf(" Got out name: %s\n",out_name);
  

   free(params);      
   ///////////////////////////////////////////////////////////////////////////////////////////////////
   
   if(nsmear != 0)
     {
       qcd_initGaugeField(&u,&geo);
       qcd_initGaugeField(&uAPE,&geo);

       if(qcd_getGaugeField(gauge_name,qcd_GF_LIME,&u))
	 {
	   fprintf(stderr,"process %i: Error reading gauge field!\n",myid);
	   exit(EXIT_FAILURE);
	 }
       
       if(myid==0) printf("gauge-field loaded\n");
       plaq = qcd_calculatePlaquette(&u);
       if(myid==0) printf("plaquette = %e\n",plaq);
       
       u_ptr = &u;
       uAPE_ptr = &uAPE;   
       for(i=0; i<nsmearAPE; i++)
	 {
	   qcd_apeSmear3d(uAPE_ptr, u_ptr, alphaAPE);
	   utmp_ptr=u_ptr; u_ptr=uAPE_ptr; uAPE_ptr=utmp_ptr;   
	 }
       utmp_ptr=u_ptr; u_ptr=uAPE_ptr; uAPE_ptr=utmp_ptr; //reverse the last swap. Also needed when nsmearAPE=0
       qcd_destroyGaugeField(u_ptr);
       uAPE = *uAPE_ptr;
    
       if(myid==0) printf("gauge-field APE-smeared\n");
       plaq = qcd_calculatePlaquette(&uAPE);
       if(myid==0) printf("plaquette = %e\n",plaq); 
   
       //qcd_initPropagator(&source,&geo);
     }
   
   qcd_initVector(&vec,&geo);
  
   // which process has the source coords?
   for(i=0; i<4; i++)
      lx_src[i] = x_src[i]/geo.lL[i];
          
   qcd_zeroVector(&vec); 
   if( (lx_src[0]==geo.Pos[0]) && 
       (lx_src[1]==geo.Pos[1]) && 
       (lx_src[2]==geo.Pos[2]) && 
       (lx_src[3]==geo.Pos[3]) )
     {
       vec.D[qcd_LEXIC((x_src[0]%geo.lL[0]),
		       (x_src[1]%geo.lL[1]),
		       (x_src[2]%geo.lL[2]),
		       (x_src[3]%geo.lL[3]), geo.lL)][0][0].re=1.;
     }
   for(i=0; i<nsmear; i++)
     {
       if(qcd_gaussIteration3d(&vec,&uAPE,alpha,x_src[0]))
	 {
	   fprintf(stderr,"process %i: Error while smearing!\n",geo.myid);
	   exit(EXIT_FAILURE);
	 }	   
     }

   qcd_real_8 *src = malloc(sizeof(qcd_real_8)*geo.lL[1]*geo.lL[2]*geo.lL[3]);
   if(src == NULL)
     {
       fprintf(stderr, "process %i: malloc returned NULL!\n",geo.myid);
       exit(EXIT_FAILURE);
     }
   if( lx_src[0]==geo.Pos[0] )
     {
       for(int z=0; z<geo.lL[3]; z++)
	 for(int y=0; y<geo.lL[2]; y++)
	   for(int x=0; x<geo.lL[1]; x++)
	     {       
	       int lv = qcd_LEXIC((x_src[0]%geo.lL[0]), x, y, z, geo.lL);
	       double loc_norm = 0;
	       for(int mu=0; mu<4; mu++)
		 for(int c=0; c<3; c++)
		   {
		     loc_norm += qcd_NORMSQUARED(vec.D[lv][mu][c]);
		   }
	       src[x+geo.lL[1]*(y+z*geo.lL[2])] = loc_norm;
	     }
     }
   qcd_destroyVector(&vec);
   /*
    * This assumes only 1 process in time
    * If not, anything may happen
    */

   MPI_Datatype fileview;
   MPI_File fh;
   MPI_Status status;
   int globv3[] = {geo.L[3], geo.L[2], geo.L[1]};
   int locv3[] = {geo.lL[3], geo.lL[2], geo.lL[1]};
   int starts[] = {geo.Pos[3]*locv3[0], geo.Pos[2]*locv3[1], geo.Pos[1]*locv3[2]};

   MPI_Type_create_subarray(3, globv3, locv3, starts, MPI_ORDER_C, MPI_DOUBLE, &fileview);
   MPI_Type_commit(&fileview);
   MPI_File_open(MPI_COMM_WORLD, out_name, MPI_MODE_WRONLY|MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
   MPI_File_set_size(fh, 0);
   MPI_File_set_view(fh, 0, MPI_DOUBLE, fileview, "native", MPI_INFO_NULL);
   if(!qcd_isBigEndian())
     {
       qcd_swap_8(src, (geo.lL[1]*geo.lL[2]*geo.lL[3]));
     }
   MPI_File_write_all(fh, src, (geo.lL[1]*geo.lL[2]*geo.lL[3]), MPI_DOUBLE, &status);
   MPI_File_close(&fh);
   free(src);
   if(nsmear != 0)
     qcd_destroyGaugeField(&uAPE); 

   
   ////////////////////////////////////// CLEAN UP AND EXIT ///////////////////////////////////////////
   //qcd_destroyPropagator(&source);
   qcd_destroyGeometry(&geo);
   MPI_Finalize();
   return(EXIT_SUCCESS);
}//end main
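The I/O section of the example above combines MPI_Type_create_subarray, MPI_File_set_view and MPI_File_write_all to dump a block-decomposed array with a single collective call. Below is a reduced sketch of just that pattern; the 4x4x4 local block, the Cartesian process grid and the file name "subarray.dat" are assumptions chosen purely for illustration.

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, nprocs, dims[3] = {0, 0, 0}, periods[3] = {0, 0, 0}, coords[3];
    MPI_Comm cart;
    MPI_File fh;
    MPI_Datatype fileview;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Dims_create(nprocs, 3, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 3, dims, periods, 0, &cart);
    MPI_Comm_rank(cart, &rank);
    MPI_Cart_coords(cart, rank, 3, coords);

    /* a 4x4x4 local block per process; the global grid follows from the
       process grid, so no divisibility assumptions are needed */
    int loc[3]   = {4, 4, 4};
    int glob[3]  = {loc[0] * dims[0], loc[1] * dims[1], loc[2] * dims[2]};
    int start[3] = {coords[0] * loc[0], coords[1] * loc[1], coords[2] * loc[2]};

    MPI_Type_create_subarray(3, glob, loc, start, MPI_ORDER_C,
                             MPI_DOUBLE, &fileview);
    MPI_Type_commit(&fileview);

    int n = loc[0] * loc[1] * loc[2];
    double *block = malloc(n * sizeof(double));
    for (int i = 0; i < n; i++) block[i] = rank;        /* dummy payload */

    MPI_File_open(cart, "subarray.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_DOUBLE, fileview, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, block, n, MPI_DOUBLE, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Type_free(&fileview);
    free(block);
    MPI_Comm_free(&cart);
    MPI_Finalize();
    return 0;
}

The example above additionally byte-swaps the buffer before the write to force a fixed endianness on disk; that step is independent of the view/write pattern shown here.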
Ejemplo n.º 27
0
int main(int argc, char **argv) {
    int info, i, j, pcol, Adim;
    double *D;
    int *DESCD;
    CSRdouble BT_i, B_j, Xsparse, Zsparse, Btsparse;

    /*BT_i.allocate(0,0,0);
    B_j.allocate(0,0,0);
    Xsparse.allocate(0,0,0);
    Zsparse.allocate(0,0,0);
    Btsparse.allocate(0,0,0);*/

    //Initialise MPI and some MPI-variables
    info = MPI_Init ( &argc, &argv );
    if ( info != 0 ) {
        printf ( "Error in MPI initialisation: %d\n",info );
        return info;
    }

    position= ( int* ) calloc ( 2,sizeof ( int ) );
    if ( position==NULL ) {
        printf ( "unable to allocate memory for processor position coordinate\n" );
        return EXIT_FAILURE;
    }

    dims= ( int* ) calloc ( 2,sizeof ( int ) );
    if ( dims==NULL ) {
        printf ( "unable to allocate memory for grid dimensions coordinate\n" );
        return EXIT_FAILURE;
    }

    //BLACS is the interface used by PBLAS and ScaLAPACK on top of MPI

    blacs_pinfo_ ( &iam,&size ); 				//determine the number of processes involved
    info=MPI_Dims_create ( size, 2, dims );			//determine the best 2D cartesian grid with the number of processes
    if ( info != 0 ) {
        printf ( "Error in MPI creation of dimensions: %d\n",info );
        return info;
    }

    //Until now the code can only work with square process grids
    //So we try to get the biggest square grid possible with the number of processes involved
    if (*dims != *(dims+1)) {
        while (*dims * *dims > size)
            *dims -=1;
        *(dims+1)= *dims;
        if (iam==0)
            printf("WARNING: %d processor(s) unused due to reformatting to a square process grid\n", size - (*dims * *dims));
        size = *dims * *dims;
        //cout << "New size of process grid: " << size << endl;
    }

    blacs_get_ ( &i_negone,&i_zero,&ICTXT2D );

    //Initialisation of the BLACS process grid, which is referenced as ICTXT2D
    blacs_gridinit_ ( &ICTXT2D,"R",dims, dims+1 );

    if (iam < size) {

        //The rank (iam) of the process is mapped to a 2D grid: position= (process row, process column)
        blacs_pcoord_ ( &ICTXT2D,&iam,position, position+1 );
        if ( *position ==-1 ) {
            printf ( "Error in proces grid\n" );
            return -1;
        }

        //Filenames, dimensions of all matrices and other important variables are read in as global variables (see src/readinput.cpp)
        info=read_input ( *++argv );
        if ( info!=0 ) {
            printf ( "Something went wrong when reading input file for processor %d\n",iam );
            return -1;
        }

        //blacs_barrier is used to stop any process from going beyond this point before all processes have reached it.
        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if ( * ( position+1 ) ==0 && *position==0 )
            printf ( "Reading of input-file succesful\n" );

        if ( * ( position+1 ) ==0 && *position==0 ) {
            printf("\nA linear mixed model with %d observations, %d genotypes, %d random effects and %d fixed effects\n", n,k,m,l);
            printf("was analyzed using %d (%d x %d) processors\n",size,*dims,*(dims+1));
        }

        //Dimension of A (sparse matrix) is the number of fixed effects(m) + the sparse random effects (l)
        Adim=m+l;

        //Dimension of D (dense matrix) is the number of dense effects (k)
        Ddim=k;

        pcol= * ( position+1 );

        //Define number of blocks needed to store a complete column/row of D
        Dblocks= Ddim%blocksize==0 ? Ddim/blocksize : Ddim/blocksize +1;

        //Define the number of rowblocks needed by the current process to store its part of the dense matrix D
        Drows= ( Dblocks - *position ) % *dims == 0 ? ( Dblocks- *position ) / *dims : ( Dblocks- *position ) / *dims +1;
        Drows= Drows<1? 1 : Drows;

        //Define the number of columnblocks needed by the current process to store its part of the dense matrix D
        Dcols= ( Dblocks - pcol ) % * ( dims+1 ) == 0 ? ( Dblocks- pcol ) / * ( dims+1 ) : ( Dblocks- pcol ) / * ( dims+1 ) +1;
        Dcols=Dcols<1? 1 : Dcols;

        //Define the local leading dimension of D (keeping in mind that matrices are always stored column-wise)
        lld_D=Drows*blocksize;

        //Initialise the descriptor of the dense distributed matrix
        DESCD= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCD==NULL ) {
            printf ( "unable to allocate memory for descriptor for C\n" );
            return -1;
        }

        //D with dimensions (Ddim,Ddim) is distributed over all processes in ICTXT2D, with the first element in process (0,0)
        //D is distributed into blocks of size (blocksize,blocksize), having a local leading dimension lld_D in this specific process
        descinit_ ( DESCD, &Ddim, &Ddim, &blocksize, &blocksize, &i_zero, &i_zero, &ICTXT2D, &lld_D, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        //Allocate the space necessary to store the part of D that is held into memory of this process.
        D = ( double* ) calloc ( Drows * blocksize * Dcols * blocksize,sizeof ( double ) );
        if ( D==NULL ) {
            printf ( "unable to allocate memory for Matrix D  (required: %ld bytes)\n", Drows * blocksize * Dcols * blocksize * sizeof ( double ) );
            return EXIT_FAILURE;
        }

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if (iam==0)
            printf ( "Start set up of B & D\n" );

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        //set_up_BD is declared in readdist.cpp and constructs the parts of matrices B & D in each processor
        //which are necessary to create the distributed Schur complement of D
        info = set_up_BD ( DESCD, D, BT_i, B_j, Btsparse );

        //printdense(Drows*blocksize, Dcols * blocksize,D,"matrix_D.txt");

        blacs_barrier_ ( &ICTXT2D,"ALL" );
        if (iam==0)
            printf ( "Matrices B & D set up\n" );

        if(printD_bool) {

            int array_of_gsizes[2], array_of_distribs[2], array_of_dargs[2], array_of_psize[2] ;
            int buffersize;
            MPI_Datatype file_type;
            MPI_File fh;
            MPI_Status status;
            array_of_gsizes[0]=Dblocks * blocksize;
            array_of_gsizes[1]=Dblocks * blocksize;
            array_of_distribs[0]=MPI_DISTRIBUTE_CYCLIC;
            array_of_distribs[1]=MPI_DISTRIBUTE_CYCLIC;
            array_of_dargs[0]=blocksize;
            array_of_dargs[1]=blocksize;
            array_of_psize[0]=*dims;
            array_of_psize[1]=*(dims + 1);

            MPI_Type_create_darray(size,iam,2,array_of_gsizes, array_of_distribs,
                                   array_of_dargs, array_of_psize, MPI_ORDER_FORTRAN,
                                   MPI_DOUBLE, &file_type);
            MPI_Type_commit(&file_type);
            info = MPI_File_open(MPI_COMM_WORLD, filenameD,
                                 MPI_MODE_CREATE | MPI_MODE_WRONLY,
                                 MPI_INFO_NULL, &fh);
            /*if ( ( Drows-1 ) % *(dims+1) == *position && ( Dcols-1 ) % *(dims) == pcol && Ddim%blocksize !=0 )
                buffersize=((Drows-1) * blocksize + Ddim % blocksize) * ((Dcols-1) * blocksize + Ddim % blocksize);
            else if ( ( Drows-1 ) % *(dims+1) == *position && Ddim%blocksize !=0 )
                buffersize=((Drows-1) * blocksize + Ddim % blocksize) * Dcols * blocksize;
            else if ( ( Dcols-1 ) % *(dims) == *position && Ddim%blocksize !=0 )
                buffersize=((Dcols-1) * blocksize + Ddim % blocksize) * Drows * blocksize;
            else*/
            buffersize= Dcols * Drows * blocksize * blocksize;

            MPI_File_set_view(fh, 0, MPI_DOUBLE, file_type, "native", MPI_INFO_NULL);
            info =MPI_File_write_all(fh, D,buffersize, MPI_DOUBLE,
                                     &status);
	    MPI_File_close(&fh);
            if(iam==0) {
                printf("Matrix D (dimension %d) is printed in file %s\n", Dblocks*blocksize,filenameD);
            }
            if(filenameD != NULL)
                free(filenameD);
            filenameD=NULL;
            //delete[] array_of_gsizes, delete[] array_of_distribs, delete[] array_of_dargs, delete[] array_of_psize;
        }



        //Now every process has to set up the sparse matrix A, consisting of X'X, X'Z, Z'X and Z'Z + lambda*I
        Xsparse.loadFromFile ( filenameX );
        Zsparse.loadFromFile ( filenameZ );

        if(filenameX != NULL)
            free(filenameX);
        filenameX=NULL;
        if(filenameZ != NULL)
            free(filenameZ);
        filenameZ=NULL;

        smat_t *X_smat, *Z_smat;

        X_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Z_smat= (smat_t *) calloc(1,sizeof(smat_t));

        X_smat = smat_new_from ( Xsparse.nrows,Xsparse.ncols,Xsparse.pRows,Xsparse.pCols,Xsparse.pData,0,0 );
        Z_smat = smat_new_from ( Zsparse.nrows,Zsparse.ncols,Zsparse.pRows,Zsparse.pCols,Zsparse.pData,0,0 );

        smat_t *Xt_smat, *Zt_smat;
        Xt_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Zt_smat= (smat_t *) calloc(1,sizeof(smat_t));
        Xt_smat = smat_copy_trans ( X_smat );
        Zt_smat = smat_copy_trans ( Z_smat );

        CSRdouble Asparse;
        smat_t *XtX_smat, *XtZ_smat, *ZtZ_smat, *lambda_smat, *ZtZlambda_smat;

        XtX_smat= (smat_t *) calloc(1,sizeof(smat_t));
        XtZ_smat= (smat_t *) calloc(1,sizeof(smat_t));
        ZtZ_smat= (smat_t *) calloc(1,sizeof(smat_t));


        XtX_smat = smat_matmul ( Xt_smat, X_smat );
        XtZ_smat = smat_matmul ( Xt_smat, Z_smat );
        ZtZ_smat = smat_matmul ( Zt_smat,Z_smat );

        Xsparse.clear();
        Zsparse.clear();
        smat_free(Xt_smat);
        smat_free(Zt_smat);
        /*smat_free(X_smat);
        smat_free(Z_smat);*/

        CSRdouble Imat;

        makeIdentity ( l, Imat );

        lambda_smat= (smat_t *) calloc(1,sizeof(smat_t));

        lambda_smat = smat_new_from ( Imat.nrows,Imat.ncols,Imat.pRows,Imat.pCols,Imat.pData,0,0 );

        smat_scale_diag ( lambda_smat, -lambda );

        ZtZlambda_smat= (smat_t *) calloc(1,sizeof(smat_t));

        ZtZlambda_smat = smat_add ( lambda_smat, ZtZ_smat );

        smat_free(ZtZ_smat);
        //smat_free(lambda_smat);


        smat_to_symmetric_structure ( XtX_smat );
        smat_to_symmetric_structure ( ZtZlambda_smat );

        CSRdouble XtX_sparse, XtZ_sparse, ZtZ_sparse;

        XtX_sparse.make2 ( XtX_smat->m,XtX_smat->n,XtX_smat->nnz,XtX_smat->ia,XtX_smat->ja,XtX_smat->a );
        XtZ_sparse.make2 ( XtZ_smat->m,XtZ_smat->n,XtZ_smat->nnz,XtZ_smat->ia,XtZ_smat->ja,XtZ_smat->a );
        ZtZ_sparse.make2 ( ZtZlambda_smat->m,ZtZlambda_smat->n,ZtZlambda_smat->nnz,ZtZlambda_smat->ia,ZtZlambda_smat->ja,ZtZlambda_smat->a );

        /*smat_free(XtX_smat);
        smat_free(XtZ_smat);
        smat_free(ZtZlambda_smat);*/
        Imat.clear();

        if (iam==0) {
            cout << "***                                           [  t     t  ] *** " << endl;
            cout << "***                                           [ X X   X Z ] *** " << endl;
            cout << "***                                           [           ] *** " << endl;
            cout << "*** G e n e r a t i n g    m a t r i x    A = [           ] *** " << endl;
            cout << "***                                           [  t     t  ] *** " << endl;
            cout << "***                                           [ Z X   Z Z ] *** " << endl;
        }

        //Sparse matrix A only contains the upper triangular part of A
        create2x2SymBlockMatrix ( XtX_sparse, XtZ_sparse, ZtZ_sparse, Asparse );
        //Asparse.writeToFile("A_sparse.csr");

        smat_free(XtX_smat);
        smat_free(XtZ_smat);
        smat_free(ZtZlambda_smat);
        XtX_sparse.clear();
        XtZ_sparse.clear();
        ZtZ_sparse.clear();

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        if(printsparseC_bool) {
            CSRdouble Dmat, Dblock, Csparse;
            Dblock.nrows=Dblocks * blocksize;
            Dblock.ncols=Dblocks * blocksize;
            Dblock.allocate(Dblocks * blocksize, Dblocks * blocksize, 0);
            Dmat.allocate(0,0,0);
            for (i=0; i<Drows; ++i) {
                for(j=0; j<Dcols; ++j) {
                    dense2CSR_sub(D + i * blocksize + j * lld_D * blocksize,blocksize,blocksize,lld_D,Dblock,( * ( dims) * i + *position ) *blocksize,
                                  ( * ( dims+1 ) * j + pcol ) *blocksize);
                    if ( Dblock.nonzeros>0 ) {
                        if ( Dmat.nonzeros==0 ) {
                            Dmat.make2 ( Dblock.nrows,Dblock.ncols,Dblock.nonzeros,Dblock.pRows,Dblock.pCols,Dblock.pData );
                        }
                        else {
                            Dmat.addBCSR ( Dblock );
                        }
                    }

                    Dblock.clear();
                }
            }
            blacs_barrier_(&ICTXT2D,"A");
            if ( iam!=0 ) {
                //Each process other than root sends its Dmat to the root process.
                MPI_Send ( & ( Dmat.nonzeros ),1, MPI_INT,0,iam,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pRows[0] ),Dmat.nrows + 1, MPI_INT,0,iam+size,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pCols[0] ),Dmat.nonzeros, MPI_INT,0,iam+2*size,MPI_COMM_WORLD );
                MPI_Send ( & ( Dmat.pData[0] ),Dmat.nonzeros, MPI_DOUBLE,0,iam+3*size,MPI_COMM_WORLD );
                Dmat.clear();
            }
            else {
                for ( i=1; i<size; ++i ) {
                    // The root process receives parts of Dmat sequentially from all processes and directly adds them together.
                    int nonzeroes, count;
                    MPI_Recv ( &nonzeroes,1,MPI_INT,i,i,MPI_COMM_WORLD,&status );
                    /*MPI_Get_count(&status, MPI_INT, &count);
                    printf("Process 0 received %d elements of process %d\n",count,i);*/
                    if(nonzeroes>0) {
                        printf("Nonzeroes : %d\n ",nonzeroes);
                        Dblock.allocate ( Dblocks * blocksize,Dblocks * blocksize,nonzeroes );
                        MPI_Recv ( & ( Dblock.pRows[0] ), Dblocks * blocksize + 1, MPI_INT,i,i+size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_INT, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        MPI_Recv ( & ( Dblock.pCols[0] ),nonzeroes, MPI_INT,i,i+2*size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_INT, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        MPI_Recv ( & ( Dblock.pData[0] ),nonzeroes, MPI_DOUBLE,i,i+3*size,MPI_COMM_WORLD,&status );
                        /*MPI_Get_count(&status, MPI_DOUBLE, &count);
                        printf("Process 0 received %d elements of process %d\n",count,i);*/
                        Dmat.addBCSR ( Dblock );
                    }
                }
                //Dmat.writeToFile("D_sparse.csr");
                Dmat.reduceSymmetric();
                Btsparse.transposeIt(1);
                create2x2SymBlockMatrix(Asparse,Btsparse, Dmat, Csparse);
                Btsparse.clear();
                Dmat.clear();
                Csparse.writeToFile(filenameC);
                Csparse.clear();
                if(filenameC != NULL)
                    free(filenameC);
                filenameC=NULL;
            }
        }
        Btsparse.clear();
        blacs_barrier_(&ICTXT2D,"A");

        //AB_sol will contain the solution of A*X=B, distributed across the process rows. Processes in the same process row possess the same part of AB_sol
        double * AB_sol;
        int * DESCAB_sol;
        DESCAB_sol= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCAB_sol==NULL ) {
            printf ( "unable to allocate memory for descriptor for AB_sol\n" );
            return -1;
        }
        //AB_sol (Adim, Ddim) is distributed across all processes in ICTXT2D starting from process (0,0) into blocks of size (Adim, blocksize)
        descinit_ ( DESCAB_sol, &Adim, &Ddim, &Adim, &blocksize, &i_zero, &i_zero, &ICTXT2D, &Adim, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        AB_sol=(double *) calloc(Adim * Dcols*blocksize,sizeof(double));

        // Each process calculates the Schur complement of the part of D at its disposal. (see src/schur.cpp)
        // The solution of A * Y = B_j is stored in AB_sol (= A^-1 * B_j)
        blacs_barrier_(&ICTXT2D,"A");
        make_Sij_parallel_denseB ( Asparse, BT_i, B_j, D, lld_D, AB_sol );
        BT_i.clear();
        B_j.clear();

        //From here on the Schur complement S of D is stored in D

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        //The Schur complement is factorised (by ScaLAPACK)
        pdpotrf_ ( "U",&k,D,&i_one,&i_one,DESCD,&info );
        if ( info != 0 ) {
            printf ( "Cholesky decomposition of D was unsuccessful, error returned: %d\n",info );
            return -1;
        }

        //From here on the factorization of the Schur complement S is stored in D

        blacs_barrier_ ( &ICTXT2D,"ALL" );

        //The Schur complement is inverted (by ScaLAPACK)
        pdpotri_ ( "U",&k,D,&i_one,&i_one,DESCD,&info );
        if ( info != 0 ) {
            printf ( "Inverse of D was unsuccessful, error returned: %d\n",info );
            return -1;
        }

        //From here on the inverse of the Schur complement S is stored in D

        blacs_barrier_(&ICTXT2D,"A");

        double* InvD_T_Block = ( double* ) calloc ( Dblocks * blocksize + Adim ,sizeof ( double ) );

        //Diagonal elements of the (1,1) block of C^-1 are still distributed and here they are gathered in InvD_T_Block in the root process.
        if(*position == pcol) {
            for (i=0; i<Ddim; ++i) {
                if (pcol == (i/blocksize) % *dims) {
                    int Dpos = i%blocksize + ((i/blocksize) / *dims) * blocksize ;
                    *(InvD_T_Block + Adim +i) = *( D + Dpos + lld_D * Dpos);
                }
            }
            for ( i=0,j=0; i<Dblocks; ++i,++j ) {
                if ( j==*dims )
                    j=0;
                if ( *position==j ) {
                    dgesd2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + i * blocksize,&blocksize,&i_zero,&i_zero );
                }
                if ( *position==0 ) {
                    dgerv2d_ ( &ICTXT2D,&blocksize,&i_one,InvD_T_Block + Adim + blocksize*i,&blocksize,&j,&j );
                }
            }
        }

        blacs_barrier_(&ICTXT2D,"A");

        //Only the root process performs a selected inversion of A.
        if (iam==0) {

            int pardiso_message_level = 1;

            int pardiso_mtype=-2;

            ParDiSO pardiso ( pardiso_mtype, pardiso_message_level );
            int number_of_processors = 1;
            char* var = getenv("OMP_NUM_THREADS");
            if(var != NULL) {
                sscanf( var, "%d", &number_of_processors );
            }
            else {
                printf("Set environment OMP_NUM_THREADS to 1");
                exit(1);
            }

            pardiso.iparm[2]  = 2;
            pardiso.iparm[3]  = number_of_processors;
            pardiso.iparm[8]  = 0;
            pardiso.iparm[11] = 1;
            pardiso.iparm[13]  = 0;
            pardiso.iparm[28]  = 0;

            //This function calculates the factorisation of A once again so this might be optimized.
            pardiso.findInverseOfA ( Asparse );

            printf("Processor %d inverted matrix A\n",iam);
        }
        blacs_barrier_(&ICTXT2D,"A");

        // To minimize memory usage, and because only the diagonal elements of the inverse are needed, Y' * S is calculated rowblock by rowblock;
        // the diagonal element is calculated as the dot product of this row and the corresponding column of Y. (Y is the solution of AY=B)
        double* YSrow= ( double* ) calloc ( Dcols * blocksize,sizeof ( double ) );
        int * DESCYSROW;
        DESCYSROW= ( int* ) malloc ( DLEN_ * sizeof ( int ) );
        if ( DESCYSROW==NULL ) {
            printf ( "unable to allocate memory for descriptor for AB_sol\n" );
            return -1;
        }
        //YSrow (1,Ddim) is distributed across processes of ICTXT2D starting from process (0,0) into blocks of size (1,blocksize)
        descinit_ ( DESCYSROW, &i_one, &Ddim, &i_one,&blocksize, &i_zero, &i_zero, &ICTXT2D, &i_one, &info );
        if ( info!=0 ) {
            printf ( "Descriptor of matrix C returns info: %d\n",info );
            return info;
        }

        blacs_barrier_(&ICTXT2D,"A");

        //Calculating diagonal elements 1 by 1 of the (0,0)-block of C^-1.
        for (i=1; i<=Adim; ++i) {
            pdsymm_ ("R","U",&i_one,&Ddim,&d_one,D,&i_one,&i_one,DESCD,AB_sol,&i,&i_one,DESCAB_sol,&d_zero,YSrow,&i_one,&i_one,DESCYSROW);
            pddot_(&Ddim,InvD_T_Block+i-1,AB_sol,&i,&i_one,DESCAB_sol,&Adim,YSrow,&i_one,&i_one,DESCYSROW,&i_one);
            /*if(*position==1 && pcol==1)
            printf("Dot product in process (1,1) is: %g\n", *(InvD_T_Block+i-1));
            if(*position==0 && pcol==1)
            printf("Dot product in process (0,1) is: %g\n",*(InvD_T_Block+i-1));*/
        }
        blacs_barrier_(&ICTXT2D,"A");
        if(YSrow != NULL)
            free(YSrow);
        YSrow = NULL;
        if(DESCYSROW != NULL)
            free(DESCYSROW);
        DESCYSROW = NULL;
        if(AB_sol != NULL)
            free(AB_sol);
        AB_sol = NULL;
        if(DESCAB_sol != NULL)
            free(DESCAB_sol);
        DESCAB_sol = NULL;
        if(D != NULL)
            free(D);
        D = NULL;
        if(DESCD != NULL)
            free(DESCD);
        DESCD = NULL;

        //Only the root process adds the diagonal elements of A^-1
        if (iam ==0) {
            for(i=0; i<Adim; ++i) {
                j=Asparse.pRows[i];
                *(InvD_T_Block+i) += Asparse.pData[j];
            }
            Asparse.clear();
            printdense ( Adim+k,1,InvD_T_Block,"diag_inverse_C_parallel.txt" );
        }
        if(InvD_T_Block != NULL)
            free(InvD_T_Block);
        InvD_T_Block = NULL;
	blacs_gridexit_(&ICTXT2D);
    }
    //cout << iam << " reached end before MPI_Barrier" << endl;
    MPI_Barrier(MPI_COMM_WORLD);
    //MPI_Finalize();

    return 0;
}
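The printD_bool branch above relies on MPI_Type_create_darray to describe the 2D block-cyclic (ScaLAPACK-style) layout of D directly as a filetype, so each process can write its local blocks with one MPI_File_write_all. A stripped-down sketch of that darray-plus-write_all pattern follows; the matrix size, the block size of 2 and the file name "darray.dat" are assumptions for illustration.

#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int rank, nprocs, psizes[2] = {0, 0};
    const int bs = 2;                       /* block size (assumed) */
    MPI_Datatype filetype;
    MPI_File fh;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Dims_create(nprocs, 2, psizes);     /* 2D process grid */

    /* global matrix chosen so every process owns exactly 2x2 blocks */
    int gsizes[2]   = {2 * bs * psizes[0], 2 * bs * psizes[1]};
    int distribs[2] = {MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC};
    int dargs[2]    = {bs, bs};

    MPI_Type_create_darray(nprocs, rank, 2, gsizes, distribs, dargs,
                           psizes, MPI_ORDER_FORTRAN, MPI_DOUBLE, &filetype);
    MPI_Type_commit(&filetype);

    int nlocal = (2 * bs) * (2 * bs);       /* elements owned by this rank */
    double *local = malloc(nlocal * sizeof(double));
    for (int i = 0; i < nlocal; i++) local[i] = rank;   /* dummy payload */

    MPI_File_open(MPI_COMM_WORLD, "darray.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, local, nlocal, MPI_DOUBLE, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Type_free(&filetype);
    free(local);
    MPI_Finalize();
    return 0;
}

MPI_ORDER_FORTRAN is used, as in the example, because ScaLAPACK stores local blocks column-major.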
Ejemplo n.º 28
0
static int test_indexed_with_zeros(char *filename, int testcase)
{
    int i, rank, np, buflen, num, err, nr_errors=0;
    int  nelms[MAXLEN], buf[MAXLEN], indices[MAXLEN], blocklen[MAXLEN];
    MPI_File fh;
    MPI_Status status;
    MPI_Datatype filetype;
    MPI_Datatype types[MAXLEN];
    MPI_Aint addrs[MAXLEN];

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    /* set up the number of integers to write in each iteration */
    for (i=0; i<MAXLEN; i++) nelms[i] = 0;
    if (rank == 0) nelms[4]=nelms[5]=nelms[7]=1;
    if (rank == 1) nelms[0]=nelms[1]=nelms[2]=nelms[3]=nelms[6]=nelms[8]=1;

    /* pre-fill the file with integers -999 */
    if (rank == 0) {
        for (i=0; i<MAXLEN; i++) buf[i] = -999;
	err =MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY,
		MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_write(fh, buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* define a filetype with spurious leading zeros */
    buflen = num = 0;
    for (i=0; i<MAXLEN; i++) {
        buflen       += nelms[i];
        indices[num]  = i;
        addrs[num] = i*sizeof(int);
        blocklen[num] = nelms[i];
        types[num] = MPI_INT;
        num++;
    }
    switch (testcase) {
	case INDEXED:
	    MPI_Type_indexed(num, blocklen, indices, MPI_INT, &filetype);
	    break;
	case HINDEXED:
	    MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, &filetype);
	    break;
	case STRUCT:
	    MPI_Type_create_struct(num, blocklen, addrs, types, &filetype);
	    break;
	default:
	    fprintf(stderr, "unknown testcase!\n");
	    return(-100);

    }

    MPI_Type_commit(&filetype);

    /* initialize write buffer and write to file*/
    for (i=0; i<MAXLEN; i++) buf[i] = 1;
    err =MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
    err = MPI_File_set_view(fh, 0, MPI_INT, filetype, "native", MPI_INFO_NULL);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_set_view");
    err = MPI_File_write_all(fh, buf, buflen, MPI_INT, &status);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write_all");
    MPI_Type_free(&filetype);
    err = MPI_File_close(&fh);
    if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");

    /* read back and check */
    if (rank == 0) {
        err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open");
        err = MPI_File_read(fh,buf, MAXLEN, MPI_INT, &status);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_read");
        err = MPI_File_close(&fh);
	if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close");
        for (i=0; i<MAXLEN; i++) {
            if (buf[i] < 0) {
		nr_errors++;
                printf("Error: unexpected value for case %d at buf[%d] == %d\n",
			testcase,i,buf[i]);
	    }
	}
    }
    return nr_errors;
}
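The point of the test above is that zero-length blocks are legal in an indexed filetype, and that the count passed to MPI_File_write_all must equal the sum of the block lengths that are actually non-zero. A small sketch of the same idea; the block pattern, the per-rank file displacement and the file name "zeros.dat" are made up for illustration.

#include <mpi.h>

int main(int argc, char **argv)
{
    int rank, i;
    int blocklen[8], indices[8], buf[8], buflen = 0;
    MPI_Datatype filetype;
    MPI_File fh;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* even ranks own the even slots, odd ranks the odd slots;
       the remaining blocks have length zero on purpose */
    for (i = 0; i < 8; i++) {
        indices[i]  = i;
        blocklen[i] = ((i % 2) == (rank % 2)) ? 1 : 0;
        buflen     += blocklen[i];
        buf[i]      = rank;
    }

    MPI_Type_indexed(8, blocklen, indices, MPI_INT, &filetype);
    MPI_Type_commit(&filetype);

    MPI_File_open(MPI_COMM_WORLD, "zeros.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    /* a rank-dependent displacement keeps the writes from overlapping */
    MPI_File_set_view(fh, (MPI_Offset)rank * 8 * sizeof(int),
                      MPI_INT, filetype, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, buf, buflen, MPI_INT, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Type_free(&filetype);
    MPI_Finalize();
    return 0;
}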
Ejemplo n.º 29
0
/* *************************** *
 *  Main computational kernel  *
 * *************************** */
int correlationKernel(int rank,
                      int size,
                      double* dataMatrixX,
                      double* dataMatrixY,
                      int columns,
                      int rows,
                      char *out_filename,
                      int distance_flag) {

    int local_check = 0, global_check = 0;
    int i = 0, j, taskNo;
    int err, count = 0;
    unsigned long long fair_chunk = 0, coeff_count = 0;
    unsigned int init_and_cleanup_loop_iter=0;
    unsigned long long cor_cur_size = 0;
    
    double start_time, end_time;

    // Variables needed by the Indexed Datatype
    MPI_Datatype coeff_index_dt;
    MPI_File fh;
    int *blocklens, *indices;

    MPI_Status stat;
    MPI_Comm comm = MPI_COMM_WORLD;

    // Master processor keeps track of tasks
    if (rank == 0) {

        // Make sure everything will work fine even if there are
        // fewer genes than available workers (there are size-1 workers;
        // the master does not count)
        if ( (size-1) > rows )
            init_and_cleanup_loop_iter = rows+1;
        else
            init_and_cleanup_loop_iter = size;

        // Start timer
        start_time = MPI_Wtime();

        // Send out initial tasks (remember you have size-1 workers, master does not count)
        for (i=1; i<init_and_cleanup_loop_iter; i++) {
            taskNo = i-1;
            err = MPI_Send(&taskNo, 1, MPI_INT, i, 0, comm);
        }        

        // Terminate any processes that were not given work because the
        // number of rows was smaller than the number of available workers
        for(i=init_and_cleanup_loop_iter; i < size; i++) {
            PROF(rank, "\nPROF_idle : Worker %d terminated due to insufficient work load", i);
            err = -1;
            err = MPI_Send(&err, 1, MPI_INT, i, 0, comm);
        }

        // Wait for workers to finish their work assignment and ask for more
        for (i=init_and_cleanup_loop_iter-1; i<rows; i++) {
            err = MPI_Recv(&taskNo, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, &stat);

            // Check taskNo to make sure everything is ok. A negative value means there is a problem,
            // so terminate all remaining working workers gracefully
            if ( taskNo < 0 ) {
                // Reduce by one because one worker is already terminated
                init_and_cleanup_loop_iter--;
                // Break and cleanup
                break;
            }

            // The sending processor is ready to work:
            // Its ID is in stat.MPI_SOURCE
            // Send it the current task (i)
            err = MPI_Send(&i, 1, MPI_INT, stat.MPI_SOURCE, 0, comm);
        }

        // Clean up processors
        for (i=1; i<init_and_cleanup_loop_iter; i++) {
            // All tasks complete - shutdown workers
            err = MPI_Recv(&taskNo, 1, MPI_INT, MPI_ANY_SOURCE, 0, comm, &stat);
            // If a process failed, it will not be waiting to receive anything;
            // we have to skip the send, otherwise it would deadlock
            if ( taskNo < 0 )
                continue;
            err = -1;
            err = MPI_Send(&err, 1, MPI_INT, stat.MPI_SOURCE, 0, comm);
        }

        // Master is *always* OK
        local_check = 0;
        MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

        // Check failed, abort
        if ( global_check != 0 ) {
            return -1;
        }
        
        // Stop timer
        end_time = MPI_Wtime();
        PROF(rank, "\nPROF_comp (workers=%d) : Time taken by correlation coefficients computations : %g\n", size-1, end_time - start_time);

        // Start timer
        start_time = MPI_Wtime();

        // Master process must call MPI_File_set_view as well, it's a collective call
        // Open the file handler
        MPI_File_open(comm, out_filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

        // Create the file view
        MPI_File_set_view(fh, 0, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);

        // Write data to disk
        MPI_File_write_all(fh, &cor[0], 0, MPI_DOUBLE, &stat);

        // Stop timer
        end_time = MPI_Wtime();
        PROF(rank, "\nPROF_write (workers=%d) : Time taken for global write-file : %g\n",  size-1, end_time - start_time);

    } else {

        // Compute how many workers will share the work load
        // Two scenarios exist:
        // (1) more OR equal number of workers and rows exist
        // (2) more rows than workers
        if ( (size-1) > rows ) {
            // For this scenario each worker will get exactly one work assignment.
            // There is not going to be any other work, so it only computes "rows"
            // coefficients
            fair_chunk = rows;
            cor_cur_size = fair_chunk;
        } else {
            // For this scenario we are going to allocate space equal to a fair
            // distribution of work assignments *plus* an extra amount of space to
            // cover any load imbalance. This amount is expressed as a percentage
            // of the fair work distribution (see on top, 20% for now)

            // Plus 1 to round it up or just add some extra space, both are fine
            fair_chunk = (rows / (size-1)) + 1;
            DEBUG("fair_chunk %d \n", fair_chunk);

            // We can use "j" as temporary variable.
            // Plus 1 to avoid getting 0 from the multiplication.
            j = (fair_chunk * MEM_PERC) + 1;

            cor_cur_size = (fair_chunk + j) * rows;
            DEBUG("cor_cur_size %lld \n", cor_cur_size);
        }

        // Allocate memory
        DEBUG("cor_cur_size %lld \n", cor_cur_size);
        long long double_size = sizeof(double);
        DEBUG("malloc size %lld \n", (double_size * cor_cur_size));
        cor = (double *)malloc(double_size * cor_cur_size);

        blocklens = (int *)malloc(sizeof(int) * rows);
        indices = (int *)malloc(sizeof(int) * rows);

        mean_value_vectorX = (double *)malloc(sizeof(double) * rows);
        Sxx_vector = (double *)malloc(sizeof(double) * rows);
        mean_value_vectorY = (double *)malloc(sizeof(double) * rows);
        Syy_vector = (double *)malloc(sizeof(double) * rows);

        // Check that all memory is successfully allocated
        if ( ( cor == NULL ) || ( blocklens == NULL ) || ( indices == NULL ) || 
             ( mean_value_vectorX == NULL ) || ( Sxx_vector == NULL ) ||
             ( mean_value_vectorY == NULL ) || ( Syy_vector == NULL ) ) {
            ERR("**ERROR** : Memory allocation failed on worker process %d. Aborting.\n", rank);

            // Free allocated memory
            free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);

            // Let the master process know it is aborting in order to terminate
            // the rest of the working workers
            // We have to receive a work assignment first and then terminate
            // otherwise the master will deadlock trying to give work to this worker
            err = MPI_Recv(&taskNo, 1, MPI_INT, 0, 0, comm, &stat);
            taskNo = -1;
            err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);

            // This worker failed
            local_check = 1;
            MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

            return -1;
        }

        // Compute necessary parameters for Pearson method
        // (this will transform the values of the input array to more meaningful data
        //  and save us from a lot of redundant computations)
        compute_parameters(dataMatrixX, dataMatrixY, rows, columns);

        // Main loop for workers. They get work from master, compute coefficients,
        // save them to their *local* vector and ask for more work
        for(;;) {
            // Get work
            err = 0;
            err = MPI_Recv(&taskNo, 1, MPI_INT, 0, 0, comm, &stat);

            // If received task is -1, function is terminated
            if ( taskNo == -1 )  break;

            // Check if there is enough memory to store the new coefficients, if not reallocate
            // the current memory and expand it by MEM_PERC of the approximated size
            if ( cor_cur_size < (coeff_count + rows) ) {
                PROF(0, "\n**WARNING** : Worker process %3d run out of memory and reallocates. Potential work imbalancing\n", rank);
                DEBUG("\n**WARNING** : Worker process %3d run out of memory and reallocates. Potential work imbalancing\n", rank);

                // Use j as a temporary again. Add two (any small positive
                // value works) to avoid 0.
                j = (fair_chunk * MEM_PERC) + 2;
                cor_cur_size += (j * rows);

                // Reallocate and check
                cor = (double *)realloc(cor, sizeof(double) * cor_cur_size);
                if ( cor == NULL ) {
                    ERR("**ERROR** : Memory re-allocation failed on worker process %d. Aborting.\n", rank);

                    // Let the master process know it is aborting in order to terminate
                    // the rest of the working workers
                    taskNo = -1;
                    err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);

                    // This worker failed
                    local_check = 1;
                    MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

                    // Free all allocated memory
                    free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);

                    return -1;
                }
            }

            // Compute the correlation coefficients
            if(dataMatrixY != NULL) {
              for (j=0; j < rows; j++) {
                cor[coeff_count] = pearson_XY(dataMatrixX, dataMatrixY, j, taskNo, columns);
                coeff_count++;
              }

            } else {
              for (j=0; j < rows; j++) {
                // Set main diagonal to 1
                if ( j == taskNo ) {
                  cor[coeff_count] = 1.0;
                  coeff_count++;
                  continue;
                }
                cor[coeff_count] = pearson(dataMatrixX, taskNo, j, columns);
                coeff_count++;
              }
            }

            // The value of blocklens[] represents the number of coefficients on each
            // row of the correlation array
            blocklens[count] = rows;

            // The value of indices[] represents the offset of each row in the data file
            indices[count] = (taskNo * rows);
            count++;

            // Give the master the taskID
            err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);
        }

        // There are two possibilities
        //   (a) everything went well and all workers finished ok
        //   (b) some processes finished ok but one or more of the remaining working workers failed
        // To make sure all is well, an all-reduce is performed to sync all workers and guarantee success
        // before moving on to write the output file
        // This worker is OK
        local_check = 0;
        MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

        // Check failed
        if ( global_check != 0 ) {
            // Free all allocated memory
          free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);
            return -1;
        }

        PROF(0, "\nPROF_stats (thread %3d) : Fair chunk of work : %d \t\t Allocated : %d \t\t Computed : %d\n",
                rank, fair_chunk, cor_cur_size, coeff_count);

        // If the distance_flag is set, then transform all correlation coefficients to distances
        if ( distance_flag == 1 ) {
            for(j=0; j < coeff_count; j++) {
                cor[j] = 1 - cor[j];
            }
        }

        // Create and commit the Indexed datatype *ONLY* if there are data available
        if ( coeff_count != 0 ) {
            MPI_Type_indexed(count, blocklens, indices, MPI_DOUBLE, &coeff_index_dt);
            MPI_Type_commit(&coeff_index_dt);
        }

        // Open the file handler
        MPI_File_open(comm, out_filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

        // Create the file view
        if ( coeff_count != 0 ) {
            MPI_File_set_view(fh, 0, MPI_DOUBLE, coeff_index_dt, "native", MPI_INFO_NULL);
        } else {
            MPI_File_set_view(fh, 0, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
        }

        // Write data to disk
        // TODO coeff_count cannot be greater than max int (for use in the MPI_File_write_all call). 
        // A better fix should be possible, for now throw error.
        
        DEBUG("\ncoeff_count is %lld\n", coeff_count);
        DEBUG("\INT_MAX is %d\n", INT_MAX);
        if(coeff_count>INT_MAX)
        {
            ERR("**ERROR** : Could not run as the chunks of data are too large. Try running again with more MPI processes.\n");

            // Free allocated memory
            free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);

            // Let the master process know it is aborting in order to terminate
            // the rest of the working workers
            // We have to receive a work assignment first and then terminate
            // otherwise the master will deadlock trying to give work to this worker
            err = MPI_Recv(&taskNo, 1, MPI_INT, 0, 0, comm, &stat);
            taskNo = -1;
            err = MPI_Send(&taskNo, 1, MPI_INT, 0, 0, comm);

            // This worker failed
            local_check = 1;
            MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

            return -1;
        }

        
        
        DEBUG("\nWriting %d to disk\n", coeff_count);

        MPI_File_write_all(fh, &cor[0], coeff_count, MPI_DOUBLE, &stat);

        if (coeff_count != 0 )
            MPI_Type_free(&coeff_index_dt);

        // Free all allocated memory
        free_all(cor, blocklens, indices, mean_value_vectorX, Sxx_vector, mean_value_vectorY, Syy_vector);
    }

         DEBUG("\nAbout to write to disk %d\n", rank);
    MPI_File_sync( fh ) ;   		// Causes all previous writes to be transferred to the storage device
         DEBUG("\nWritten to disk %d\n",rank);
  //  MPI_Barrier( MPI_COMM_WORLD ) ; 	// Blocks until all processes in the communicator have reached this routine.
         DEBUG("\nAfter barrier \n", rank);

    // Close file handler
    MPI_File_close(&fh);
  DEBUG("\nAfter file closed /n");
   // MPI_Barrier( MPI_COMM_WORLD ) ; 	// Blocks until all processes in the communicator have reached this routine.
      DEBUG("\nAbout to return from kernel /n");
      return 0;
}
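One detail of the kernel above is easy to miss: MPI_File_set_view and MPI_File_write_all are collective, so the master, which has no coefficients of its own, still has to make both calls, using a zero count and a plain MPI_DOUBLE view. A minimal sketch of that zero-count participation; the offsets, the payload and the file name "collective.dat" are assumptions.

#include <mpi.h>

int main(int argc, char **argv)
{
    int rank;
    double payload[4] = {1.0, 2.0, 3.0, 4.0};
    MPI_File fh;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    MPI_File_open(MPI_COMM_WORLD, "collective.dat",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    /* displacement and count may differ per rank; the collective calls
       themselves must be made by everyone, including rank 0 */
    MPI_Offset disp = (rank == 0) ? 0 : (MPI_Offset)(rank - 1) * 4 * sizeof(double);
    int count = (rank == 0) ? 0 : 4;

    MPI_File_set_view(fh, disp, MPI_DOUBLE, MPI_DOUBLE, "native", MPI_INFO_NULL);
    MPI_File_write_all(fh, payload, count, MPI_DOUBLE, MPI_STATUS_IGNORE);

    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}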
Ejemplo n.º 30
0
int main(int argc, char **argv)
{
    int *buf, i, mynod, nprocs, len, b[3];
    int errs = 0, toterrs;
    MPI_Aint d[3];
    MPI_File fh;
    MPI_Status status;
    char *filename;
    MPI_Datatype typevec, newtype, t[3];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    if (nprocs != 2) {
        fprintf(stderr, "Run this program on two processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

/* process 0 takes the file name as a command-line argument and
   broadcasts it to other processes */
    if (!mynod) {
        i = 1;
        while ((i < argc) && strcmp("-fname", *argv)) {
            i++;
            argv++;
        }
        if (i >= argc) {
            fprintf(stderr, "\n*#  Usage: noncontig_coll -fname filename\n\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        argv++;
        len = strlen(*argv);
        filename = (char *) malloc(len + 1);
        strcpy(filename, *argv);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
    } else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = (char *) malloc(len + 1);
        MPI_Bcast(filename, len + 1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    buf = (int *) malloc(SIZE * sizeof(int));

    MPI_Type_vector(SIZE / 2, 1, 2, MPI_INT, &typevec);

    b[0] = b[1] = b[2] = 1;
    d[0] = 0;
    d[1] = mynod * sizeof(int);
    d[2] = SIZE * sizeof(int);
    t[0] = MPI_LB;
    t[1] = typevec;
    t[2] = MPI_UB;

    MPI_Type_struct(3, b, d, t, &newtype);
    MPI_Type_commit(&newtype);
    MPI_Type_free(&typevec);

    if (!mynod) {
#if VERBOSE
        fprintf(stderr,
                "\ntesting noncontiguous in memory, noncontiguous in file using collective I/O\n");
#endif
        MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_CHECK(MPI_File_open(MPI_COMM_WORLD, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh));

    MPI_CHECK(MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL));

    for (i = 0; i < SIZE; i++)
        buf[i] = i + mynod * SIZE;
    MPI_CHECK(MPI_File_write_all(fh, buf, 1, newtype, &status));

    MPI_Barrier(MPI_COMM_WORLD);

    for (i = 0; i < SIZE; i++)
        buf[i] = -1;

    MPI_CHECK(MPI_File_read_at_all(fh, 0, buf, 1, newtype, &status));

    for (i = 0; i < SIZE; i++) {
        if (!mynod) {
            if ((i % 2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
            if (!(i % 2) && (buf[i] != i)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i);
            }
        } else {
            if ((i % 2) && (buf[i] != i + mynod * SIZE)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n",
                        mynod, i, buf[i], i + mynod * SIZE);
            }
            if (!(i % 2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
        }
    }

    MPI_CHECK(MPI_File_close(&fh));

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
        fprintf(stderr,
                "\ntesting noncontiguous in memory, contiguous in file using collective I/O\n");
#endif
        MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_CHECK(MPI_File_open(MPI_COMM_WORLD, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh));

    for (i = 0; i < SIZE; i++)
        buf[i] = i + mynod * SIZE;
    MPI_CHECK(MPI_File_write_at_all(fh, mynod * (SIZE / 2) * sizeof(int),
                                    buf, 1, newtype, &status));

    MPI_Barrier(MPI_COMM_WORLD);

    for (i = 0; i < SIZE; i++)
        buf[i] = -1;

    MPI_CHECK(MPI_File_read_at_all(fh, mynod * (SIZE / 2) * sizeof(int), buf, 1, newtype, &status));

    for (i = 0; i < SIZE; i++) {
        if (!mynod) {
            if ((i % 2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
            if (!(i % 2) && (buf[i] != i)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i);
            }
        } else {
            if ((i % 2) && (buf[i] != i + mynod * SIZE)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n",
                        mynod, i, buf[i], i + mynod * SIZE);
            }
            if (!(i % 2) && (buf[i] != -1)) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]);
            }
        }
    }

    MPI_CHECK(MPI_File_close(&fh));

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
        fprintf(stderr,
                "\ntesting contiguous in memory, noncontiguous in file using collective I/O\n");
#endif
        MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_CHECK(MPI_File_open(MPI_COMM_WORLD, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh));

    MPI_CHECK(MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL));

    for (i = 0; i < SIZE; i++)
        buf[i] = i + mynod * SIZE;
    MPI_CHECK(MPI_File_write_all(fh, buf, SIZE, MPI_INT, &status));

    MPI_Barrier(MPI_COMM_WORLD);

    for (i = 0; i < SIZE; i++)
        buf[i] = -1;

    MPI_CHECK(MPI_File_read_at_all(fh, 0, buf, SIZE, MPI_INT, &status));

    for (i = 0; i < SIZE; i++) {
        if (!mynod) {
            if (buf[i] != i) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i);
            }
        } else {
            if (buf[i] != i + mynod * SIZE) {
                errs++;
                fprintf(stderr, "Process %d: buf %d is %d, should be %d\n",
                        mynod, i, buf[i], i + mynod * SIZE);
            }
        }
    }

    MPI_CHECK(MPI_File_close(&fh));

    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (mynod == 0) {
        if (toterrs > 0) {
            fprintf(stderr, "Found %d errors\n", toterrs);
        } else {
            fprintf(stdout, " No Errors\n");
        }
    }

    MPI_Type_free(&newtype);
    free(buf);
    free(filename);
    MPI_Finalize();
    return 0;
}
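The MPI_LB/MPI_UB markers used above were deprecated in MPI-2 and removed from MPI-3, so this test only builds against older MPI implementations. Below is a sketch of how the same shifted, resized vector filetype could be built with MPI_Type_create_hindexed_block and MPI_Type_create_resized; the constant n stands in for SIZE and the construction is an assumption about equivalent behaviour, not code from the test suite.

#include <mpi.h>

int main(int argc, char **argv)
{
    int rank;
    const int n = 16;                       /* assumed stand-in for SIZE; must be even */
    MPI_Datatype typevec, shifted, newtype;
    MPI_Aint disp;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* every other int, exactly as in the test above */
    MPI_Type_vector(n / 2, 1, 2, MPI_INT, &typevec);

    /* shift the pattern by rank*sizeof(int) ... */
    disp = (MPI_Aint)(rank * sizeof(int));
    MPI_Type_create_hindexed_block(1, 1, &disp, typevec, &shifted);

    /* ... then force lower bound 0 and extent n*sizeof(int), which is what
       the MPI_LB/MPI_UB struct achieved */
    MPI_Type_create_resized(shifted, 0, (MPI_Aint)(n * sizeof(int)), &newtype);
    MPI_Type_commit(&newtype);

    MPI_Type_free(&typevec);
    MPI_Type_free(&shifted);

    /* newtype could now replace the struct-built type, e.g.
       MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL); */

    MPI_Type_free(&newtype);
    MPI_Finalize();
    return 0;
}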