Example #1
int MPIR_File_iwrite_at_cdesc(MPI_File x0, MPI_Offset x1, CFI_cdesc_t* x2, int x3, MPI_Datatype x4, MPIO_Request * x5)
{
    int err = MPI_SUCCESS;
#ifdef MPI_MODE_RDONLY
    void *buf2 = x2->base_addr;     /* raw buffer address from the CFI descriptor */
    int count2 = x3;
    MPI_Datatype dtype2 = x4;

    /* Map the Fortran MPI_BOTTOM sentinel onto the C MPI_BOTTOM. */
    if (buf2 == &MPIR_F08_MPI_BOTTOM) {
        buf2 = MPI_BOTTOM;
    }

    /* For a non-contiguous actual argument, describe the data with a derived
       datatype and write a single element of that type instead. */
    if (x2->rank != 0 && !CFI_is_contiguous(x2)) {
        err = cdesc_create_datatype(x2, x3, x4, &dtype2);
        count2 = 1;
    }

    err = MPI_File_iwrite_at(x0, x1, buf2, count2, dtype2, x5);

    if (dtype2 != x4)  MPI_Type_free(&dtype2);
#else
    err = MPI_ERR_INTERN;
#endif
    return err;
}
Example #2
int lemonWriteRecordDataNonBlocking(void *source, MPI_Offset const *nbytes, LemonWriter* writer)
{
  int err;

  if (source == NULL || nbytes == NULL || writer == NULL)
  {
    /* writer itself may be NULL here, so do not dereference it for the rank. */
    fprintf(stderr, "[LEMON] lemonWriteRecordDataNonBlocking:\n"
                    "        NULL pointer or uninitialized writer provided.\n");
    return LEMON_ERR_PARAM;
  }

  if (writer->is_busy)
    lemonFinishWriting(writer);

  if (writer->my_rank == 0)
    err = MPI_File_iwrite_at(*writer->fp, writer->off + writer->pos, source, *nbytes, MPI_BYTE, &writer->request);

  writer->is_busy = 1;
  writer->is_collective = 0;
  writer->buffer = source;
  writer->bytes_wanted = *nbytes;

  MPI_File_sync(*writer->fp);
  MPI_Bcast(&err, 1, MPI_INT, 0, writer->cartesian);

  if (err != MPI_SUCCESS)
  {
    fprintf(stderr, "[LEMON] Node %d reports in lemonWriteRecordDataNonBlocking:\n"
                    "        MPI_File_iwrite_at returned error %d.\n", writer->my_rank, err);
    return LEMON_ERR_WRITE;
  }
  return LEMON_SUCCESS;
}
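The function above only posts the nonblocking write and records the pending state; completion is delegated to lemonFinishWriting, which is not part of this example. The following is a minimal sketch of that completion step, assuming only the writer fields used above (request, is_busy, pos, bytes_wanted) and that the MPIO_Request can be completed with MPI_Wait, as it can on current MPICH and Open MPI builds:

/* Sketch only, not the actual LEMON implementation of lemonFinishWriting. */
static void finish_pending_write(LemonWriter *writer)
{
  MPI_Status status;

  if (!writer->is_busy)
    return;

  if (writer->my_rank == 0)
    MPI_Wait(&writer->request, &status);  /* complete the MPI_File_iwrite_at */

  MPI_Barrier(writer->cartesian);         /* keep all ranks in step */
  writer->pos += writer->bytes_wanted;    /* advance past the record just written */
  writer->is_busy = 0;
}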
Example #3
void dump_smc_async(grid_parms grid, celltype1 **smc, checkpoint_handle *check,
		int write_count) {
	MPI_Status status;
	MPI_Request request[8];
	MPI_Offset disp;
	int write_element_count, time_offset_in_file;

	write_element_count = grid.num_smc_axially * grid.num_smc_circumferentially;
	time_offset_in_file = write_count * write_element_count * grid.tasks
			* sizeof(double);

	double b1[grid.num_smc_circumferentially * grid.num_smc_axially],
			b2[grid.num_smc_circumferentially * grid.num_smc_axially],
			b3[grid.num_smc_circumferentially * grid.num_smc_axially],
			b4[grid.num_smc_circumferentially * grid.num_smc_axially],
			b5[grid.num_smc_circumferentially * grid.num_smc_axially],
			b6[grid.num_smc_circumferentially * grid.num_smc_axially],
			b7[grid.num_smc_circumferentially * grid.num_smc_axially],
			b8[grid.num_smc_circumferentially * grid.num_smc_axially];
	int k;

	k = 0;
	for (int i = 1; i <= grid.num_smc_circumferentially; i++) {
		for (int j = 1; j <= grid.num_smc_axially; j++) {
			b1[k] = smc[i][j].p[smc_Ca];
			b2[k] = smc[i][j].p[smc_SR];
			b3[k] = smc[i][j].p[smc_Vm];
			b4[k] = smc[i][j].p[smc_w];
			b5[k] = smc[i][j].p[smc_IP3];
			b6[k] = smc[i][j].B[cpl_Ca];
			b7[k] = smc[i][j].B[cpl_Vm];
			b8[k] = smc[i][j].B[cpl_IP3];
			k++;
		}
	}
	disp = time_offset_in_file
			+ (grid.rank * write_element_count * sizeof(double));
	CHECK(MPI_File_iwrite_at(check->ci, disp, b1, write_element_count, MPI_DOUBLE, &request[0]));
	CHECK(MPI_File_iwrite_at(check->si, disp, b2, write_element_count, MPI_DOUBLE, &request[1]));
	CHECK(MPI_File_iwrite_at(check->vi, disp, b3, write_element_count, MPI_DOUBLE, &request[2]));
	CHECK(MPI_File_iwrite_at(check->wi, disp, b4, write_element_count, MPI_DOUBLE, &request[3]));
	CHECK(MPI_File_iwrite_at(check->Ii, disp, b5, write_element_count, MPI_DOUBLE, &request[4]));
	CHECK(MPI_File_iwrite_at(check->cpCi, disp, b6, write_element_count, MPI_DOUBLE, &request[5]));
	CHECK(MPI_File_iwrite_at(check->cpVi, disp, b7, write_element_count, MPI_DOUBLE, &request[6]));
	CHECK(MPI_File_iwrite_at(check->cpIi, disp, b8, write_element_count, MPI_DOUBLE, &request[7]));
}
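Note that b1 through b8 are automatic arrays, so the eight requests posted above have to be completed before dump_smc_async returns and those buffers disappear. A sketch of that completion step (an assumption; it is not shown in this excerpt):

/* Sketch, not part of the original source: drain all eight pending writes
   while the stack buffers b1..b8 are still alive. */
static void wait_for_checkpoint_writes(MPI_Request request[8])
{
	MPI_Status statuses[8];
	MPI_Waitall(8, request, statuses);
}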
Example #4
FORTRAN_API void FORT_CALL mpi_file_iwrite_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf,
                       int *count,MPI_Datatype *datatype,
                       MPI_Fint *request, int *ierr )
{
    MPI_File fh_c;
    MPIO_Request req_c;
    
    fh_c = MPI_File_f2c(*fh);
    *ierr = MPI_File_iwrite_at(fh_c,*offset,buf,*count,*datatype,&req_c);
    *request = MPIO_Request_c2f(req_c);
}
Example #5
JNIEXPORT jlong JNICALL Java_mpi_File_iWriteAt(
        JNIEnv *env, jobject jthis, jlong fh, jlong offset,
        jobject buf, jint count, jlong type)
{
    void *ptr = (*env)->GetDirectBufferAddress(env, buf);
    MPI_Request request;

    int rc = MPI_File_iwrite_at((MPI_File)fh, (MPI_Offset)offset,
                                ptr, count, (MPI_Datatype)type, &request);

    ompi_java_exceptionCheck(env, rc);
    return (jlong)request;
}
Example #6
void mpi_file_iwrite_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf,
                       int *count,MPI_Fint *datatype,
                       MPI_Fint *request, int *ierr )
{
    MPI_File fh_c;
    MPIO_Request req_c;
    MPI_Datatype datatype_c;
    
    fh_c = MPI_File_f2c(*fh);
    datatype_c = MPI_Type_f2c(*datatype);

    *ierr = MPI_File_iwrite_at(fh_c,*offset,buf,*count,datatype_c,&req_c);
    *request = MPIO_Request_c2f(req_c);
}
Example #7
void mpi_file_iwrite_at_f(MPI_Fint *fh, MPI_Offset *offset, char *buf,
			  MPI_Fint *count, MPI_Fint *datatype,
			  MPI_Fint *request, MPI_Fint *ierr)
{
   MPI_File c_fh = MPI_File_f2c(*fh);
   MPI_Datatype c_type = MPI_Type_f2c(*datatype);
   MPI_Request c_request;
   
   *ierr = OMPI_INT_2_FINT(MPI_File_iwrite_at(c_fh, (MPI_Offset) *offset,
                                              OMPI_F2C_BOTTOM(buf),
                                              OMPI_FINT_2_INT(*count),
                                              c_type, &c_request));
   if (MPI_SUCCESS == OMPI_FINT_2_INT(*ierr)) {
      *request = MPI_Request_c2f(c_request);
   }
}
Example #8
int main(int argc, char **argv)
{
    int *buf, i, rank, nints, len;
    char *filename, *tmp;
    int errs=0, toterrs;
    MPI_File fh;
    MPI_Status status[NR_NBOPS];
    MPI_Request request[NR_NBOPS];
    int errcode = 0;

    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!rank) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    fprintf(stderr, "\n*#  Usage: async -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+10);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+10);
	MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }


    buf = (int *) malloc(SIZE);
    nints = SIZE/sizeof(int);
    for (i=0; i<nints; i++) buf[i] = rank*100000 + i;

    /* each process opens a separate file called filename.'myrank' */
    tmp = (char *) malloc(len+10);
    strcpy(tmp, filename);
    sprintf(filename, "%s.%d", tmp, rank);

    errcode = MPI_File_open(MPI_COMM_SELF, filename, 
		    MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "MPI_File_open");
    }
    MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    for (i=0; i<NR_NBOPS; i++) { 
	errcode = MPI_File_iwrite_at(fh, nints/NR_NBOPS*i, 
		buf+(nints/NR_NBOPS*i), nints/NR_NBOPS, MPI_INT, &(request[i]));
	if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "MPI_File_iwrite");
	}
    }
    MPI_Waitall(NR_NBOPS, request, status);

    MPI_File_close(&fh);

    /* reopen the file and read the data back */

    for (i=0; i<nints; i++) buf[i] = 0;
    errcode = MPI_File_open(MPI_COMM_SELF, filename, 
		    MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "MPI_File_open");
    }

    MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL);
    for (i=0; i<NR_NBOPS; i++) {
	errcode = MPI_File_iread_at(fh, nints/NR_NBOPS*i, 
		buf+(nints/NR_NBOPS*i), nints/NR_NBOPS, MPI_INT, &(request[i]));
	if (errcode != MPI_SUCCESS) {
	    handle_error(errcode, "MPI_File_open");
	}
    }
    MPI_Waitall(NR_NBOPS, request, status);

    MPI_File_close(&fh);

    /* check if the data read is correct */
    for (i=0; i<nints; i++) {
	if (buf[i] != (rank*100000 + i)) {
	    errs++;
	    fprintf(stderr, "Process %d: error, read %d, should be %d\n", rank, buf[i], rank*100000+i);
	}
    }

    MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (rank == 0) {
	if( toterrs > 0) {
	    fprintf( stderr, "Found %d errors\n", toterrs );
	}
	else {
	    fprintf( stdout, " No Errors\n" );
	}
    }

    free(buf);
    free(filename);
    free(tmp);

    MPI_Finalize();
    return 0; 
}
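handle_error is called above but not defined in this excerpt; a hypothetical minimal version built on MPI_Error_string could look like this:

/* Hypothetical helper, not part of the excerpt above: turn an MPI error
   code into a readable message and abort. */
static void handle_error(int errcode, const char *str)
{
    char msg[MPI_MAX_ERROR_STRING];
    int resultlen;

    MPI_Error_string(errcode, msg, &resultlen);
    fprintf(stderr, "%s: %s\n", str, msg);
    MPI_Abort(MPI_COMM_WORLD, 1);
}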
Example #9
int main(int argc, char **argv)
{
    int *buf, i, mynod, nprocs, len, b[3];
    int errs=0, toterrs;
    MPI_Aint d[3];
    MPI_File fh;
    MPI_Status status;
    char *filename;
    MPI_Datatype typevec, newtype, t[3];
    MPIO_Request req;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    if (nprocs != 2) {
        fprintf(stderr, "Run this program on two processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
    if (!mynod) {
	i = 1;
	while ((i < argc) && strcmp("-fname", *argv)) {
	    i++;
	    argv++;
	}
	if (i >= argc) {
	    fprintf(stderr, "\n*#  Usage: i_noncontig -fname filename\n\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	argv++;
	len = strlen(*argv);
	filename = (char *) malloc(len+1);
	strcpy(filename, *argv);
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }
    else {
	MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
	filename = (char *) malloc(len+1);
	MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    buf = (int *) malloc(SIZE*sizeof(int));

    MPI_Type_vector(SIZE/2, 1, 2, MPI_INT, &typevec);

    b[0] = b[1] = b[2] = 1;
    d[0] = 0;
    d[1] = mynod*sizeof(int);
    d[2] = SIZE*sizeof(int);
    t[0] = MPI_LB;
    t[1] = typevec;
    t[2] = MPI_UB;

    MPI_Type_struct(3, b, d, t, &newtype);
    MPI_Type_commit(&newtype);
    MPI_Type_free(&typevec);

    if (!mynod) {
#if VERBOSE
	fprintf(stderr, "\ntesting noncontiguous in memory, noncontiguous in file using nonblocking I/O\n");
#endif
	MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | 
             MPI_MODE_RDWR, MPI_INFO_NULL, &fh);

    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL);

    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_iwrite(fh, buf, 1, newtype, &req);
#ifdef MPIO_USES_MPI_REQUEST
    MPI_Wait(&req, &status);
#else
    MPIO_Wait(&req, &status);
#endif

    MPI_Barrier(MPI_COMM_WORLD);

    for (i=0; i<SIZE; i++) buf[i] = -1;

    MPI_File_iread_at(fh, 0, buf, 1, newtype, &req);
#ifdef MPIO_USES_MPI_REQUEST
    MPI_Wait(&req, &status);
#else
    MPIO_Wait(&req, &status);
#endif

    for (i=0; i<SIZE; i++) {
	if (!mynod) {
	    if ((i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	    if (!(i%2) && (buf[i] != i)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i);
	    }
	}
	else {
	    if ((i%2) && (buf[i] != i + mynod*SIZE)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i + mynod*SIZE);
	    }
	    if (!(i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	}
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
	fprintf(stderr, "\ntesting noncontiguous in memory, contiguous in file using nonblocking I/O\n");
#endif
	MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | 
             MPI_MODE_RDWR, MPI_INFO_NULL, &fh);

    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_iwrite_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &req);
#ifdef MPIO_USES_MPI_REQUEST
    MPI_Wait(&req, &status);
#else
    MPIO_Wait(&req, &status);
#endif

    MPI_Barrier(MPI_COMM_WORLD);

    for (i=0; i<SIZE; i++) buf[i] = -1;

    MPI_File_iread_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &req);
#ifdef MPIO_USES_MPI_REQUEST
    MPI_Wait(&req, &status);
#else
    MPIO_Wait(&req, &status);
#endif

    for (i=0; i<SIZE; i++) {
	if (!mynod) {
	    if ((i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	    if (!(i%2) && (buf[i] != i)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n",
			mynod, i, buf[i], i);
	    }
	}
	else {
	    if ((i%2) && (buf[i] != i + mynod*SIZE)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i + mynod*SIZE);
	    }
	    if (!(i%2) && (buf[i] != -1)) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", 
			mynod, i, buf[i]);
	    }
	}
    }

    MPI_File_close(&fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (!mynod) {
#if VERBOSE
	fprintf(stderr, "\ntesting contiguous in memory, noncontiguous in file using nonblocking I/O\n");
#endif
	MPI_File_delete(filename, MPI_INFO_NULL);
    }
    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | 
             MPI_MODE_RDWR, MPI_INFO_NULL, &fh);

    MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", MPI_INFO_NULL);

    for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE;
    MPI_File_iwrite(fh, buf, SIZE, MPI_INT, &req);
#ifdef MPIO_USES_MPI_REQUEST
    MPI_Wait(&req, &status);
#else
    MPIO_Wait(&req, &status);
#endif

    MPI_Barrier(MPI_COMM_WORLD);

    for (i=0; i<SIZE; i++) buf[i] = -1;

    MPI_File_iread_at(fh, 0, buf, SIZE, MPI_INT, &req);
#ifdef MPIO_USES_MPI_REQUEST
    MPI_Wait(&req, &status);
#else
    MPIO_Wait(&req, &status);
#endif

    for (i=0; i<SIZE; i++) {
	if (!mynod) {
	    if (buf[i] != i) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i);
	    }
	}
	else {
	    if (buf[i] != i + mynod*SIZE) {
		errs++;
		fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", 
			mynod, i, buf[i], i + mynod*SIZE);
	    }
	}
    }

    MPI_File_close(&fh);

    MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    if (mynod == 0) {
	if( toterrs > 0) {
	    fprintf( stderr, "Found %d errors\n", toterrs );
	}
	else {
	    fprintf( stdout, " No Errors\n" );
	}
    }
    MPI_Type_free(&newtype);
    free(buf);
    free(filename);
    MPI_Finalize();
    return 0;
}
Example #10
File: coflow.c  Project: Prajaktcs/optiq
int main(int argc, char **argv) {
    int myrank, numprocs;

    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    int coord[3];
    uint16_t nid;

    optiq_get_coord(coord);
    optiq_get_nic_id(&nid);

    printf("Rank: %d coord[%d, %d, %d], nid = %d\n", myrank, coord[0], coord[1], coord[2], nid);

    int myId = myrank;
    int centerId = numprocs/2;

    int num_neighbors = numprocs - 1;
    int *neighbors = (int *) malloc(sizeof(int) * num_neighbors);

    for (int i = 0; i < numprocs/2; i ++) {
	neighbors[i] = i;
    }
    for (int i = numprocs/2; i < numprocs-1; i ++) {
        neighbors[i] = i+1;
    }

    MPI_File fh;
    char fileName[] = "temp_test";
    MPI_File_open(MPI_COMM_WORLD, fileName, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh);

    if (myrank == 0)
	printf("File opened\n");

    int write_count = 256*1024*1024;
    void *write_buf = malloc(write_count);
    int send_count = 8*1024*1024;
    char *send_buf = (char*)malloc(send_count);
    MPI_Status w_status;
    MPI_Offset offset = 0;

    int iters = 100;
    MPI_Request **requests = (MPI_Request**)malloc(sizeof(MPI_Request*)*iters);
    MPI_Status **status = (MPI_Status**)malloc(sizeof(MPI_Status*)*iters);
    for (int i = 0; i < iters; i++) {
	requests[i] = (MPI_Request*)malloc(sizeof(MPI_Request)*num_neighbors);
	status[i] = (MPI_Status*)malloc(sizeof(MPI_Status)*num_neighbors);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    if (myrank == 0) {
	printf("Start testing\n");
    }

    if	(myId == centerId) {
	printf("Rank %d: I'm center. Neighbors: ", myId);
	for (int i = 0; i < num_neighbors; i++) {
	    printf("%d ", neighbors[i]);
	}
	printf("\n");
    }

    for (int i = 0; i < num_neighbors; i++) {
	if (myId == neighbors[i]) {    
	    printf("I'm Neighbor %d\n", neighbors[i]);
	}
    }

    /*Do warm up - 100 times comm and 5 times I/O*/
    

    struct timespec start, end;

    /*Test 1: Comm only*/
    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0) {
	printf("\nTest 1: Comm only between center and its neighbors\n\n");
    }
    MPI_Barrier(MPI_COMM_WORLD);

    if (myId == centerId)
    {
        /*Post MPI_Isend*/
        for (int i = 0; i < iters; i++) {
            for (int j = 0; j < num_neighbors; j++) {
                int dest = neighbors[j];
                MPI_Isend(send_buf, send_count, MPI_BYTE, dest, 0, MPI_COMM_WORLD, &requests[i][j]);
            }
        }

        /*Check if the request is done*/
        for (int i = 0; i < iters; i++) {
            MPI_Waitall(num_neighbors, requests[i], status[i]);
        }
    }

    clock_gettime(CLOCK_REALTIME, &start);

    for (int i = 0; i < iters; i++) {
        for(int j = 0; j < num_neighbors; j++) {
            if (myId == neighbors[j]) {
                MPI_Recv(send_buf, send_count, MPI_BYTE, centerId, 0, MPI_COMM_WORLD, &status[i][0]);
            }
        }
    }

    clock_gettime(CLOCK_REALTIME, &end);

    
    double elapsed = (end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/(double)BILLION;
    elapsed = elapsed/iters;

    double max_elapsed;
    MPI_Reduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, centerId, MPI_COMM_WORLD);
    double bw = 0;

    if (myId == centerId) {
	bw = (double)send_count/1024/1024/max_elapsed;
        printf("Comm Flow Only: Elapsed time at center side = %8.6f bw = %8.4f\n", max_elapsed, bw);
    }

    for (int i = 0; i < num_neighbors; i++) {
        if  (myId == neighbors[i]) {
            bw = (double)send_count/1024/1024/elapsed;
            printf("Neighbor %d Comm Flow Only: Elapsed time = %8.6f bw = %8.4f\n", myId, elapsed, bw);
        }
    }

    /*Test 2: I/O only*/
    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0) {
	printf("\nTest 2: I/O only from center.\n");
    }
    MPI_Barrier(MPI_COMM_WORLD);
    
    MPI_File_open(MPI_COMM_WORLD, fileName, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh);
    MPIO_Request *write_requests = (MPIO_Request*)malloc(sizeof(MPIO_Request)*iters);
    MPI_Status *write_status = (MPI_Status*)malloc(sizeof(MPI_Status)*iters);

    MPI_Barrier(MPI_COMM_WORLD);

    clock_gettime(CLOCK_REALTIME, &start);

    if (myId == centerId)
    {
        /*Post MPI_Isend*/
        for (int i = 0; i < iters; i++) {
	    MPI_File_write_at(fh, offset, write_buf, write_count, MPI_BYTE, &write_status[i]);
	    offset += write_count;
        }
    }

    MPI_File_close(&fh);

    clock_gettime(CLOCK_REALTIME, &end);

    elapsed = (end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/(double)BILLION;
    elapsed = elapsed/iters;

    MPI_Reduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, centerId, MPI_COMM_WORLD);

    if (myId == centerId) {
	bw = (double)send_count/1024/1024/max_elapsed;
        printf("I/O Flow Only: Elapsed time = %8.6f bw = %8.4f\n", max_elapsed, bw);
    }

    /*Test 3: Post Isend, I/O, Waitall for Isend*/
    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0) {
        printf("\nTest 3: Post Isend, I/O, Waitall for Isend\n");
    }
    MPI_Barrier(MPI_COMM_WORLD);

    /* fh was closed at the end of Test 2, so reopen it collectively before this test writes again. */
    MPI_File_open(MPI_COMM_WORLD, fileName, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh);

    if (myId == centerId) {
	/*Post MPI_Isend*/
	for (int i = 0; i < iters; i++) {
	    for (int j = 0; j < num_neighbors; j++) {
		int dest = neighbors[j];
		MPI_Isend(send_buf, send_count, MPI_BYTE, dest, 0, MPI_COMM_WORLD, &requests[i][j]);
	    }
	}

	/*Do I/O*/
	for (int i = 0; i < iters; i ++) {
	    MPI_File_write_at(fh, offset, write_buf, write_count, MPI_BYTE, &w_status);
	    offset += write_count;
	}

	/*Check if the request is done*/
	for (int i = 0; i < iters; i++) {
	    MPI_Waitall(num_neighbors, requests[i], status[i]);
	}
    }

    clock_gettime(CLOCK_REALTIME, &start);

    for (int i = 0; i < iters; i++) {
	for(int j = 0; j < num_neighbors; j++) {
	    if (myId == neighbors[j]) {
		MPI_Recv(send_buf, send_count, MPI_BYTE, centerId, 0, MPI_COMM_WORLD, &status[i][0]);
	    }
	}
    }

    clock_gettime(CLOCK_REALTIME, &end);
    
    MPI_File_close(&fh);

    elapsed = (end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/(double)BILLION;
    elapsed = elapsed/iters;

    MPI_Reduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, centerId, MPI_COMM_WORLD);

    if (myId == centerId) {
	bw = (double)send_count/1024/1024/max_elapsed;
	printf("CoFlow - Comm first: Elapsed time at center side = %8.6f bw = %8.4f\n", max_elapsed, bw);
    }

    for	(int i = 0; i < num_neighbors; i++) {
	if  (myId == neighbors[i]) {
	    bw = (double)send_count/1024/1024/elapsed;
	    printf("Neighbor %d CoFlow - Comm first: Elapsed time = %8.6f bw = %8.4f\n", myId, elapsed, bw);
	}
    }

    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0) {
	printf("\nTest 4: iwrite, isend, wait all iwrite, wait all isend\n");
    }
    MPI_File_open(MPI_COMM_WORLD, fileName, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (myId == centerId)
    {
	/*Do I/O*/
        for (int i = 0; i < iters; i ++) {
            MPI_File_iwrite_at(fh, offset, write_buf, write_count, MPI_BYTE, &write_requests[i]);
            offset += write_count;
        }

        /*Post MPI_Isend*/
        for (int i = 0; i < iters; i++) {
            for (int j = 0; j < num_neighbors; j++) {
                int dest = neighbors[j];
                MPI_Isend(send_buf, send_count, MPI_BYTE, dest, 0, MPI_COMM_WORLD, &requests[i][j]);
            }
        }

	MPI_Waitall(iters, write_requests, write_status);

        /*Check if the request is done*/
        for (int i = 0; i < iters; i++) {
            MPI_Waitall(num_neighbors, requests[i], status[i]);
        }
    }

    clock_gettime(CLOCK_REALTIME, &start);

    for (int i = 0; i < iters; i++) {
        for (int j = 0; j < num_neighbors; j++) {
            if (myId == neighbors[j]) {
                MPI_Recv(send_buf, send_count, MPI_BYTE, centerId, 0, MPI_COMM_WORLD, &status[i][0]);
            }
        }
    }

    clock_gettime(CLOCK_REALTIME, &end);

    MPI_File_close(&fh);

    elapsed = (end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/(double)BILLION;
    elapsed = elapsed/iters;

    MPI_Reduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, centerId, MPI_COMM_WORLD);
    
    if (myId == centerId) {
	bw = (double)send_count/1024/1024/max_elapsed;
        printf("CoFlow - I/O first: Elapsed time at ceter side = %8.6f bw = %8.4f\n", max_elapsed, bw);
    }

    for (int i = 0; i < num_neighbors; i++) {
        if (myId == neighbors[i]) {
            bw = (double)send_count/1024/1024/elapsed;
            printf("Neighbor %d CoFlow - I/O first: Elapsed time = %8.6f bw = %8.4f\n", myId, elapsed, bw);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    if (myId == 0) {
        printf("\nTest 5: isend, iwrite, wait all isend, wait all iwrite\n");
    }
    MPI_File_open(MPI_COMM_WORLD, fileName, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh);

    MPI_Barrier(MPI_COMM_WORLD);

    if (myId == centerId)
    {
        /*Post MPI_Isend*/
        for (int i = 0; i < iters; i++) {
            for (int j = 0; j < num_neighbors; j++) {
                int dest = neighbors[j];
                MPI_Isend(send_buf, send_count, MPI_BYTE, dest, 0, MPI_COMM_WORLD, &requests[i][j]);
            }
        }
	
        /*Do I/O*/
        for (int i = 0; i < iters; i ++) {
            MPI_File_iwrite_at(fh, offset, write_buf, write_count, MPI_BYTE, &write_requests[i]);
            offset += write_count;
        }

	/*Check if the request is done*/
        for (int i = 0; i < iters; i++) {
            MPI_Waitall(num_neighbors, requests[i], status[i]);
        }

	MPI_Waitall(iters, write_requests, write_status);
    }

    clock_gettime(CLOCK_REALTIME, &start);

    for (int i = 0; i < iters; i++) {
        for (int j = 0; j < num_neighbors; j++) {
            if (myId == neighbors[j]) {
                MPI_Recv(send_buf, send_count, MPI_BYTE, centerId, 0, MPI_COMM_WORLD, &status[i][0]);
            }
        }
    }

    clock_gettime(CLOCK_REALTIME, &end);

    MPI_File_close(&fh);

    elapsed = (end.tv_sec - start.tv_sec) + (double)(end.tv_nsec - start.tv_nsec)/(double)BILLION;
    elapsed = elapsed/iters;

    MPI_Reduce(&elapsed, &max_elapsed, 1, MPI_DOUBLE, MPI_MAX, centerId, MPI_COMM_WORLD);
    bw = (double)send_count/1024/1024/max_elapsed;

    if (myId == centerId) {
        printf("CoFlow: Isend, iwrite, wait isend, wait i/0 time = %8.6f bw = %8.4f\n", max_elapsed, bw);
    }

    for (int i = 0; i < num_neighbors; i++) {
        if (myId == neighbors[i]) {
            bw = (double)send_count/1024/1024/elapsed;
            printf("Neighbor %d isend, iwrite, wait isend, wait i/o: Elapsed time = %8.6f bw = %8.4f\n", myId, elapsed, bw);
        }
    }
    MPI_Finalize();
    return 0;
}