FORTRAN_API void FORT_CALL mpi_file_seek_shared_(MPI_Fint *fh, MPI_Offset *offset,
                                                 MPI_Fint *whence, MPI_Fint *ierr)
{
    MPI_File fh_c;

    fh_c = MPI_File_f2c(*fh);
    *ierr = MPI_File_seek_shared(fh_c, *offset, *whence);
}
FORT_DLL_SPEC void FORT_CALL mpi_file_seek_shared_ ( MPI_Fint *v1, MPI_Offset *v2, MPI_Fint *v3, MPI_Fint *ierr )
{
#ifdef MPI_MODE_RDONLY
    *ierr = MPI_File_seek_shared( MPI_File_f2c(*v1), (MPI_Offset)*v2, (int)*v3 );
#else
    /* MPI-IO (ROMIO) was not built in; report an internal error */
    *ierr = MPI_ERR_INTERN;
#endif
}
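Both wrappers hinge on converting between Fortran integer handles and C handles. A minimal sketch of that round trip, using only the standard MPI_File_c2f/MPI_File_f2c conversion functions (the helper name is illustrative):

#include <mpi.h>

/* Sketch: convert a C file handle to its Fortran integer form and back.
 * MPI guarantees the round trip yields a handle to the same open file. */
void handle_round_trip(MPI_File fh_c)
{
    MPI_Fint fh_f = MPI_File_c2f(fh_c);   /* C handle -> Fortran INTEGER */
    MPI_File back = MPI_File_f2c(fh_f);   /* Fortran INTEGER -> C handle */
    MPI_Offset pos;
    MPI_File_get_position_shared(back, &pos);  /* usable like the original */
    (void)pos;
}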
int ssioSetPos(SSIO *ssio, const MPI_Offset pos)
{
    assert(ssio != NULL);
    /* only seeking to just past the header is supported */
    assert(pos == SSHEAD_SIZE);
    MPI_File_seek_shared(ssio->mfile, pos, MPI_SEEK_SET);
    return 0;
}
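MPI_File_seek_shared is collective over the communicator the file was opened on, so ssioSetPos only behaves as intended when every rank reaches it with the same pos. A minimal sketch of the expected call pattern (the wrapper name is illustrative, not part of the original library):

/* Sketch: all ranks of the communicator that opened ssio->mfile must move
 * the shared pointer past the header together. */
void ssio_start_body(SSIO *ssio)
{
    ssioSetPos(ssio, SSHEAD_SIZE);   /* collective shared-pointer seek */
}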
int main( int argc, char *argv[] )
{
    int errs = 0;
    int size, rank, i, *buf, rc;
    MPI_File fh;
    MPI_Comm comm;
    MPI_Status status;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;
    MPI_File_open( comm, (char*)"test.ord",
                   MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
                   MPI_INFO_NULL, &fh );
    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    buf = (int *)malloc( size * sizeof(int) );
    buf[0] = rank;
    rc = MPI_File_write_ordered( fh, buf, 1, MPI_INT, &status );
    if (rc) {
        MTestPrintErrorMsg( "File_write_ordered", rc );
        errs++;
    }

    /* make sure all writes finish before we seek/read */
    MPI_Barrier(comm);

    /* Set the individual pointer to 0, since we want to use a read_all */
    MPI_File_seek( fh, 0, MPI_SEEK_SET );
    MPI_File_read_all( fh, buf, size, MPI_INT, &status );
    for (i = 0; i < size; i++) {
        if (buf[i] != i) {
            errs++;
            fprintf( stderr, "%d: buf[%d] = %d\n", rank, i, buf[i] );
        }
    }

    MPI_File_seek_shared( fh, 0, MPI_SEEK_SET );
    for (i = 0; i < size; i++)
        buf[i] = -1;
    MPI_File_read_ordered( fh, buf, 1, MPI_INT, &status );
    if (buf[0] != rank) {
        errs++;
        fprintf( stderr, "%d: buf[0] = %d\n", rank, buf[0] );
    }

    free( buf );
    MPI_File_close( &fh );

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
void ssioRewind(SSIO *ssio)
{
    assert(ssio != NULL);
#ifdef SSIO_USE_MPI
    MPI_File_seek_shared(ssio->mfile, 0, MPI_SEEK_SET);
#else
    rewind(ssio->fp);
#endif /* SSIO_USE_MPI */
}
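A hypothetical companion accessor, mirroring the same SSIO_USE_MPI split; the name ssioGetPos and the assumption that SSIO carries both mfile and fp are illustrative, not taken from the original library:

/* Hypothetical sketch: read back the current position, following the
 * same SSIO_USE_MPI split as ssioRewind above. */
long ssioGetPos(SSIO *ssio)
{
    assert(ssio != NULL);
#ifdef SSIO_USE_MPI
    MPI_Offset pos;
    MPI_File_get_position_shared(ssio->mfile, &pos);  /* shared pointer */
    return (long)pos;
#else
    return ftell(ssio->fp);                           /* stdio position */
#endif /* SSIO_USE_MPI */
}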
/**
 * fh is shared by all processes.
 * When reading, after one process finishes and the shared file pointer has
 * advanced, another process can continue reading from the new position.
 *
 * int MPI_File_seek_shared(MPI_File fh, MPI_Offset offset, int whence);
 * int MPI_File_get_position_shared(MPI_File fh, MPI_Offset *offset);
 *
 * int MPI_File_write_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status);
 * int MPI_File_read_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status);
 *
 * Non-blocking read/write; MPI_Wait is needed to wait for completion:
 * int MPI_File_iread_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request);
 * int MPI_File_iwrite_shared(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Request *request);
 *
 * Read/write in order of rank:
 * int MPI_File_read_ordered(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status);
 * int MPI_File_write_ordered(MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status);
 */
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char *argv[])
{
    int totalTaskNum, rankID;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rankID);
    MPI_Comm_size(MPI_COMM_WORLD, &totalTaskNum);

    char *filename = "file.txt";
    MPI_File fh;
    MPI_Status status;
    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  MPI_INFO_NULL, &fh);

    char write_buf[128];
    sprintf(write_buf, "rankID = %d, totalTaskNum = %d\n", rankID, totalTaskNum);
    // MPI_File_write_shared(fh, write_buf, strlen(write_buf), MPI_CHAR, &status);
    // MPI_File_write_ordered(fh, write_buf, strlen(write_buf), MPI_CHAR, &status);
    MPI_Request request;
    MPI_File_iwrite_shared(fh, write_buf, strlen(write_buf), MPI_CHAR, &request);
    MPI_Wait(&request, &status);

    MPI_Offset offset;
    MPI_File_get_position_shared(fh, &offset);
    printf("1: rankID = %d, Now the position is offset = %lld\n", rankID, (long long)offset);
    sleep(3);

    MPI_Offset offset2 = 0;
    int whence = MPI_SEEK_SET;
    MPI_File_seek_shared(fh, offset2, whence);
    MPI_File_get_position_shared(fh, &offset);
    printf("2: rankID = %d, Now the position is offset = %lld\n", rankID, (long long)offset);
    sleep(3);

    char read_buf[30];
    // MPI_File_read_ordered(fh, read_buf, sizeof(read_buf), MPI_CHAR, &status);
    // MPI_File_read_shared(fh, read_buf, sizeof(read_buf), MPI_CHAR, &status);
    MPI_File_iread_shared(fh, read_buf, sizeof(read_buf) - 1, MPI_CHAR, &request);
    MPI_Wait(&request, &status);
    /* terminate at the number of chars actually read before printing */
    int nread;
    MPI_Get_count(&status, MPI_CHAR, &nread);
    read_buf[nread] = '\0';
    printf("rankID = %d, content = '%s'\n", rankID, read_buf);

    MPI_File_close(&fh);  /* after open, fh carries the communicator info */
    MPI_Finalize();
    return 0;
}
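One caveat the example glosses over: MPI_File_seek_shared is collective (every rank must pass the same offset and whence), while MPI_File_read_shared and MPI_File_write_shared are independent and serialize on the shared pointer in whatever order ranks arrive. A minimal sketch of that distinction (the helper name is illustrative):

#include <mpi.h>

/* Sketch: collective repositioning followed by independent shared reads. */
void rewind_then_read(MPI_File fh, char *chunk, int n)
{
    MPI_Status st;
    /* collective: all ranks must call with identical arguments */
    MPI_File_seek_shared(fh, 0, MPI_SEEK_SET);
    /* independent: ranks consume successive chunks in arrival order */
    MPI_File_read_shared(fh, chunk, n, MPI_CHAR, &st);
}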
JNIEXPORT void JNICALL Java_mpi_File_seekShared(
        JNIEnv *env, jobject jthis, jlong fh, jlong offset, jint whence)
{
    int rc = MPI_File_seek_shared((MPI_File)fh, (MPI_Offset)offset, whence);
    ompi_java_exceptionCheck(env, rc);
}
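A hypothetical companion binding for MPI_File_get_position_shared, written in the same style; the Java-side method name getPositionShared is an assumption, not taken from the original source:

/* Hypothetical sketch of a companion JNI binding following the same pattern. */
JNIEXPORT jlong JNICALL Java_mpi_File_getPositionShared(
        JNIEnv *env, jobject jthis, jlong fh)
{
    MPI_Offset offset = 0;
    int rc = MPI_File_get_position_shared((MPI_File)fh, &offset);
    ompi_java_exceptionCheck(env, rc);
    return (jlong)offset;
}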
/* Major reconstruction of memory management for -off_cache flag */
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*

                      Initializes communications buffers (calls set_buf);
                      initializes iteration scheduling.

Input variables:

-Bmark                (type struct Bench*)
                      (For an explanation of the struct Bench type:
                      describes all aspects and modes of a benchmark;
                      see [1] for more information)
                      Current benchmark

-BMODE                (type MODES)
                      aggregate / non-aggregate

-iter                 (type int)
                      number of the current iteration of the message size loop

-size                 (type int)
                      message size

In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      Communication buffers are allocated and assigned values.

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations and out-of-cache scheduling
                      are set up if requested.

*/
/* >> IMB 3.1  */
{
    /* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;
#ifdef MPIIO
    int ierr;   /* error code for the file-pointer resets below */
#endif
    const int root_based = has_root(Bmark->name);

    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
#if (defined EXT || defined MPIIO || defined RMA)
    if (Bmark->access == no) x_sample = ITERATIONS->msgs_nonaggr;
#endif

    ITERATIONS->n_sample = (size > 0)
                           ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
                           : x_sample;

    Bmark->sample_failure = 0;
    init_size = max(size, asize);

    if (c_info->rank < 0) {
        return;
    } else {
        if (ITERATIONS->iter_policy == imode_off) {
            ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
        } else if ((ITERATIONS->iter_policy == imode_multiple_np) ||
                   (ITERATIONS->iter_policy == imode_auto && root_based)) {
            /* n_sample for benchmarks with uneven distribution of work must be
               greater than or equal to, and a multiple of, num_procs.
               The formula below is the negative leg of a hyperbola, moved and
               scaled relative to the max message size and the initial n_sample,
               then rounded to a multiple of num_procs. */
            double d_n_sample = ITERATIONS->msgspersample;
            int max_msg_size = 1 << c_info->max_msg_log;
            int tmp = (int)(d_n_sample * max_msg_size /
                            (c_info->num_procs * init_size + max_msg_size) + 0.5);
            ITERATIONS->n_sample = x_sample =
                max(tmp - tmp % c_info->num_procs, c_info->num_procs);
        }
        /* else, use n_sample as is */
    }

    if (
#ifdef MPI1
        !strcmp(Bmark->name, "Alltoall") || !strcmp(Bmark->name, "Alltoallv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Ialltoall")  || !strcmp(Bmark->name, "Ialltoall_pure") ||
        !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
#else
        0
#endif // NBC // MPI1
       ) {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)c_info->num_procs * (size_t)init_size;
    } else if (
#ifdef MPI1
        !strcmp(Bmark->name, "Allgather") || !strcmp(Bmark->name, "Allgatherv") ||
        !strcmp(Bmark->name, "Gather")    || !strcmp(Bmark->name, "Gatherv")
#elif defined NBC
        !strcmp(Bmark->name, "Iallgather")  || !strcmp(Bmark->name, "Iallgather_pure")  ||
        !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure") ||
        !strcmp(Bmark->name, "Igather")     || !strcmp(Bmark->name, "Igather_pure")     ||
        !strcmp(Bmark->name, "Igatherv")    || !strcmp(Bmark->name, "Igatherv_pure")
#else // MPI1 // NBC
        0
#endif // MPI1 // NBC
       ) {
        s_len = (size_t)init_size;
        r_len = (size_t)c_info->num_procs * (size_t)init_size;
    } else if (!strcmp(Bmark->name, "Exchange")) {
        s_len = 2 * (size_t)init_size;
        r_len = (size_t)init_size;
    } else if (
#ifdef MPI1
        !strcmp(Bmark->name, "Scatter") || !strcmp(Bmark->name, "Scatterv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Iscatter")  || !strcmp(Bmark->name, "Iscatter_pure") ||
        !strcmp(Bmark->name, "Iscatterv") || !strcmp(Bmark->name, "Iscatterv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
       ) {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)init_size;
    } else if (!strcmp(Bmark->name, "Barrier") || /*!strcmp(Bmark->name,"Window") ||*/
               !strcmp(Bmark->name, "Open_Close")) {
        s_len = r_len = 0;
    } else if (!strcmp(Bmark->name, "Exchange_put") || !strcmp(Bmark->name, "Exchange_get")) {
        s_len = 2 * (size_t)init_size;
        r_len = 2 * (size_t)init_size;
    } else if (!strcmp(Bmark->name, "Compare_and_swap")) {
        /* Compare_and_swap operations require 3 buffers, so allocate space for
         * the compare buffers in our r_buffer */
        s_len = (size_t)init_size;
        r_len = 3 * (size_t)init_size;
    } else {
        s_len = r_len = (size_t)init_size;
    }

    /*===============================================*/
    /* the displacement is declared as int by the MPI-1 standard;
       if c_info->num_procs*init_size exceeds INT_MAX,
       there is no way to run this sample */
    if (
#ifdef MPI1
        !strcmp(Bmark->name, "Alltoallv") || !strcmp(Bmark->name, "Allgatherv") ||
        !strcmp(Bmark->name, "Scatterv")  || !strcmp(Bmark->name, "Gatherv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Ialltoallv")  || !strcmp(Bmark->name, "Ialltoallv_pure")  ||
        !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure") ||
        !strcmp(Bmark->name, "Iscatterv")   || !strcmp(Bmark->name, "Iscatterv_pure")   ||
        !strcmp(Bmark->name, "Igatherv")    || !strcmp(Bmark->name, "Igatherv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
       ) {
        if (s_len > INT_MAX || r_len > INT_MAX) {
            Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if (BMODE->type == Sync) {
        ITERATIONS->use_off_cache = 0;
        ITERATIONS->n_sample = x_sample;
    } else {
#ifdef MPIIO
        ITERATIONS->use_off_cache = 0;
#else
        ITERATIONS->use_off_cache = ITERATIONS->off_cache;
#endif
        if (ITERATIONS->off_cache) {
            if (ITERATIONS->cache_size > 0) {
                size_t cls = (size_t)ITERATIONS->cache_line_size;
                size_t ofs = ((s_len + cls - 1) / cls + 1) * cls;

                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,
                    (2 * ITERATIONS->cache_size * CACHE_UNIT + ofs - 1) / ofs);
                ofs = ((r_len + cls - 1) / cls + 1) * cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,
                    (2 * ITERATIONS->cache_size * CACHE_UNIT + ofs - 1) / ofs);
            } else {
                ITERATIONS->s_offs = ITERATIONS->r_offs = 0;
                ITERATIONS->s_cache_iter = ITERATIONS->r_cache_iter = 1;
            }
        }
    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if (ITERATIONS->use_off_cache) {
        s_alloc = max(s_len, ITERATIONS->s_cache_iter * ITERATIONS->s_offs);
        r_alloc = max(r_len, ITERATIONS->r_cache_iter * ITERATIONS->r_offs);
    } else {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f * (s_alloc + r_alloc) / MEM_UNIT;

#ifdef DEBUG
    {
        size_t mx, mu;

        mx = (size_t)MEM_UNIT * c_info->max_mem;
        mu = (size_t)MEM_UNIT * c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ", s_len, r_len, ITERATIONS->n_sample);
        DBG_I2("max / used memory ", mx, mu);
        DBG_I2("send / recv offsets ", ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ", ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBG_I2("send / recv buffer allocations ", s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ", s_len, r_len);
        DBGF_I2("max / used memory ", mx, mu);
        DBGF_I2("send / recv offsets ", ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ", ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBGF_I2("send / recv buffer allocations ", s_alloc, r_alloc);
    }
#endif

    if (c_info->used_mem > c_info->max_mem) {
        Bmark->sample_failure = SAMPLE_FAILED_MEMORY;
        return;
    }

    if (s_alloc > 0 && r_alloc > 0) {
        if (ITERATIONS->use_off_cache) {
            IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
            IMB_set_buf(c_info, c_info->rank, 0, s_len - 1, 0, r_len - 1);

            for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                i_s = irep % ITERATIONS->s_cache_iter;
                memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs),
                       c_info->s_buffer, s_len);
            }

            for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
                i_r = irep % ITERATIONS->r_cache_iter;
                memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs),
                       c_info->r_buffer, r_len);
            }
        } else {
            IMB_set_buf(c_info, c_info->rank, 0, s_alloc - 1, 0, r_alloc - 1);
        }
    }

    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint)max(s_alloc, r_alloc));

    /* Determine #iterations if dynamic adaptation requested */
    if ((ITERATIONS->iter_policy == imode_dynamic) ||
        (ITERATIONS->iter_policy == imode_auto && !root_based)) {
        double time[MAX_TIME_ID];
        int acc_rep_test, t_sample;
        int selected_n_sample = ITERATIONS->n_sample;

        memset(time, 0, sizeof(time));  /* was: memset(time, 0, MAX_TIME_ID) -- too short */
        if (iter == 0 || BMODE->type == Sync) {
            ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
            if (c_info->n_lens > 0) {
                /* was: memset(..., c_info->n_lens) -- missing element size */
                memset(ITERATIONS->numiters, 0,
                       c_info->n_lens * sizeof(*ITERATIONS->numiters));
            }
        }

        /* first, run 1 iteration only */
        ITERATIONS->n_sample = 1;
#ifdef MPI1
        c_info->select_source = Bmark->select_source;
#endif
        Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);

        time[1] = time[0];

#ifdef MPIIO
        if (Bmark->access != no) {
            ierr = MPI_File_seek(c_info->fh, 0, MPI_SEEK_SET);
            MPI_ERRHAND(ierr);

            if (Bmark->fpointer == shared) {
                ierr = MPI_File_seek_shared(c_info->fh, 0, MPI_SEEK_SET);
                MPI_ERRHAND(ierr);
            }
        }
#endif /*MPIIO*/

        MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

        {
            /* determine rough #repetitions for a run time of 1 sec */
            int rep_test = 1;
            if (time[0] < (1.0 / MSGSPERSAMPLE)) {
                rep_test = MSGSPERSAMPLE;
            } else if ((time[0] < 1.0)) {
                rep_test = (int)(1.0 / time[0] + 0.5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);
        }

        ITERATIONS->n_sample = min(selected_n_sample, acc_rep_test);

        if (ITERATIONS->n_sample > 1) {
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
            time[1] = time[0];
#ifdef MPIIO
            if (Bmark->access != no) {
                ierr = MPI_File_seek(c_info->fh, 0, MPI_SEEK_SET);
                MPI_ERRHAND(ierr);

                if (Bmark->fpointer == shared) {
                    ierr = MPI_File_seek_shared(c_info->fh, 0, MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
        }

        {
            float val = (float)(1 + ITERATIONS->secs / time[0]);
            t_sample = (time[0] > 1.e-8 && (val <= (float)0x7fffffff))
                       ? (int)val
                       : selected_n_sample;
        }

        if (c_info->n_lens > 0 && BMODE->type != Sync) {
            /* check monotonicity with message sizes */
            int i;
            for (i = 0; i < iter; i++) {
                t_sample = (c_info->msglen[i] < size)
                           ? min(t_sample, ITERATIONS->numiters[i])
                           : max(t_sample, ITERATIONS->numiters[i]);
            }
            ITERATIONS->n_sample = ITERATIONS->numiters[iter] =
                min(selected_n_sample, t_sample);
        } else {
            ITERATIONS->n_sample = min(selected_n_sample,
                                       min(ITERATIONS->n_sample_prev, t_sample));
        }

        MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
        {
            int usec = (int)(time[0] * 1000000);  /* was: time*1000000 (array, not scalar) */

            DBGF_I2("Checked time with #iters / usec ", acc_rep_test, usec);
            DBGF_I1("=> # samples, aligned with previous ", t_sample);
            DBGF_I1("final #samples ", ITERATIONS->n_sample);
        }
#endif
    } else { /* neither imode_dynamic nor (imode_auto && !root_based) */
        double time[MAX_TIME_ID];
        Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
    }

    ITERATIONS->n_sample_prev = ITERATIONS->n_sample;

    /* >> IMB 3.1  */
}
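The dynamic-adaptation branch above sizes the repetition count so that one sample runs for roughly a second. A distilled sketch of that heuristic (the function name and cap parameter are illustrative, not IMB API):

/* Sketch: given the measured time t (seconds) of a single iteration, pick
 * a repetition count targeting ~1 second of total run time, capped at
 * max_reps (the code above uses MSGSPERSAMPLE as the cap). */
static int estimate_reps(double t, int max_reps)
{
    if (t < 1.0 / max_reps)   /* too fast to time reliably: take the cap */
        return max_reps;
    if (t < 1.0)              /* round 1/t to the nearest integer */
        return (int)(1.0 / t + 0.5);
    return 1;                 /* one iteration already takes >= 1 s */
}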
int main(int argc, char **argv)
{
    int *buf, i, rank, nprocs, len, sum;
    int global_sum;
    int errs = 0, toterrs, errcode;
    char *filename;
    MPI_File fh;
    MPI_Status status;
    double wr_stime, wr_etime, wr_time, wr_sumtime;
    double rd_stime, rd_etime, rd_time, rd_sumtime;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* process 0 takes the file name as a command-line argument and
       broadcasts it to other processes */
    if (!rank) {
        i = 1;
        while ((i < argc) && strcmp("-fname", *argv)) {
            i++;
            argv++;
        }
        if (i >= argc) {
            fprintf(stderr, "\n*# Usage: shared_fp -fname filename\n\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        argv++;
        len = strlen(*argv);
        filename = (char *)malloc(len + 10);
        strcpy(filename, *argv);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD);
    } else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = (char *)malloc(len + 10);
        MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    buf = (int *)malloc(COUNT * sizeof(int));

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    for (i = 0; i < COUNT; i++)
        buf[i] = COUNT * rank + i;

    errcode = MPI_File_open(MPI_COMM_WORLD, filename,
                            MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_open");
    }

    wr_stime = MPI_Wtime();
    errcode = MPI_File_write_ordered(fh, buf, COUNT, MPI_INT, &status);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_write_ordered");
    }
    wr_etime = MPI_Wtime();

    for (i = 0; i < COUNT; i++)
        buf[i] = 0;

    MPI_Barrier(MPI_COMM_WORLD);

    rd_stime = MPI_Wtime();
    errcode = MPI_File_seek_shared(fh, 0, MPI_SEEK_SET);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_seek_shared");
    }

    errcode = MPI_File_read_ordered(fh, buf, COUNT, MPI_INT, &status);
    if (errcode != MPI_SUCCESS) {
        handle_error(errcode, "MPI_File_read_ordered");
    }
    rd_etime = MPI_Wtime();

    MPI_File_close(&fh);

    sum = 0;
    for (i = 0; i < COUNT; i++)
        sum += buf[i];

    MPI_Allreduce(&sum, &global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    wr_time = wr_etime - wr_stime;
    rd_time = rd_etime - rd_stime;

    MPI_Allreduce(&wr_time, &wr_sumtime, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    MPI_Allreduce(&rd_time, &rd_sumtime, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    /* summed over all processes, the data read back covers
       0 .. COUNT*nprocs-1 exactly once, so the total is (N-1)*N/2
       with N = COUNT*nprocs */
    if (global_sum != (((COUNT * nprocs - 1) * (COUNT * nprocs)) / 2)) {
        errs++;
        fprintf(stderr, "Error: sum %d, global_sum %d, %d\n",
                sum, global_sum, (((COUNT * nprocs - 1) * (COUNT * nprocs)) / 2));
    }

    free(buf);
    free(filename);

    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    if (rank == 0) {
        if (toterrs > 0) {
            fprintf(stderr, "Found %d errors\n", toterrs);
        } else {
            fprintf(stdout, " No Errors\n");
#ifdef TIMING
            fprintf(stderr, "nprocs: %d bytes: %d write: %f read %f\n",
                    nprocs, (int)(COUNT * sizeof(int)), wr_sumtime, rd_sumtime);
#endif
        }
    }
    MPI_Finalize();
    return 0;
}
int main( int argc, char *argv[] )
{
    int errs = 0;
    int size, rank, i, *buf, count, rc;
    MPI_File fh;
    MPI_Comm comm;
    MPI_Status status;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;
    rc = MPI_File_open( comm, (char*)"test.ord",
                        MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
                        MPI_INFO_NULL, &fh );
    if (rc) {
        MTestPrintErrorMsg( "File_open", rc );
        errs++;
        /* If the open fails, there isn't anything else that we can do */
        goto fn_fail;
    }

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    buf = (int *)malloc( size * sizeof(int) );
    buf[0] = rank;

    /* Write to file */
    rc = MPI_File_write_ordered( fh, buf, 1, MPI_INT, &status );
    if (rc) {
        MTestPrintErrorMsg( "File_write_ordered", rc );
        errs++;
    } else {
        MPI_Get_count( &status, MPI_INT, &count );
        if (count != 1) {
            errs++;
            fprintf( stderr, "Wrong count (%d) on write-ordered\n", count );
        }
    }

    /* Set the individual pointer to 0, since we want to use a read_all */
    MPI_File_seek( fh, 0, MPI_SEEK_SET );

    /* Read nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_read( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );
    if (count != 0) {
        errs++;
        fprintf( stderr, "Count not zero (%d) on read\n", count );
    }

    /* Write nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_write( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );  /* was missing: count was stale */
    if (count != 0) {
        errs++;
        fprintf( stderr, "Count not zero (%d) on write\n", count );
    }

    /* Read shared nothing (check status) */
    MPI_File_seek_shared( fh, 0, MPI_SEEK_SET );
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_read_shared( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );
    if (count != 0) {
        errs++;
        fprintf( stderr, "Count not zero (%d) on read shared\n", count );
    }

    /* Write shared nothing (check status) */
    memset( &status, 0xff, sizeof(MPI_Status) );
    MPI_File_write_shared( fh, buf, 0, MPI_INT, &status );
    MPI_Get_count( &status, MPI_INT, &count );  /* was missing: count was stale */
    if (count != 0) {
        errs++;
        fprintf( stderr, "Count not zero (%d) on write shared\n", count );
    }

    MPI_Barrier( comm );

    MPI_File_seek_shared( fh, 0, MPI_SEEK_SET );
    for (i = 0; i < size; i++)
        buf[i] = -1;
    MPI_File_read_ordered( fh, buf, 1, MPI_INT, &status );
    if (buf[0] != rank) {
        errs++;
        fprintf( stderr, "%d: buf = %d\n", rank, buf[0] );
    }

    free( buf );
    MPI_File_close( &fh );

  fn_fail:
    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
/*
 * access_style is explicitly described as modifiable.  Values include
 * read_once, read_mostly, write_once, write_mostly, random
 *
 */
int main(int argc, char *argv[])
{
    int errs = 0, err;
    int buf[10];
    int rank;
    MPI_Comm comm;
    MPI_Status status;
    MPI_File fh;
    MPI_Info infoin, infoout;
    char value[1024];
    int flag, count;

    MTest_Init(&argc, &argv);
    comm = MPI_COMM_WORLD;
    MPI_Comm_rank(comm, &rank);

    MPI_Info_create(&infoin);
    MPI_Info_set(infoin, (char *) "access_style", (char *) "write_once,random");
    MPI_File_open(comm, (char *) "testfile", MPI_MODE_RDWR | MPI_MODE_CREATE, infoin, &fh);
    buf[0] = rank;
    err = MPI_File_write_ordered(fh, buf, 1, MPI_INT, &status);
    if (err) {
        errs++;
        MTestPrintError(err);
    }

    MPI_Info_set(infoin, (char *) "access_style", (char *) "read_once");
    err = MPI_File_seek_shared(fh, 0, MPI_SEEK_SET);
    if (err) {
        errs++;
        MTestPrintError(err);
    }

    err = MPI_File_set_info(fh, infoin);
    if (err) {
        errs++;
        MTestPrintError(err);
    }
    MPI_Info_free(&infoin);

    buf[0] = -1;
    err = MPI_File_read_ordered(fh, buf, 1, MPI_INT, &status);
    if (err) {
        errs++;
        MTestPrintError(err);
    }
    MPI_Get_count(&status, MPI_INT, &count);
    if (count != 1) {
        errs++;
        printf("Expected to read one int, read %d\n", count);
    }
    if (buf[0] != rank) {
        errs++;
        printf("Did not read expected value (%d)\n", buf[0]);
    }

    err = MPI_File_get_info(fh, &infoout);
    if (err) {
        errs++;
        MTestPrintError(err);
    }

    MPI_Info_get(infoout, (char *) "access_style", 1024, value, &flag);
    /* Note that an implementation is allowed to ignore the set_info,
     * so we'll accept either the original or the updated version */
    if (!flag) {
        ;
        /*
         * errs++;
         * printf("Access style hint not saved\n");
         */
    } else {
        if (strcmp(value, "read_once") != 0 && strcmp(value, "write_once,random") != 0) {
            errs++;
            printf("value for access_style unexpected; is %s\n", value);
        }
    }
    MPI_Info_free(&infoout);

    err = MPI_File_close(&fh);
    if (err) {
        errs++;
        MTestPrintError(err);
    }

    MPI_Barrier(comm);

    MPI_Comm_rank(comm, &rank);
    if (rank == 0) {
        err = MPI_File_delete((char *) "testfile", MPI_INFO_NULL);
        if (err) {
            errs++;
            MTestPrintError(err);
        }
    }

    MTest_Finalize(errs);
    return MTestReturnValue(errs);
}
int main(int argc, char **argv)
{
    int *buf, i, rank, nprocs, len, sum, global_sum;
    char *filename;
    MPI_File fh;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    /* process 0 takes the file name as a command-line argument and
       broadcasts it to other processes */
    if (!rank) {
        i = 1;
        while ((i < argc) && strcmp("-fname", *argv)) {
            i++;
            argv++;
        }
        if (i >= argc) {
            printf("\n*# Usage: shared_fp <mpiparameter> -- -fname filename\n\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        argv++;
        len = strlen(*argv);
        filename = (char *)malloc(len + 10);
        strcpy(filename, *argv);
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD);
    } else {
        MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD);
        filename = (char *)malloc(len + 10);
        MPI_Bcast(filename, len + 10, MPI_CHAR, 0, MPI_COMM_WORLD);
    }

    buf = (int *)malloc(COUNT * sizeof(int));

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    for (i = 0; i < COUNT; i++)
        buf[i] = COUNT * rank + i;

    MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  MPI_INFO_NULL, &fh);

    MPI_File_write_shared(fh, buf, COUNT, MPI_INT, &status);

    for (i = 0; i < COUNT; i++)
        buf[i] = 0;

    MPI_Barrier(MPI_COMM_WORLD);

    MPI_File_seek_shared(fh, 0, MPI_SEEK_SET);
    MPI_File_read_shared(fh, buf, COUNT, MPI_INT, &status);

    MPI_File_close(&fh);

    sum = 0;
    for (i = 0; i < COUNT; i++)
        sum += buf[i];

    MPI_Allreduce(&sum, &global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    if (global_sum != (((COUNT * nprocs - 1) * (COUNT * nprocs)) / 2))
        printf("Error: sum %d, global_sum %d, %d\n",
               sum, global_sum, (((COUNT * nprocs - 1) * (COUNT * nprocs)) / 2));

    free(buf);
    free(filename);

    if (!rank)
        printf("Done\n");

    MPI_Finalize();
    return 0;
}
/* Major reconstruction of memory management for -off_cache flag */
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*

                      Initializes communications buffers (calls set_buf);
                      initializes iteration scheduling.

Input variables:

-Bmark                (type struct Bench*)
                      (For an explanation of the struct Bench type:
                      describes all aspects and modes of a benchmark;
                      see [1] for more information)
                      Current benchmark

-BMODE                (type MODES)
                      aggregate / non-aggregate

-iter                 (type int)
                      number of the current iteration of the message size loop

-size                 (type int)
                      message size

In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information.
                      Communication buffers are allocated and assigned values.

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations and out-of-cache scheduling
                      are set up if requested.

*/
/* >> IMB 3.1  */
{
    /* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;
#ifdef MPIIO
    int ierr;   /* error code for the file-pointer resets below */
#endif

    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
#if (defined EXT || defined MPIIO)
    if (Bmark->access == no) x_sample = ITERATIONS->msgs_nonaggr;
#endif

    if (size > 0)
        ITERATIONS->n_sample = max(1, min(ITERATIONS->overall_vol / size, x_sample));
    else
        ITERATIONS->n_sample = x_sample;

    Bmark->sample_failure = 0;
    init_size = max(size, asize);

    if (c_info->rank < 0) return;

    if (!strcmp(Bmark->name, "Alltoall") || !strcmp(Bmark->name, "Alltoallv")) {
        s_len = (size_t)c_info->num_procs * init_size;
        r_len = (size_t)c_info->num_procs * init_size;
    } else if (!strcmp(Bmark->name, "Allgather") || !strcmp(Bmark->name, "Allgatherv") ||
               !strcmp(Bmark->name, "Gather")    || !strcmp(Bmark->name, "Gatherv")) {
        s_len = (size_t)init_size;
        r_len = (size_t)c_info->num_procs * init_size;
    } else if (!strcmp(Bmark->name, "Exchange")) {
        s_len = (size_t)2 * init_size;
        r_len = (size_t)init_size;
    } else if (!strcmp(Bmark->name, "Scatter") || !strcmp(Bmark->name, "Scatterv")) {
        s_len = (size_t)c_info->num_procs * init_size;
        r_len = (size_t)init_size;
    } else if (!strcmp(Bmark->name, "Barrier") || /*!strcmp(Bmark->name,"Window") ||*/
               !strcmp(Bmark->name, "Open_Close")) {
        s_len = r_len = 0;
    } else {
        s_len = r_len = (size_t)init_size;
    }

    /*===============================================*/
    /* the displacement is declared as int by the MPI-1 standard;
       if c_info->num_procs*init_size exceeds INT_MAX,
       there is no way to run this sample */
    if (!strcmp(Bmark->name, "Alltoallv") || !strcmp(Bmark->name, "Allgatherv") ||
        !strcmp(Bmark->name, "Scatterv")  || !strcmp(Bmark->name, "Gatherv")) {
        if (s_len > INT_MAX || r_len > INT_MAX) {
            Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if (BMODE->type == Sync) {
        ITERATIONS->use_off_cache = 0;
        ITERATIONS->n_sample = x_sample;
    } else {
#ifdef MPIIO
        ITERATIONS->use_off_cache = 0;
#else
        ITERATIONS->use_off_cache = ITERATIONS->off_cache;
#endif
        if (ITERATIONS->off_cache) {
            if (ITERATIONS->cache_size > 0) {
                size_t cls = (size_t)ITERATIONS->cache_line_size;
                size_t ofs;

                ofs = ((s_len + cls - 1) / cls + 1) * cls;
                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,
                    (2 * ITERATIONS->cache_size * CACHE_UNIT + ofs - 1) / ofs);
                ofs = ((r_len + cls - 1) / cls + 1) * cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,
                    (2 * ITERATIONS->cache_size * CACHE_UNIT + ofs - 1) / ofs);
            } else {
                ITERATIONS->s_offs = ITERATIONS->r_offs = 0;
                ITERATIONS->s_cache_iter = ITERATIONS->r_cache_iter = 1;
            }
        }
    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if (ITERATIONS->use_off_cache) {
        s_alloc = max(s_len, ITERATIONS->s_cache_iter * ITERATIONS->s_offs);
        r_alloc = max(r_len, ITERATIONS->r_cache_iter * ITERATIONS->r_offs);
    } else {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f * (s_alloc + r_alloc) / MEM_UNIT;

#ifdef DEBUG
    {
        size_t mx, mu;

        mx = (size_t)MEM_UNIT * c_info->max_mem;
        mu = (size_t)MEM_UNIT * c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ", s_len, r_len, ITERATIONS->n_sample);
        DBG_I2("max / used memory ", mx, mu);
        DBG_I2("send / recv offsets ", ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ", ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBG_I2("send / recv buffer allocations ", s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ", s_len, r_len);
        DBGF_I2("max / used memory ", mx, mu);
        DBGF_I2("send / recv offsets ", ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ", ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBGF_I2("send / recv buffer allocations ", s_alloc, r_alloc);
    }
#endif

    if (s_alloc + r_alloc > c_info->max_mem * MEM_UNIT) {
        Bmark->sample_failure = SAMPLE_FAILED_MEMORY;
    } else {
        if (ITERATIONS->use_off_cache) {
            if (s_alloc > 0 && r_alloc > 0) {
                IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
                IMB_set_buf(c_info, c_info->rank, 0, s_len - 1, 0, r_len - 1);

                for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                    i_s = irep % ITERATIONS->s_cache_iter;
                    memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs),
                           c_info->s_buffer, s_len);
                }
                for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
                    i_r = irep % ITERATIONS->r_cache_iter;
                    memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs),
                           c_info->r_buffer, r_len);
                }
            }
        } else {
            if (s_alloc > 0 && r_alloc > 0) {
                IMB_set_buf(c_info, c_info->rank, 0, s_alloc - 1, 0, r_alloc - 1);
            }
        }

        IMB_init_transfer(c_info, Bmark, size, (MPI_Aint)max(s_alloc, r_alloc));

        /* Determine #iterations if dynamic adaptation requested */
        if (ITERATIONS->iter_dyn) {
            double time[2];
            int selected_n_sample;
            int rep_test, acc_rep_test, t_sample;

            selected_n_sample = ITERATIONS->n_sample;

            if (iter == 0 || BMODE->type == Sync) {
                ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
                if (c_info->n_lens > 0) {
                    int i;
                    for (i = 0; i < c_info->n_lens; i++)
                        ITERATIONS->numiters[i] = 0;
                }
            }

            rep_test = 1;
            ITERATIONS->n_sample = rep_test;
            time[0] = time[1] = 0;

            /* first, run 1 iteration only */
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
            time[1] = time[0];

#ifdef MPIIO
            if (Bmark->access != no) {
                ierr = MPI_File_seek(c_info->fh, 0, MPI_SEEK_SET);
                MPI_ERRHAND(ierr);
                if (Bmark->fpointer == shared) {
                    ierr = MPI_File_seek_shared(c_info->fh, 0, MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

            /* determine rough #repetitions for a run time of 1 sec */
            if (time[0] < 0.001) {
                rep_test = 1000;
            } else if (time[0] < 1.) {
                rep_test = (int)(1. / time[0] + .5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);

            ITERATIONS->n_sample = min(selected_n_sample, acc_rep_test);

            if (ITERATIONS->n_sample > 1) {
#ifdef MPI1
                c_info->select_source = Bmark->select_source;
#endif
                Bmark->Benchmark(c_info, size, ITERATIONS, BMODE, &time[0]);
                time[1] = time[0];
#ifdef MPIIO
                if (Bmark->access != no) {
                    ierr = MPI_File_seek(c_info->fh, 0, MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                    if (Bmark->fpointer == shared) {
                        ierr = MPI_File_seek_shared(c_info->fh, 0, MPI_SEEK_SET);
                        MPI_ERRHAND(ierr);
                    }
                }
#endif /*MPIIO*/
                MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
            }

            if (time[0] > 1.e-8) {
                float val = (float)(1 + ITERATIONS->secs / time[0]);
                t_sample = (val <= (float)0x7fffffff) ? (int)val : selected_n_sample;
            } else {
                t_sample = selected_n_sample;
            }

            if (c_info->n_lens > 0 && BMODE->type != Sync) {
                /* check monotonicity with message sizes */
                int it;
                for (it = 0; it < iter; it++) {
                    if (c_info->msglen[it] < size)
                        t_sample = min(t_sample, ITERATIONS->numiters[it]);
                    else
                        t_sample = max(t_sample, ITERATIONS->numiters[it]);
                }
                ITERATIONS->n_sample = ITERATIONS->numiters[iter] =
                    min(selected_n_sample, t_sample);
            } else {
                ITERATIONS->n_sample = min(selected_n_sample,
                                           min(ITERATIONS->n_sample_prev, t_sample));
            }

            MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
            {
                int usec = (int)(time[0] * 1000000);  /* was: time*1000000 (array, not scalar) */

                DBGF_I2("Checked time with #iters / usec ", acc_rep_test, usec);
                DBGF_I1("=> # samples, aligned with previous ", t_sample);
                DBGF_I1("final #samples ", ITERATIONS->n_sample);
            }
#endif
        } /*if( ITERATIONS->iter_dyn )*/

        ITERATIONS->n_sample_prev = ITERATIONS->n_sample;
    } /*if (!( s_alloc + r_alloc > c_info->max_mem*MEM_UNIT ))*/

    /* >> IMB 3.1  */
}