int tcio_file_fread(tcio_distributed_fh dist_handle) {
    MPI_Status status;
    MPI_Offset offset_tmp;
    int count = 0;
    log_debug("rank %d: tcio_file_fread\n", dist_handle->rank);
    /* each rank starts at its own block; subsequent reads stride by
     * bfsize * num_procs so the ranks interleave round-robin */
    MPI_Offset offset = dist_handle->rank * dist_handle->bfsize;
    MPI_File_read_at(dist_handle->fh, offset, dist_handle->data,
                     dist_handle->bfsize, MPI_BYTE, &status);
    MPI_Get_count(&status, MPI_BYTE, &count);
    int i = 1;
    while (count == dist_handle->bfsize) {
        offset += dist_handle->bfsize * dist_handle->num_procs;
        MPI_File_read_at(dist_handle->fh, offset,
                         dist_handle->data + i * dist_handle->bfsize,
                         dist_handle->bfsize, MPI_BYTE, &status);
        MPI_Get_count(&status, MPI_BYTE, &count);
        i++;
    }
    /* offset just past the last byte this rank read */
    offset_tmp = offset + count;
    /* all-reduce (not a broadcast) to find the global maximum offset */
    MPI_Allreduce(&offset_tmp, &dist_handle->max_offset, 1,
                  MPI_LONG_LONG_INT, MPI_MAX, MPI_COMM_WORLD);
    log_debug("rank %d: tcio_file_fread end\n", dist_handle->rank);
    return 0;
}
void read_file_bufferizer::read_buffer() {
    unsigned long to_read;
    buf_pos = 0;
    to_read = buf_size;
    if (to_read > end_file_pos - cur_file_pos_read)
        to_read = end_file_pos - cur_file_pos_read;
#ifdef FILE_VIA_MPI
    /* passing NULL as the status argument is invalid; use a real status so
     * the actual byte count can be queried (reads may come up short) */
    MPI_Status status;
    int read_count = 0;
    MPI_File_read_at(fh, cur_file_pos_read, buffer, to_read, MPI_BYTE, &status);
    MPI_Get_count(&status, MPI_BYTE, &read_count);
    to_read = read_count;
#else
#ifdef WIN32_FILE
    LARGE_INTEGER move;
    move.QuadPart = cur_file_pos_read;
    if (!SetFilePointerEx(fh, move, NULL, FILE_BEGIN)) {
        printf("read_file_bufferizer::read_buffer: cannot set file pointer to the end! file %s, LastError = %d\n",
               read_file_name, GetLastError());
        ABORT(1);
    }
    unsigned long max_read = to_read;
    if (!ReadFile(fh, buffer, max_read, &to_read, NULL)) {
        printf("read_file_bufferizer::read_buffer: cannot read file to the end! file %s, LastError = %d\n",
               read_file_name, GetLastError());
        ABORT(1);
    }
#else
    os_lseek64(fh, cur_file_pos_read, SEEK_SET);
    to_read = ::read(fh, buffer, to_read);
#endif
#endif
    cur_file_pos_read += to_read;
    bytes_in_buffer = to_read;
}
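/*
 * A minimal sketch (not from any of the sources above) of a helper that
 * loops on MPI_File_read_at until the requested byte count is satisfied
 * or EOF is hit. MPI_File_read_at may legally return fewer bytes than
 * requested, so checking MPI_Get_count as below is the portable pattern
 * the snippet above relies on; the helper name is an illustration only.
 */
#include <mpi.h>

static int read_exactly_at(MPI_File fh, MPI_Offset offset,
                           void *buf, int nbytes, int *got)
{
    MPI_Status status;
    int total = 0;
    while (total < nbytes) {
        int n = 0;
        int rc = MPI_File_read_at(fh, offset + total,
                                  (char *)buf + total, nbytes - total,
                                  MPI_BYTE, &status);
        if (rc != MPI_SUCCESS) return rc;
        MPI_Get_count(&status, MPI_BYTE, &n);
        if (n == 0) break;              /* EOF: nothing more to read */
        total += n;
    }
    *got = total;
    return MPI_SUCCESS;
}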
int MPI_File_iread_at(MPI_File mpi_fh, MPI_Offset offset, void *buf,
                      int count, MPI_Datatype datatype, MPIO_Request *request)
{
    int error_code;
    MPI_Status *status;

    MPID_CS_ENTER();
    MPIR_Nest_incr();

    status = (MPI_Status *) ADIOI_Malloc(sizeof(MPI_Status));

    /* for now, no threads or anything fancy.
     * just call the blocking version */
    error_code = MPI_File_read_at(mpi_fh, offset, buf, count, datatype, status);
    /* ROMIO-1 doesn't do anything with status.MPI_ERROR */
    status->MPI_ERROR = error_code;

    /* kick off the request */
    MPI_Grequest_start(MPIU_Greq_query_fn, MPIU_Greq_free_fn,
                       MPIU_Greq_cancel_fn, status, request);
    /* but we did all the work already */
    MPI_Grequest_complete(*request);

    MPIR_Nest_decr();
    MPID_CS_EXIT();

    /* passed the buck to the blocking version... */
    return MPI_SUCCESS;
}
/*!
Reads the geometry from given open file starting at given offset.

Returns true on success, false otherwise.
*/
bool read(MPI_File file, MPI_Offset offset) const
{
    /* initialize to a wrong id so a failed read cannot pass the check below */
    int read_geometry_id = No_Geometry::geometry_id + 1;
    const int ret_val = MPI_File_read_at(
        file, offset, (void*) &read_geometry_id,
        1, MPI_INT, MPI_STATUS_IGNORE
    );
    if (ret_val != MPI_SUCCESS) {
        std::cerr << __FILE__ << ":" << __LINE__
            << " Couldn't read geometry data from given file: "
            << Error_String()(ret_val) << std::endl;
        return false;
    }

    if (read_geometry_id != No_Geometry::geometry_id) {
        std::cerr << __FILE__ << ":" << __LINE__
            << " Wrong geometry: " << read_geometry_id
            << ", should be " << No_Geometry::geometry_id << std::endl;
        return false;
    }

    return true;
}
inline bool SpParHelper::FetchBatch(MPI_File & infile, MPI_Offset & curpos, MPI_Offset end_fpos,
                                    bool firstcall, vector<string> & lines, int myrank)
{
    size_t bytes2fetch = ONEMILLION;    // we might read more than needed, but no problem as we won't process the excess
    if (firstcall) {
        curpos -= 1;        // first byte is only there to check whether we started at the beginning of a line
        bytes2fetch += 1;
    }
    // allocate after the firstcall adjustment so the extra byte fits in the buffer
    char * buf = new char[bytes2fetch];
    char * originalbuf = buf;   // so that we can delete it later, because "buf" will move
    MPI_Status status;
    int bytes_read;
    MPI_File_read_at(infile, curpos, buf, bytes2fetch, MPI_CHAR, &status);
    MPI_Get_count(&status, MPI_CHAR, &bytes_read);  // MPI_Get_count can only return 32-bit integers
    if (!bytes_read) {
        delete [] originalbuf;
        return true;    // done
    }
    SpParHelper::check_newline(&bytes_read, bytes2fetch, buf);
    if (firstcall) {
        if (buf[0] == '\n') {   // we got super lucky and hit the line break
            buf += 1;
            bytes_read -= 1;
            curpos += 1;
        }
        else {  // skip to the next line and let the preceding processor take care of this partial line
            char *c = (char*) memchr(buf, '\n', MAXLINELENGTH); // pointer to the matching byte, or NULL if the character does not occur
            if (c == NULL) {
                cout << "Unexpected line without a break" << endl;
            }
            int n = c - buf + 1;
            bytes_read -= n;
            buf += n;
            curpos += n;
        }
    }
    while (bytes_read > 0 && curpos < end_fpos) {   // this will also finish the last line
        char *c = (char*) memchr(buf, '\n', bytes_read);    // pointer to the matching byte, or NULL if the character does not occur
        if (c == NULL) {
            delete [] originalbuf;
            // if bytes_read stops in the middle of a line, that line will be re-read
            // next time since curpos has not been moved forward yet
            return false;
        }
        int n = c - buf + 1;
        // string constructor from char* buffer: copies the first n characters
        lines.push_back(string(buf, n-1));  // no need to copy the newline character
        bytes_read -= n;    // reduce remaining bytes
        buf += n;           // move the buffer forward
        curpos += n;
    }
    delete [] originalbuf;
    if (curpos >= end_fpos) return true;    // don't call it again, nothing left to read
    else return false;
}
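/*
 * A hedged sketch (an assumption, not from CombBLAS itself) of how a caller
 * might derive the per-rank byte range (curpos / end_fpos) that FetchBatch
 * above expects.  The even split is the assumption here; FetchBatch itself
 * fixes up the partial line at each chunk boundary, and its firstcall
 * one-byte back-off presumes curpos > 0 (e.g. data starts after a header).
 */
#include <mpi.h>

static void my_byte_range(MPI_File infile, int myrank, int nprocs,
                          MPI_Offset *curpos, MPI_Offset *end_fpos)
{
    MPI_Offset fsize = 0;
    MPI_File_get_size(infile, &fsize);
    MPI_Offset chunk = fsize / nprocs;
    *curpos   = myrank * chunk;
    *end_fpos = (myrank == nprocs - 1) ? fsize : (myrank + 1) * chunk;
}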
FORT_DLL_SPEC void FORT_CALL mpi_file_read_at_ ( MPI_Fint *v1, MPI_Offset *v2, void *v3,
                                                 MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *ierr )
{
#ifdef MPI_MODE_RDONLY
    *ierr = MPI_File_read_at( MPI_File_f2c(*v1), *v2, v3, *v4,
                              (MPI_Datatype)(*v5), (MPI_Status *)(v6) );
#else
    *ierr = MPI_ERR_INTERN;
#endif
}
FORTRAN_API void FORT_CALL mpi_file_read_at_(MPI_Fint *fh, MPI_Offset *offset, void *buf,
                                             MPI_Fint *count, MPI_Fint *datatype,
                                             MPI_Status *status, MPI_Fint *ierr)
{
    MPI_File fh_c;

    fh_c = MPI_File_f2c(*fh);
    *ierr = MPI_File_read_at(fh_c, *offset, buf, *count, (MPI_Datatype)*datatype, status);
}
// nrItems needs to be specified exactly
vector<vector<float> > Storage::LoadDataFloatMPIBin(char* filename, int nrItems, int startColumn,
                                                    int endColumn, MPI_Comm comm)
{
    double timeStart;
    if (m_mpiRank == 0) {
        cout << "Loading " << filename << "..."; cout.flush();
        timeStart = MPI_Wtime();
    }

    vector<vector<float> > data(nrItems);
    MPI_File fh;
    MPI_File_open(comm, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);

    vector<float> tempData(nrItems*(endColumn-startColumn));
    if (endColumn-startColumn == 0) {
        cout << "(E) endColumn-startColumn == 0\n"; cout.flush();
    }

    MPI_Status status;
    // the byte offset must use the size of the C type (float), not
    // sizeof(MPI_REAL4), which is the size of the datatype handle
    MPI_File_read_at(fh, (MPI_Offset)startColumn*nrItems*sizeof(float), &tempData[0],
                     (endColumn-startColumn)*nrItems, MPI_REAL4, &status);

    for (int i = 0; i < nrItems; i++) {
        vector<float> f(endColumn-startColumn);
        data[i] = f;
    }

    // the file is stored column-major: unpack into row-major vectors
    int index = 0;
    for (int j = 0; j < (endColumn-startColumn); j++) {
        for (int i = 0; i < nrItems; i++) {
            data[i][j] = tempData[index];
            index++;
        }
    }

    MPI_File_close(&fh);

    if (m_mpiRank == 0) {
        if (data.size() > 0)
            cout << "Loaded " << data.size() << " items of size " << data[0].size()
                 << ". (Time (process 0): " << MPI_Wtime()-timeStart << ")\n";
        else
            cout << "Warning: Loaded no items from filename: " << filename << "\n";
        cout.flush();
    }
    return data;
}
int main( int argc, char *argv[] )
{
    int rank, errs = 0, rc;
    MPI_Errhandler ioerr_handler;
    MPI_Status status;
    MPI_File fh;
    char inbuf[80];

    MTest_Init( &argc, &argv );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Create a file to which to attach the handler */
    rc = MPI_File_open( MPI_COMM_WORLD, (char*)"test.txt",
                        MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
                        MPI_INFO_NULL, &fh );
    if (rc) {
        errs++;
        printf( "Unable to open test.txt for writing\n" );
    }

    rc = MPI_File_create_errhandler( user_handler, &ioerr_handler );
    if (rc) {
        errs++;
        printf( "MPI_File_create_errhandler returned an error code: %d\n", rc );
    }
    rc = MPI_File_set_errhandler( fh, ioerr_handler );
    if (rc) {
        errs++;
        printf( "MPI_File_set_errhandler returned an error code: %d\n", rc );
    }
    /* avoid leaking the errhandler; safe because they have refcount semantics */
    rc = MPI_Errhandler_free( &ioerr_handler );
    if (rc) {
        errs++;
        printf( "MPI_Errhandler_free returned an error code: %d\n", rc );
    }

    /* This should generate an error because the file mode is WRONLY */
    rc = MPI_File_read_at( fh, 0, inbuf, 80, MPI_BYTE, &status );
    if (handlerCalled != 1) {
        errs++;
        printf( "User-defined error handler was not called\n" );
    }

    rc = MPI_File_close( &fh );
    if (rc) {
        errs++;
        printf( "MPI_File_close returned an error code: %d\n", rc );
    }

    MTest_Finalize( errs );
    MPI_Finalize( );
    return 0;
}
static int verify_type(char *filename, MPI_Datatype type,
                       int64_t expected_extent, int do_coll)
{
    int rank, canary;
    MPI_Count tsize;
    int compare = -1;
    int errs = 0, toterrs = 0;
    MPI_Status status;
    MPI_File fh;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    CHECK( MPI_File_open(MPI_COMM_WORLD, filename,
                         MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh));
    CHECK( MPI_File_set_view(fh, rank*sizeof(int), MPI_BYTE, type,
                             "native", MPI_INFO_NULL));
    MPI_Type_size_x(type, &tsize);

    canary = rank + 1000000;

    /* skip over first instance of type */
    if (do_coll) {
        CHECK( MPI_File_write_at_all(fh, tsize, &canary, 1, MPI_INT, &status));
    } else {
        CHECK( MPI_File_write_at(fh, tsize, &canary, 1, MPI_INT, &status));
    }

    CHECK( MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL));

    if (do_coll) {
        CHECK( MPI_File_read_at_all(fh, expected_extent/sizeof(int) + rank,
                                    &compare, 1, MPI_INT, &status));
    } else {
        CHECK( MPI_File_read_at(fh, expected_extent/sizeof(int) + rank,
                                &compare, 1, MPI_INT, &status));
    }

    if (compare != canary) errs = 1;
    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
    MPI_File_close(&fh);

    if (toterrs) {
        printf("%d: got %d expected %d\n", rank, compare, canary);
        /* keep the file if there's an error */
    } else {
        if (rank == 0) MPI_File_delete(filename, MPI_INFO_NULL);
    }
    return toterrs;
}
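/*
 * A minimal driver sketch (assumed, not from the original test) for
 * verify_type above, using a resized type whose extent (64 bytes) differs
 * from its size (4 bytes) -- exactly the mismatch the expected_extent
 * parameter is meant to catch: the canary written just past one instance of
 * the type must land one full extent into the file.
 */
void check_resized(char *filename, int do_coll)
{
    MPI_Datatype resized;
    /* one int, but with an artificially enlarged extent of 64 bytes */
    MPI_Type_create_resized(MPI_INT, 0, 64, &resized);
    MPI_Type_commit(&resized);
    verify_type(filename, resized, 64, do_coll);
    MPI_Type_free(&resized);
}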
void mpi_file_read_at_(MPI_Fint * fh, MPI_Offset * offset, void *buf,
                       MPI_Fint * count, MPI_Fint * datatype,
                       MPI_Status * status, MPI_Fint * ierr)
{
    MPI_File fh_c;
    MPI_Datatype datatype_c;

    fh_c = MPI_File_f2c(*fh);
    datatype_c = MPI_Type_f2c(*datatype);

    *ierr = MPI_File_read_at(fh_c, *offset, buf, *count, datatype_c, status);
}
// get the value of a cell from a file
char get_xy_cell(long long x, long long y, MPI_File file,
                 long long mapxsize, long long mapysize)
{
    char temp;
    MPI_Status status;
    MPI_Offset offset = y*mapxsize + x; // this is the location in the file to read from
    MPI_File_read_at(file, offset, &temp, 1, MPI_CHAR, &status);
    return temp;
}
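/*
 * A short usage sketch for get_xy_cell above.  The file name and the idea of
 * reading the center cell are illustrations only; the map dimensions are
 * assumed to be known to the caller.  Every rank can call this independently,
 * since MPI_File_read_at involves no collective synchronization.
 */
#include <mpi.h>
#include <stdio.h>

void print_center_cell(long long mapxsize, long long mapysize)
{
    MPI_File f;
    if (MPI_File_open(MPI_COMM_WORLD, "map.dat", MPI_MODE_RDONLY,
                      MPI_INFO_NULL, &f) == MPI_SUCCESS) {
        char c = get_xy_cell(mapxsize / 2, mapysize / 2, f, mapxsize, mapysize);
        printf("center cell = %c\n", c);
        MPI_File_close(&f);
    }
}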
int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI,
                                     MPI_File& outAccMPI, vector<unsigned long long>& MPIPos)
{
    try {
        MPI_Status status;
        int pid;
        MPI_Comm_rank(MPI_COMM_WORLD, &pid);    // find out who we are

        for (int i = 0; i < num; i++) {
            if (m->control_pressed) { return 1; }

            // read next sequence
            int length = MPIPos[start+i+1] - MPIPos[start+i];
            char* buf4 = new char[length];
            MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);

            // construct from (pointer, length): buf4 is not null-terminated
            string tempBuf(buf4, length);
            istringstream iss(tempBuf, istringstream::in);
            delete [] buf4;     // array form of delete for new[]

            Sequence* candidateSeq = new Sequence(iss);
            m->gobble(iss);

            if (candidateSeq->getName() != "") {    // in case there is a commented sequence at the end of a file
                if (candidateSeq->getAligned().length() != templateSeqsLength) {    // chimeracheck does not require seqs to be aligned
                    m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping.");
                    m->mothurOutEndLine();
                } else {
                    // find chimeras
                    chimera->getChimeras(candidateSeq);
                    if (m->control_pressed) { delete candidateSeq; return 1; }
                    // print results
                    chimera->print(outMPI, outAccMPI);
                }
            }
            delete candidateSeq;

            // report progress
            if ((i+1) % 100 == 0) { cout << "Processing sequence: " << (i+1) << endl; }
        }
        // report progress
        if (num % 100 != 0) { cout << "Processing sequence: " << num << endl; }

        return 0;
    }
    catch (exception& e) {
        m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
        exit(1);
    }
}
/*
 * mpi_io_shared
 *
 * creates a single shared file
 * writes with independent-io
 * reads with independent-io
 * writes with collective-io
 * reads with collective-io
 */
int mpi_io_shared(char *path, int size, int rank)
{
    MPI_File fh;
    char filepath[512];
    MPI_Offset offset;
    MPI_Status status;
    void *buf;
    int bufcount = BYTES_PER_RANK;
    int rc;

    buf = malloc(bufcount);
    if (!buf) { return 0; }
    memset(buf, 0xa, bufcount);

    sprintf(filepath, "%s/%s", path, "cp-bench-mpio-shared");
    rc = MPI_File_open(MPI_COMM_WORLD, filepath,
                       (MPI_MODE_CREATE|MPI_MODE_RDWR|MPI_MODE_DELETE_ON_CLOSE),
                       MPI_INFO_NULL, &fh);
    MPI_CHECK(rc, "MPI_File_open");

    /* Indep Write */
    offset = rank * (MPI_Offset)bufcount;   /* cast avoids int overflow on large files */
    rc = MPI_File_write_at(fh, offset, buf, bufcount, MPI_BYTE, &status);
    MPI_CHECK(rc, "MPI_File_write_at");

    MPI_Barrier(MPI_COMM_WORLD);

    /* Indep Read: each rank reads back its neighbor's block */
    offset = ((rank+1) % size) * (MPI_Offset)bufcount;
    rc = MPI_File_read_at(fh, offset, buf, bufcount, MPI_BYTE, &status);
    MPI_CHECK(rc, "MPI_File_read_at");

    /* Collective Write */
    offset = rank * (MPI_Offset)bufcount;
    rc = MPI_File_write_at_all(fh, offset, buf, bufcount, MPI_BYTE, &status);
    MPI_CHECK(rc, "MPI_File_write_at_all");

    /* Collective Read */
    offset = ((rank+1) % size) * (MPI_Offset)bufcount;
    rc = MPI_File_read_at_all(fh, offset, buf, bufcount, MPI_BYTE, &status);
    MPI_CHECK(rc, "MPI_File_read_at_all");

    rc = MPI_File_close(&fh);
    MPI_CHECK(rc, "MPI_File_close");

    free(buf);
    return 1;
}
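/*
 * A hedged driver sketch for mpi_io_shared above.  BYTES_PER_RANK and
 * MPI_CHECK come from the original snippet and are assumed to be defined
 * elsewhere; the scratch path is an illustration only.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int ok = mpi_io_shared("/tmp", size, rank);
    if (rank == 0)
        printf("mpi_io_shared %s\n", ok ? "passed" : "failed");
    MPI_Finalize();
    return ok ? 0 : 1;
}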
FORT_DLL_SPEC void FORT_CALL mpi_file_read_at_ ( MPI_Fint *v1, MPI_Offset *v2, void *v3,
                                                 MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *ierr )
{
#ifdef MPI_MODE_RDONLY
#ifndef HAVE_MPI_F_INIT_WORKS_WITH_C
    if (MPIR_F_NeedInit) { mpirinitf_(); MPIR_F_NeedInit = 0; }
#endif

    if (v6 == MPI_F_STATUS_IGNORE) { v6 = (MPI_Fint*)MPI_STATUS_IGNORE; }
    *ierr = MPI_File_read_at( MPI_File_f2c(*v1), (MPI_Offset)*v2, v3,
                              (int)*v4, (MPI_Datatype)(*v5), (MPI_Status *)v6 );
#else
    *ierr = MPI_ERR_INTERN;
#endif
}
// load an m*n matrix by rows
int loadmatrix_rows(MPI_File *fh, float *rbuf, int numrows, int rank,
                    int numtasks, int m, int n)
{
    MPI_Offset offset = 0;
    MPI_Status status;
    MPI_Datatype rowtype;
    int result = 0;

    MPI_Type_contiguous(n, MPI_FLOAT, &rowtype);
    MPI_Type_commit(&rowtype);

    /* the first m % numtasks ranks hold one extra row each */
    if (rank < m % numtasks) {
        offset = (MPI_Offset)numrows * rank;
    } else if (rank == m % numtasks) {
        offset = (MPI_Offset)(numrows+1) * rank;
    } else {
        offset = (MPI_Offset)numrows * rank + m % numtasks;
    }

    MPI_File_set_view(*fh, 0, rowtype, rowtype, "native", MPI_INFO_NULL);
    /* with a rowtype view, the offset is measured in whole rows */
    result = MPI_File_read_at(*fh, offset, rbuf, numrows, rowtype, &status);
    if (result != MPI_SUCCESS) {
        printf("Proc %d read at %lld error!\n", rank, (long long)offset);
    }

    MPI_Type_free(&rowtype);
    return n * numrows;
}
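/*
 * A sketch (an assumption, not from the original) of the row distribution
 * that loadmatrix_rows above expects: the first m % numtasks ranks get one
 * extra row, which is exactly what its three-way offset computation
 * accounts for.
 */
int rows_for_rank(int rank, int numtasks, int m)
{
    int q = m / numtasks;   /* base rows per rank */
    int r = m % numtasks;   /* leftover rows go to the first r ranks */
    return (rank < r) ? q + 1 : q;
}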
int main(int argc, char **argv)
{
    int rank, size, bufsize, nints;
    MPI_File fh;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    bufsize = FILESIZE / size;
    nints = bufsize / sizeof(int);
    int buf[nints];

    MPI_File_open(MPI_COMM_WORLD, "binaryfile", MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    MPI_File_read_at(fh, rank*bufsize, buf, nints, MPI_INT, &status);
    printf("\nrank: %d, buf[%d]: %d\n", rank, rank*bufsize, buf[0]);
    MPI_File_close(&fh);

    MPI_Finalize();
    return 0;
}
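/*
 * A hedged variant of the read in the snippet above for the case where
 * FILESIZE is not a multiple of size*sizeof(int): query the actual element
 * count from the status instead of assuming the whole buffer was filled.
 * The helper name is an illustration only.
 */
#include <mpi.h>

static int read_slice(MPI_File fh, int rank, int bufsize, int nints, int *buf)
{
    MPI_Status status;
    int nread = 0;
    MPI_File_read_at(fh, (MPI_Offset)rank * bufsize, buf, nints, MPI_INT, &status);
    MPI_Get_count(&status, MPI_INT, &nread);    /* may be < nints on the last rank */
    return nread;
}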
raf_t MPI_Load_raf(char *name, MPI_Comm comm)
{
    raf_t raf = (raf_t)RTmalloc(sizeof(struct raf_struct_s));
    raf_init(raf, name);
    raf->blocksize = 65536;
    MPI_File f;
    MPI_Comm_size(comm, &(raf->workers));
    MPI_Comm_rank(comm, &(raf->rank));
    int e = MPI_File_open(comm, name, MPI_MODE_RDONLY, MPI_INFO_NULL, &f);
    if (e) {
        int i = 1024;
        char msg[1024];
        MPI_Error_string(e, msg, &i);
        Fatal(0, error, "err is %s\n", msg);
    }
    MPI_File_set_errhandler(f, MPI_ERRORS_ARE_FATAL);
    MPI_File_get_size(f, &(raf->size));
    if ((raf->size) % (raf->blocksize))
        Fatal(0, error, "file not multiple of block size");
    if (((raf->size)/(raf->blocksize)) % (raf->workers))
        Fatal(0, error, "block count not multiple of worker count");
    raf->data = RTmalloc((raf->size)/(raf->workers));
    if (1) {
        /* collective read of this worker's strided share of the blocks */
        Warning(info, "using MPI_File_read_all");
        MPI_Datatype ftype;
        MPI_Type_vector((raf->size)/(raf->blocksize), (raf->blocksize),
                        (raf->blocksize)*(raf->workers), MPI_CHAR, &ftype);
        MPI_Type_commit(&ftype);
        MPI_File_set_view(f, (raf->blocksize)*(raf->rank), MPI_CHAR, ftype,
                          "native", MPI_INFO_NULL);
        MPI_File_read_all(f, raf->data, (raf->size)/(raf->workers), MPI_CHAR,
                          MPI_STATUS_IGNORE);
        MPI_File_close(&f);
        MPI_Type_free(&ftype);
    } else {
        /* equivalent independent reads, one block at a time */
        Warning(info, "using MPI_File_read_at");
        int blockcount = ((raf->size)/(raf->blocksize))/(raf->workers);
        for (int i = 0; i < blockcount; i++) {
            MPI_File_read_at(f, ((i*(raf->workers)+(raf->rank))*(raf->blocksize)),
                             (raf->data)+(i*(raf->blocksize)), (raf->blocksize),
                             MPI_CHAR, MPI_STATUS_IGNORE);
        }
        MPI_File_close(&f);
    }
    raf->rq_tag = core_add(raf, request_service);
    raf->ack_tag = core_add(raf, receive_service);
    raf->shared.read = read_at;
    raf->shared.size = mpi_size;
    raf->shared.close = mpi_close;
    return raf;
}
JNIEXPORT void JNICALL Java_mpi_File_readAt(
        JNIEnv *env, jobject jthis, jlong fh, jlong fileOffset,
        jobject buf, jboolean db, jint off, jint count,
        jlong jType, jint bType, jlongArray stat)
{
    MPI_Datatype type = (MPI_Datatype)jType;
    void *ptr;
    ompi_java_buffer_t *item;
    ompi_java_getWritePtr(&ptr, &item, env, buf, db, count, type);

    MPI_Status status;
    int rc = MPI_File_read_at((MPI_File)fh, (MPI_Offset)fileOffset,
                              ptr, count, type, &status);

    ompi_java_exceptionCheck(env, rc);
    ompi_java_releaseWritePtr(ptr, item, env, buf, db, off, count, type, bType);
    ompi_java_status_set(env, stat, &status);
}
int loadmatrix_cross_rows(MPI_File *fh, float *rbuf, int numrows, int rank,
                          int numtasks, int m, int n)
{
    MPI_Offset offset = 0;
    int i = 0, j = 0;
    MPI_Status status;
    MPI_Datatype rowtype;
    MPI_Datatype filetype;
    int result = 0;

    MPI_Type_contiguous(n, MPI_FLOAT, &rowtype);
    MPI_Type_commit(&rowtype);

    /* every numtasks-th row of the file belongs to this rank */
    MPI_Type_vector(numrows, 1, numtasks, rowtype, &filetype);
    MPI_Type_commit(&filetype);

    offset = rank;
    MPI_File_set_view(*fh, offset*n*sizeof(float), rowtype, filetype,
                      "native", MPI_INFO_NULL);
    result = MPI_File_read_at(*fh, 0, rbuf, numrows, rowtype, &status);
    if (result != MPI_SUCCESS) {
        printf("Proc %d read at %lld error!\n", rank, (long long)offset);
    }

    if (rank == 2) {
        for (i = 0; i < numrows; i++) {
            printf("Proc %d read row %d: ", rank, i);
            for (j = 0; j < n; j++) {
                printf("%f, ", rbuf[i*n+j]);
            }
            printf("\n");
        }
    }

    MPI_Type_free(&rowtype);
    MPI_Type_free(&filetype);
    return n * numrows;
}
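/*
 * A sketch (an assumption, not from the original) of the matching row count
 * for loadmatrix_cross_rows above: rank r owns rows r, r+numtasks,
 * r+2*numtasks, ..., so the value it should pass as numrows is:
 */
int cross_rows_for_rank(int rank, int numtasks, int m)
{
    return (m > rank) ? (m - rank - 1) / numtasks + 1 : 0;
}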
void ompi_file_read_at_f(MPI_Fint *fh, MPI_Offset *offset, char *buf,
                         MPI_Fint *count, MPI_Fint *datatype,
                         MPI_Fint *status, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_File c_fh = MPI_File_f2c(*fh);
    MPI_Datatype c_type = MPI_Type_f2c(*datatype);
    OMPI_FORTRAN_STATUS_DECLARATION(c_status, c_status2)

    OMPI_FORTRAN_STATUS_SET_POINTER(c_status, c_status2, status)

    c_ierr = MPI_File_read_at(c_fh, (MPI_Offset) *offset, buf,
                              OMPI_FINT_2_INT(*count), c_type, c_status);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_FORTRAN_STATUS_RETURN(c_status, c_status2, status, c_ierr)
}
int main(int argc, char *argv[])
{
    int np, Rank, i;
    int *Buffer, Buffer_size;
    MPI_Status status;
    MPI_File Fp;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &Rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    Buffer_size = 1;    // could also be File_size / np
    Buffer = (int *)malloc(Buffer_size*sizeof(int));

    MPI_File_open(MPI_COMM_WORLD, "Readfile", MPI_MODE_RDONLY, MPI_INFO_NULL, &Fp);
    MPI_File_read_at(Fp, 0, Buffer, Buffer_size, MPI_INT, &status);
    printf("process: %d read\n", Rank);
    for (i = 0; i < Buffer_size; i++) {
        printf("%d\n", Buffer[i]);
    }
    MPI_File_close(&Fp);
    MPI_Finalize();
    return 0;
}
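/*
 * In the snippet above every process reads the same first integer.  A hedged
 * sketch of the more common pattern, each rank reading its own slice at a
 * rank-dependent byte offset; variable names follow the snippet above and
 * the helper name is an illustration only.
 */
#include <mpi.h>

static void read_my_slice(MPI_File Fp, int Rank, int Buffer_size, int *Buffer)
{
    MPI_Status status;
    MPI_File_read_at(Fp, (MPI_Offset)Rank * Buffer_size * (MPI_Offset)sizeof(int),
                     Buffer, Buffer_size, MPI_INT, &status);
}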
int main( int argc, char *argv[] )
{
    unsigned int itr;
    int operacao;
    int verbose;
    int juntar;
    char * chave_file;
    char * entrada_file;
    char * saida_file;
    octeto Nb, Nk, Nr;
    octeto bloco[4*8];
    octeto chave[4*8*15];
    int worldsize, rank;
    MPI_Status status;
    MPI_File chave_handle;
    MPI_File entrada_handle;
    MPI_File saida_handle;
    MPI_Offset entrada_bytes;
    unsigned int numero_blocos;
    unsigned int blocos_processo;
    MPI_Offset bloco_byte_inicio;
    MPI_Offset bloco_byte_fim;
    MPI_Offset iterador;
    Tabela * tabela;
    octeto * tabelaEmpacotada;
    unsigned int proc;
    unsigned int tamanho_tabela;
    Tabela * tabela2;
    unsigned int no_proc;
    unsigned int no_resto;
    unsigned int i;
    BTreeNode * node;
    Indice * indice;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &worldsize);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    operacao = INDEFINIDA;
    verbose = 0;
    juntar = 0;
    chave_file = NULL;
    entrada_file = NULL;
    saida_file = NULL;
    for (itr = 1; itr < (unsigned int)argc; itr++)
    {
        /* Usage instructions */
        if( strcmp(argv[itr],"-a") == 0 || strcmp(argv[itr],"--ajuda") == 0 ||
            strcmp(argv[itr],"-h") == 0 || strcmp(argv[itr],"--help") == 0 )
        {
            if(rank == 0)
            {
                printf(" Usage: mpiexec -n [PROCESSES] ./sm-rijndael [ARGUMENT VALUE].\n");
                printf(" Encrypts/decrypts a file using the extended Rijndael (AES) algorithm,\n");
                printf(" with a pre-processing pass over repeated blocks.\n");
                printf(" Optional arguments:\n");
                printf("    -v,--verbose: Prints completion messages for the operation.\n");
                printf("    -j,--juntar: Concatenates the tables of every process into a master one.\n");
                printf(" Mandatory arguments:\n");
                printf("    -op,--operacao: Tells whether the goal of the run is to encrypt or decrypt.\n");
                printf("        * The possible values are: \'encriptar\' and \'decriptar\'.\n");
                printf("    -e,-i,--entrada,--input: Path and name of the file to be encrypted.\n");
                printf("    -s,-o,--saida,--output: Path and name of the file resulting from encrypting the input.\n");
                printf("    -c,-k,--chave,--key: Path and name of the file containing the key.\n");
                printf(" The key file is in binary format, following this specification:\n");
                printf("    - The first byte must hold the block size (in 4-byte words).\n");
                printf("        * The block may have size: 4, 5, 6, 7 or 8.\n");
                printf("    - The second byte must hold the key size (in 4-byte words).\n");
                printf("        * This application accepts keys of size: 4, 5, 6, 7 or 8.\n");
                printf("    - The next 4*[key size] bytes of the file are the bytes of the key, which\n");
                printf("      must (mandatorily) be written in C hexadecimal format (0xff).\n");
                printf("        * Using a hex editor to build the key file is recommended.\n");
            }
            goto finalizando;
        }
        /* Juntar: concatenate each process's table into a master one */
        else if( strcmp(argv[itr],"-j") == 0 || strcmp(argv[itr],"--juntar") == 0 )
        {
            juntar = 1;
        }
        /* Verbose: print completion messages */
        else if( strcmp(argv[itr],"-v") == 0 || strcmp(argv[itr],"--verbose") == 0 )
        {
            verbose = 1;
        }
        /* Operation to perform */
        else if( strcmp(argv[itr],"-op") == 0 || strcmp(argv[itr],"--operacao") == 0 )
        {
            if( itr+1 < argc )
            {
                if( strcmp(argv[itr+1],"encriptar") == 0 )
                {
                    operacao = ENCRIPTAR;
                }
                else if( strcmp(argv[itr+1],"decriptar") == 0 )
                {
                    operacao = DECRIPTAR;
                }
                itr++;
            }
            else
            {
                goto sempar;
            }
        }
        /* Key file */
        else if( strcmp(argv[itr],"-c") == 0 || strcmp(argv[itr],"--chave") == 0 ||
                 strcmp(argv[itr],"-k") == 0 || strcmp(argv[itr],"--key") == 0 )
        {
            if( itr+1 < argc )
            {
                chave_file = argv[itr+1];
                itr++;
            }
            else
            {
                goto sempar;
            }
        }
        /* Input file */
        else if( strcmp(argv[itr],"-e") == 0 || strcmp(argv[itr],"--entrada") == 0 ||
                 strcmp(argv[itr],"-i") == 0 || strcmp(argv[itr],"--input") == 0 )
        {
            if( itr+1 < argc )
            {
                entrada_file = argv[itr+1];
                itr++;
            }
            else
            {
                goto sempar;
            }
        }
        /* Output file */
        else if( strcmp(argv[itr],"-s") == 0 || strcmp(argv[itr],"--saida") == 0 ||
                 strcmp(argv[itr],"-o") == 0 || strcmp(argv[itr],"--output") == 0 )
        {
            if( itr+1 < argc )
            {
                saida_file = argv[itr+1];
                itr++;
            }
            else
            {
                goto sempar;
            }
        }
        /* Unknown argument */
        else
        {
            if(rank == 0)
            {
                printf("Error in the given arguments.\n");
            }
            goto help;
        }
    }
    /* End of argument parsing */
    if( operacao == INDEFINIDA || chave_file == NULL || entrada_file == NULL || saida_file == NULL )
    {
        if(rank == 0)
        {
            if( operacao == INDEFINIDA )
                printf("The operation to perform is invalid or was not specified.\n");
            if( chave_file == NULL )
                printf("The key file is missing.\n");
            if( entrada_file == NULL )
                printf("The input file is missing.\n");
            if( saida_file == NULL )
                printf("The output file is missing.\n");
        }
        goto help;
    }
    /* End of argument handling */

    if( MPI_File_open( MPI_COMM_WORLD, chave_file, MPI_MODE_RDONLY, MPI_INFO_NULL, &chave_handle ) != MPI_SUCCESS )
    {
        if( rank == 0 ) { printf("Error opening the key file (%s).\n", chave_file); }
        goto help;
    }
    if( MPI_File_read(chave_handle, &Nb, 1, MPI_BYTE, &status) != MPI_SUCCESS )
    {
        if( rank == 0 ) { printf("Error reading the block size from the key file (%s).\n", chave_file); }
        goto help;
    }
    if( Nb < 4 || Nb > 8 )
    {
        if( rank == 0 ) { printf("Invalid block size in the key file (%s).\n", chave_file); }
        goto help;
    }
    if( MPI_File_read(chave_handle, &Nk, 1, MPI_BYTE, &status) != MPI_SUCCESS )
    {
        if( rank == 0 ) { printf("Error reading the key size from the key file (%s).\n", chave_file); }
        goto help;
    }
    if( Nk < 4 || Nk > 8 )
    {
        if( rank == 0 ) { printf("Invalid key size in the key file (%s).\n", chave_file); }
        goto help;
    }
    if( MPI_File_read(chave_handle, chave, 4*Nk, MPI_BYTE, &status) != MPI_SUCCESS )
    {
        if( rank == 0 ) { printf("Error reading the key from the key file (%s).\n", chave_file); }
        goto help;
    }
    MPI_File_close( &chave_handle );
    Nr = numero_rodadas(Nb, Nk);
    KeyExpansion(chave, Nb, Nk);

    if( MPI_File_open( MPI_COMM_WORLD, entrada_file, MPI_MODE_RDONLY, MPI_INFO_NULL, &entrada_handle ) != MPI_SUCCESS )
    {
        if( rank == 0 ) { printf("Error opening the input file (%s).\n", entrada_file); }
        goto help;
    }
    MPI_File_get_size(entrada_handle, &entrada_bytes);

    if( MPI_File_open( MPI_COMM_WORLD, saida_file, MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_EXCL,
                       MPI_INFO_NULL, &saida_handle ) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Error creating the output file (%s).\n", saida_file);
            printf("A possible cause is that the file already exists.\n");
        }
        goto help;
    }

    numero_blocos = ( entrada_bytes / (Nb*4) );
    blocos_processo = numero_blocos / worldsize;

    if( operacao == ENCRIPTAR || operacao == DECRIPTAR )
    {
        bloco_byte_inicio = 4*Nb*blocos_processo*rank;
        bloco_byte_fim = 4*Nb*blocos_processo*(rank+1);
        tabela = novaTabela(Nb*4);

        for( iterador = bloco_byte_inicio; iterador < bloco_byte_fim; iterador += (4*Nb) )
        {
            if( MPI_File_read_at(entrada_handle, iterador, bloco, (4*Nb), MPI_BYTE, &status) != MPI_SUCCESS )
            {
                if( rank == 0 ) { printf("Error reading from the input file (%s).\n", entrada_file); }
                goto help;
            }
            novaOcorrenciaTabela(tabela, bloco, iterador);
        }

        /* each rank also takes one of the leftover blocks, if any */
        iterador = 4*Nb*blocos_processo*worldsize + 4*Nb*rank;
        if( iterador < numero_blocos*4*Nb )
        {
            if( MPI_File_read_at(entrada_handle, iterador, bloco, (4*Nb), MPI_BYTE, &status) != MPI_SUCCESS )
            {
                if( rank == 0 ) { printf("Error reading from the input file (%s).\n", entrada_file); }
                goto help;
            }
            novaOcorrenciaTabela(tabela, bloco, iterador);
        }
        else if( operacao == ENCRIPTAR && iterador == numero_blocos*4*Nb )
        {
            /* final partial block: record the number of leftover bytes in its last byte */
            if( MPI_File_read_at(entrada_handle, iterador, bloco, (4*Nb), MPI_BYTE, &status) != MPI_SUCCESS )
            {
                if( rank == 0 ) { printf("Error reading from the input file (%s).\n", entrada_file); }
                goto help;
            }
            bloco[ 4*Nb - 1 ] = (octeto)(entrada_bytes - numero_blocos*4*Nb);
            novaOcorrenciaTabela(tabela, bloco, iterador);
        }

        if( juntar == 1 )
        {
            tabelaEmpacotada = (octeto*)malloc( entrada_bytes );
            if( rank == 0 ) /* master that concatenates all the trees */
            {
                for( proc = 1; proc < worldsize; proc++ )
                {
                    MPI_Recv( tabelaEmpacotada, entrada_bytes, MPI_BYTE, MPI_ANY_SOURCE,
                              TAG_TABELA_EMPACOTADA, MPI_COMM_WORLD, &status );
                    desempacotarInserindo(tabelaEmpacotada, tabela);
                }

                tamanho_tabela = numeroBlocosTabela(tabela);
                no_proc = (tamanho_tabela / worldsize);
                no_resto = (tamanho_tabela % worldsize);

                tabela2 = novaTabela(Nb*4);
                for( proc = 1; proc < worldsize; proc++ )
                {
                    for( i = 0; i < no_proc; i++ )
                    {
                        soInsiraTabela(tabela2, popLastTabelaNode(tabela) );
                    }
                    if( no_resto > 1 )
                    {
                        soInsiraTabela(tabela2, popLastTabelaNode(tabela) );
                        no_resto--;
                    }
                    empacotarTabela(tabela2, tabelaEmpacotada);
                    MPI_Send(tabelaEmpacotada, numeroBytesTabela(tabela2), MPI_BYTE, proc,
                             TAG_TABELA_EMPACOTADA_2, MPI_COMM_WORLD );
                    destruirArvore(tabela2->root);
                    tabela2->root = NULL;
                }
                destruirTabela(tabela2);
            }
            else
            {
                empacotarTabela(tabela, tabelaEmpacotada);
                MPI_Send(tabelaEmpacotada, numeroBytesTabela(tabela), MPI_BYTE, 0,
                         TAG_TABELA_EMPACOTADA, MPI_COMM_WORLD );
                destruirArvore(tabela->root);
                tabela->root = NULL;
                MPI_Recv( tabelaEmpacotada, entrada_bytes, MPI_BYTE, 0,
                          TAG_TABELA_EMPACOTADA_2, MPI_COMM_WORLD, &status );
                desempacotarInserindo(tabelaEmpacotada, tabela);
            }
            free(tabelaEmpacotada);
        }

        if( operacao == ENCRIPTAR )
            MPI_File_set_size(saida_handle, (MPI_Offset)( (numero_blocos+1)*(Nb*4) ) );
        else if( operacao == DECRIPTAR )
            MPI_File_set_size(saida_handle, entrada_bytes);

        tamanho_tabela = numeroBlocosTabela(tabela);
        for( i = 0; i < tamanho_tabela; i++ )
        {
            node = popLastTabelaNode(tabela);

            if( operacao == ENCRIPTAR )
                AES_encriptar_bloco(node->bloco, Nb, chave, Nr);
            else if( operacao == DECRIPTAR )
                AES_decriptar_bloco(node->bloco, Nb, chave, Nr);

            /* write the processed block at every offset where it occurred */
            indice = node->ocorrencias;
            while( indice != NULL )
            {
                if( MPI_File_write_at(saida_handle, indice->indice, node->bloco, (4*Nb), MPI_BYTE, &status) != MPI_SUCCESS )
                {
                    if( rank == 0 ) { printf("Error writing to the output file (%s).\n", saida_file); }
                    goto help;
                }
                indice = indice->next;
            }
            destruirArvore(node);
        }
        destruirTabela(tabela);

        if( operacao == DECRIPTAR )
        {
            MPI_Barrier( MPI_COMM_WORLD ); /* barrier preventing anyone from reading before the decrypted value is written */
            if( MPI_File_read_at(saida_handle, entrada_bytes-1, bloco, 1, MPI_BYTE, &status) != MPI_SUCCESS )
            {
                if( rank == 0 ) { printf("Error reading from the output file (%s).\n", saida_file); }
                goto help;
            }
            MPI_Barrier( MPI_COMM_WORLD ); /* barrier preventing any process from truncating the file before another process reads */
            MPI_File_set_size(saida_handle, entrada_bytes - 4*Nb + bloco[0]);
        }

        if( rank == 0 && verbose == 1 )
        {
            if( operacao == ENCRIPTAR )
                printf("The file was encrypted successfully.\n");
            else if( operacao == DECRIPTAR )
                printf("The file was decrypted successfully.\n");
        }
    }
    goto finalizando;

sempar:
    if( rank == 0 )
    {
        printf("Missing value for option %s.\n", argv[itr]);
    }
help:
    if( rank == 0 )
    {
        printf("Use the --help option for a better understanding of how to use the application.\n");
    }
finalizando:
    MPI_Finalize( );
    return 0;
}
int main(int argc, char **argv)
{
    char *buf, *tmp, *buf2, *tmp2, *check;
    int i, j, mynod = 0, nprocs = 1, err, my_correct = 1, correct, myerrno;
    double stim, etim;
    double write_tim = 0;
    double read_tim = 0;
    double read_bw, write_bw;
    double max_read_tim, max_write_tim;
    double min_read_tim, min_write_tim;
    double ave_read_tim, ave_write_tim;
    int64_t iter_jump = 0;
    int64_t seek_position = 0;
    MPI_File fh;
    MPI_Status status;
    int nchars;

    /* startup MPI and determine the rank of this process */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    /* parse the command line arguments */
    parse_args(argc, argv);

    if (mynod == 0) printf("# Using mpi-io calls.\n");

    /* kind of a weird hack: if the location of the pvfstab file was
     * specified on the command line, then spit out this location into
     * the appropriate environment variable. */
#if H5_HAVE_SETENV
/* no setenv or unsetenv */
    if (opt_pvfstab_set) {
        if ((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0) {
            perror("setenv");
            goto die_jar_jar_die;
        }
    }
#endif

    /* this is how much of the file data is covered on each iteration of
     * the test.  used to help determine the seek offset on each iteration */
    iter_jump = nprocs * opt_block;

    /* setup a buffer of data to write */
    if (!(tmp = (char *) malloc(opt_block + 256))) {
        perror("malloc");
        goto die_jar_jar_die;
    }
    buf = tmp + 128 - (((long)tmp) % 128);  /* align buffer */

    if (opt_correct) {
        /* do the same buffer setup for verifiable data */
        if (!(tmp2 = (char *) malloc(opt_block + 256))) {
            perror("malloc2");
            goto die_jar_jar_die;
        }
        /* align against tmp2 (the original aligned against tmp by mistake) */
        buf2 = tmp2 + 128 - (((long)tmp2) % 128);
    }

    /* open the file for writing */
    err = MPI_File_open(MPI_COMM_WORLD, opt_file,
                        MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (err < 0) {
        fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno));
        goto die_jar_jar_die;
    }

    /* now repeat the write operations the number of times
     * specified on the command line */
    for (j = 0; j < opt_iter; j++) {
        /* calculate the appropriate position depending on the iteration
         * and rank of the current process */
        seek_position = (j*iter_jump) + (mynod*opt_block);

        if (opt_correct) {
            /* fill in buffer for iteration */
            for (i = mynod+j, check = buf; i < opt_block; i++, check++)
                *check = (char) i;
        }

        /* discover the starting time of the operation */
        MPI_Barrier(MPI_COMM_WORLD);
        stim = MPI_Wtime();

        /* write out the data */
        nchars = opt_block / sizeof(char);
        err = MPI_File_write_at(fh, seek_position, buf, nchars, MPI_CHAR, &status);
        if (err) {
            fprintf(stderr, "node %d, write error: %s\n", mynod, strerror(errno));
        }

        /* discover the ending time of the operation */
        etim = MPI_Wtime();
        write_tim += (etim - stim);
        /* we are done with this "write" iteration */
    }

    err = MPI_File_close(&fh);
    if (err) {
        fprintf(stderr, "node %d, close error after write\n", mynod);
    }

    /* wait for everyone to synchronize at this point */
    MPI_Barrier(MPI_COMM_WORLD);

    /* reopen the file to read the data back out */
    err = MPI_File_open(MPI_COMM_WORLD, opt_file,
                        MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (err < 0) {
        fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno));
        goto die_jar_jar_die;
    }

    /* we are going to repeat the read operation the number of iterations
     * specified */
    for (j = 0; j < opt_iter; j++) {
        /* calculate the appropriate spot given the current iteration and
         * rank within the MPI processes */
        seek_position = (j*iter_jump) + (mynod*opt_block);

        /* discover the start time */
        MPI_Barrier(MPI_COMM_WORLD);
        stim = MPI_Wtime();

        /* read in the file data; use buf2 when checking correctness so the
         * original write buffer survives for the comparison below */
        if (!opt_correct) {
            err = MPI_File_read_at(fh, seek_position, buf, nchars, MPI_CHAR, &status);
        }
        else {
            err = MPI_File_read_at(fh, seek_position, buf2, nchars, MPI_CHAR, &status);
        }
        myerrno = errno;

        /* discover the end time */
        etim = MPI_Wtime();
        read_tim += (etim - stim);

        if (err < 0)
            fprintf(stderr, "node %d, read error, loc = %Ld: %s\n",
                    mynod, mynod*opt_block, strerror(myerrno));

        /* if the user wanted to check correctness, compare the write
         * buffer to the read buffer */
        if (opt_correct && memcmp(buf, buf2, opt_block)) {
            fprintf(stderr, "node %d, correctness test failed\n", mynod);
            my_correct = 0;
            MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
        }
        /* we are done with this read iteration */
    }

    /* close the file */
    err = MPI_File_close(&fh);
    if (err) {
        fprintf(stderr, "node %d, close error after read\n", mynod);
    }

    /* compute the read and write times */
    MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
    MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    /* calculate the average from the sum */
    ave_read_tim = ave_read_tim / nprocs;

    MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
    MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);

    /* calculate the average from the sum */
    ave_write_tim = ave_write_tim / nprocs;

    /* print out the results on one node */
    if (mynod == 0) {
        read_bw = ((int64_t)(opt_block*nprocs*opt_iter)) / (max_read_tim*1000000.0);
        write_bw = ((int64_t)(opt_block*nprocs*opt_iter)) / (max_write_tim*1000000.0);

        printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n",
               nprocs, opt_iter, (long)opt_block);
        printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter));
        printf("# Write: min_time = %f, max_time = %f, mean_time = %f\n",
               min_write_tim, max_write_tim, ave_write_tim);
        printf("# Read:  min_time = %f, max_time = %f, mean_time = %f\n",
               min_read_tim, max_read_tim, ave_read_tim);
        printf("Write bandwidth = %f Mbytes/sec\n", write_bw);
        printf("Read bandwidth = %f Mbytes/sec\n", read_bw);

        if (opt_correct) {
            printf("Correctness test %s.\n", correct ? "passed" : "failed");
        }
    }

die_jar_jar_die:
#if H5_HAVE_SETENV
/* no setenv or unsetenv */
    /* clear the environment variable if it was set earlier */
    if (opt_pvfstab_set) {
        unsetenv("PVFSTAB_FILE");
    }
#endif
    free(tmp);
    if (opt_correct) free(tmp2);
    MPI_Finalize();
    return 0;
}
void readlines(MPI_File *in, const int rank, const int size, const int overlap,
               /*char ***lines,*/ int *nlines)
{
    MPI_Offset filesize;
    MPI_Offset localsize;
    MPI_Offset start;
    MPI_Offset end;
    char *chunk;
    MPI_Offset bytesRead = 0;
    MPI_Offset myBytesRead = 4000000, myActualRead = 0;

    MPI_File_get_size(*in, &filesize);
    int control = 0;
    printf("Rank %d started\n", rank);

    while (bytesRead < filesize) {
        /* figure out who reads what */
        start = (rank * myBytesRead) + bytesRead;
        end = start + myBytesRead - 1;

        /* add overlap to the end of everyone's chunk... */
        if (end > filesize || (end + overlap) > filesize)
            end = filesize;
        else
            end += overlap;

        if (start < filesize) {
            localsize = end - start + 1;
        } else {
            start = filesize - 1;
            localsize = 0;
        }

        /* allocate memory */
        chunk = (char *) malloc((localsize + 1) * sizeof(char));

        /* everyone reads in their part */
        MPI_File_read_at(*in, start, chunk, localsize, MPI_CHAR, MPI_STATUS_IGNORE);
        chunk[localsize] = '\0';

        /*
         * everyone calculates what their start and end *really* are by going
         * from the first newline after start to the first newline after the
         * overlap region starts (e.g., after end - overlap + 1)
         */
        int locstart = 0, locend = localsize;
        if (localsize != 0) {
            if (rank != 0) {
                /* scan for the "\n+\n" FASTQ separator, then to the end of that line */
                while (chunk[locstart] != '\n' || chunk[locstart+1] != '+' || chunk[locstart+2] != '\n') {
                    locstart++;
                }
                locstart += 3;
                while (chunk[locstart] != '\n') {
                    locstart++;
                }
                locstart++;
            }
            if (end != filesize) {
                locend -= overlap;
                while (chunk[locend] != '\n' || chunk[locend+1] != '+' || chunk[locend+2] != '\n') {
                    locend++;
                }
                locend += 3;
                while (chunk[locend] != '\n') {
                    locend++;
                }
                locend++;
            }
        }

        /* what was actually read by rank i */
        myActualRead = locend - locstart;
        if (rank == 0) myActualRead += bytesRead;

        /* Now we'll count the number of lines.
         * This part represents the processing:
         * while (fastq_file.ReadNextRecord(rec)) */
        std::string s = std::string(&chunk[locstart], &chunk[locend]);
        size_t n = std::count(s.begin(), s.end(), '\n');
        uint32 varSuperblockSize = (unsigned int) n/4;

        FastqRecord rec;
        // DsrcFile dsrc_file(varSuperblockSize);
        // dsrc_file.StartCompress("test");

        /* the who variable decides whether we are parsing title, DNA, plus, or quality score */
        int who = 0;
        bool errorFree[4] = {false, false, false, false};
        int64 rec_no = 0;

        /* reading a record (title, DNA sequence, plus, quality score) */
        int j = locstart;
        while (j < locend) {
            switch (who) {
            /* Read title */
            case 0:
            {
                uint32 i = 0;
                for (;;) {
                    int32 c = chunk[j++];
                    if (c != '\n' && c != '\r') {
                        if (i >= rec.title_size) {
                            rec.Extend(rec.title, rec.title_size);
                        }
                        rec.title[i++] = (uchar) c;
                    }
                    else if (i > 0) {
                        break;
                    }
                }
                rec.title[i] = 0;
                rec.title_len = i;
                errorFree[who++] = i > 0 && rec.title[0] == '@';
                break;
            }
            /* Read DNA sequence */
            case 1:
            {
                uint32 i = 0;
                int32 c;
                if (rec.sequence_breaks) {
                    delete rec.sequence_breaks;
                    rec.sequence_breaks = NULL;
                }
                uint32 last_eol_pos = 0;
                uint32 sequence_break = 0;
                for (;;) {
                    c = chunk[j++];
                    if (c == '+') { j--; break; }
                    if (c != '\n' && c != '\r') {
                        if (i >= rec.sequence_size) {
                            rec.Extend(rec.sequence, rec.sequence_size);
                        }
                        rec.sequence[i++] = (uchar) c;
                    }
                    else {
                        if (last_eol_pos != i) {
                            if (sequence_break) {
                                if (!rec.sequence_breaks) {
                                    rec.sequence_breaks = new std::vector<int>;
                                }
                                rec.sequence_breaks->push_back(sequence_break);
                            }
                            else {
                                sequence_break = i - last_eol_pos;
                            }
                            last_eol_pos = i;
                        }
                    }
                }
                rec.sequence[i] = 0;
                rec.sequence_len = i;
                errorFree[who++] = true;
                break;
            }
            /* Read "+" */
            case 2:
            {
                uint32 i = 0;
                int32 c;
                for (;;) {
                    c = chunk[j++];
                    if (c != '\n' && c != '\r') {
                        if (i >= rec.plus_size) {
                            rec.Extend(rec.plus, rec.plus_size);
                        }
                        rec.plus[i++] = (uchar) c;
                    }
                    else if (i > 0) {
                        break;
                    }
                }
                rec.plus[i] = 0;
                rec.plus_len = i;
                errorFree[who++] = i > 0;
                break;
            }
            /* Read quality score */
            case 3:
            {
                uint32 i;
                uint32 last_eol_pos = 0;
                if (rec.quality_breaks) {
                    delete rec.quality_breaks;
                    rec.quality_breaks = NULL;
                }
                if (rec.sequence_size > rec.quality_size)
                    rec.ExtendTo(rec.quality, rec.quality_size, rec.sequence_size);
                for (i = 0; i < rec.sequence_len;) {
                    int32 c = chunk[j++];
                    if (c != '\n' && c != '\r') {
                        rec.quality[i++] = (uchar) c;
                    }
                    else {
                        if (last_eol_pos != i) {
                            if (!rec.quality_breaks) {
                                rec.quality_breaks = new std::vector<int>;
                            }
                            rec.quality_breaks->push_back(i - last_eol_pos);
                            last_eol_pos = i;
                        }
                    }
                }
                j++;    /* consume the newline */
                rec.quality[i] = 0;
                rec.quality_len = i;
                errorFree[who++] = (i == rec.sequence_len);
                break;
            }
            }

            /* If a full record has been read */
            if (who == 4) {
                /* the original tested errorFree[2] twice; the last field is errorFree[3] */
                if (errorFree[0] && errorFree[1] && errorFree[2] && errorFree[3]) {
                    // dsrc_file.WriteRecord(rec);
                    ++rec_no;
                    who = 0;
                }
                else {
                    printf("Rank %d has an error\n", rank);
                    break;
                }
            }
        }
        // dsrc_file.FinishCompress();
        free(chunk);

        /* sum what all ranks consumed, then share the new global position */
        MPI_Reduce(&myActualRead, &bytesRead, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Bcast(&bytesRead, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD);
        control++;
    }

    if (rank == 0)
        printf("%d.*** bytesRead: %lld | filesize: %lld ***\n",
               control, (long long)bytesRead, (long long)filesize);
    return;
}
int main (int argc, char *argv[]){ char *x, *y, *z, *xbuf, *hbuf, *chrNames[MAXNBCHR]; int fd; off_t hsiz; struct stat st; MPI_File mpi_filed; MPI_File mpi_file_split_comm; MPI_Offset fileSize, unmapped_start, discordant_start; int num_proc, rank; int res, nbchr, i, paired, write_sam; int ierr, errorcode = MPI_ERR_OTHER; char *file_name, *output_dir; char *header; unsigned int headerSize; unsigned char threshold; size_t input_file_size; size_t unmappedSize = 0; size_t discordantSize = 0; size_t *readNumberByChr = NULL, *localReadNumberByChr = NULL; Read **reads; double time_count; double time_count1; int g_rank, g_size; MPI_Comm split_comm; //used to split communication when jobs have no reads to sort int split_rank, split_size; //after split communication we update the rank and the size double tic, toc; int compression_level; size_t fsiz, lsiz, loff; const char *sort_name; MPI_Info finfo; /* Set default values */ compression_level = 3; parse_mode = MODE_OFFSET; sort_name = "coordinate"; paired = 0; threshold = 0; write_sam = 0; /* Check command line */ while ((i = getopt(argc, argv, "c:hnpq:")) != -1) { switch(i) { case 'c': /* Compression level */ compression_level = atoi(optarg); break; case 'h': /* Usage display */ usage(basename(*argv)); return 0; case 'n': parse_mode = MODE_NAME; sort_name = "queryname"; break; case 'p': /* Paired reads */ paired = 1; break; case 'q': /* Quality threshold */ threshold = atoi(optarg); break; default: usage(basename(*argv)); return 1; } } if (argc - optind != 2) { usage(basename(*argv)); return 1; } file_name = argv[optind]; output_dir = argv[optind+1]; /* Check arguments */ res = access(file_name, F_OK|R_OK); if (res == -1) err(1, "%s", file_name); res = access(output_dir, F_OK|W_OK); if (res == -1) err(1, "%s", output_dir); /* MPI inits */ res = MPI_Init(&argc, &argv); assert(res == MPI_SUCCESS); res = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(res == MPI_SUCCESS); res = MPI_Comm_size(MPI_COMM_WORLD, &num_proc); assert(res == MPI_SUCCESS); g_rank = rank; g_size = num_proc; /* Small summary */ if (rank == 0) { fprintf(stderr, "Number of processes : %d\n", num_proc); fprintf(stderr, "Reads' quality threshold : %d\n", threshold); fprintf(stderr, "Compression Level is : %d\n", compression_level); fprintf(stderr, "SAM file to read : %s\n", file_name); fprintf(stderr, "Output directory : %s\n", output_dir); } /* Process input file */ fd = open(file_name, O_RDONLY, 0666); assert(fd != -1); assert(fstat(fd, &st) != -1); xbuf = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0); assert(xbuf != MAP_FAILED); /* Parse SAM header */ memset(chrNames, 0, sizeof(chrNames)); x = xbuf; nbchr = 0; while (*x == '@') { y = strchr(x, '\n'); z = x; x = y + 1; if (strncmp(z, "@SQ", 3) != 0) continue; /* Save reference names */ y = strstr(z, "SN:"); assert(y != NULL); z = y + 3; while (*z && !isspace((unsigned char)*z)) z++; chrNames[nbchr++] = strndup(y + 3, z - y - 3); assert(nbchr < MAXNBCHR - 2); } chrNames[nbchr++] = strdup(UNMAPPED); chrNames[nbchr++] = strdup(DISCORDANT); hsiz = x - xbuf; hbuf = strndup(xbuf, hsiz); if (rank == 0) { fprintf(stderr, "The size of the file is %zu bytes\n", (size_t)st.st_size); fprintf(stderr, "Header has %d+2 references\n", nbchr - 2); } asprintf(&header, "@HD\tVN:1.0\tSO:%s\n%s", sort_name, hbuf); free(hbuf); assert(munmap(xbuf, (size_t)st.st_size) != -1); assert(close(fd) != -1); //task FIRST FINE TUNING FINFO FOR READING OPERATIONS MPI_Info_create(&finfo); /* * In this part you shall adjust the striping factor 
and unit according * to the underlying filesystem. * Harmless for other file system. * */ MPI_Info_set(finfo,"striping_factor", STRIPING_FACTOR); MPI_Info_set(finfo,"striping_unit", STRIPING_UNIT); //2G striping MPI_Info_set(finfo,"ind_rd_buffer_size", STRIPING_UNIT); //2gb buffer MPI_Info_set(finfo,"romio_ds_read",DATA_SIEVING_READ); /* * for collective reading and writing * should be adapted too and tested according to the file system * Harmless for other file system. */ MPI_Info_set(finfo,"nb_proc", NB_PROC); MPI_Info_set(finfo,"cb_nodes", CB_NODES); MPI_Info_set(finfo,"cb_block_size", CB_BLOCK_SIZE); MPI_Info_set(finfo,"cb_buffer_size", CB_BUFFER_SIZE); //we open the input file ierr = MPI_File_open(MPI_COMM_WORLD, file_name, MPI_MODE_RDONLY , finfo, &mpi_filed); //assert(in != -1); if (ierr){ if (rank == 0) fprintf(stderr, "%s: Failed to open file in process 0 %s\n", argv[0], argv[1]); MPI_Abort(MPI_COMM_WORLD, errorcode); exit(2); } ierr = MPI_File_get_size(mpi_filed, &fileSize); assert(ierr == MPI_SUCCESS); input_file_size = (long long)fileSize; /* Get chunk offset and size */ fsiz = input_file_size; lsiz = fsiz / num_proc; loff = rank * lsiz; tic = MPI_Wtime(); headerSize = unmappedSize = discordantSize = strlen(header); //We place file offset of each process to the begining of one read's line size_t *goff =(size_t*)calloc((size_t)(num_proc+1), sizeof(size_t)); init_goff(mpi_filed,hsiz,input_file_size,num_proc,rank,goff); //We calculate the size to read for each process lsiz = goff[rank+1]-goff[rank]; //NOW WE WILL PARSE size_t j=0; size_t poffset = goff[rank]; //Current offset in file sam //nbchr because we add the discordant reads in the structure reads = (Read**)malloc((nbchr)*sizeof(Read));//We allocate a linked list of struct for each Chromosome (last chr = unmapped reads) readNumberByChr = (size_t*)malloc((nbchr)*sizeof(size_t));//Array with the number of reads found in each chromosome localReadNumberByChr = (size_t*)malloc((nbchr)*sizeof(size_t));//Array with the number of reads found in each chromosome Read ** anchor = (Read**)malloc((nbchr)*sizeof(Read));//Pointer on the first read of each chromosome //Init first read for(i = 0; i < (nbchr); i++){ reads[i] = malloc(sizeof(Read)); reads[i]->coord = 0; anchor[i] = reads[i]; readNumberByChr[i]=0; } toc = MPI_Wtime(); char *local_data_tmp = malloc(1024*1024); char *local_data =(char*)malloc(((goff[rank+1]-poffset)+1)*sizeof(char)); size_t size_tmp= goff[rank+1]-poffset; local_data[goff[rank+1]-poffset] = 0; char *q=local_data; //We read the file sam and parse while(poffset < goff[rank+1]){ size_t size_to_read = 0; if( (goff[rank+1]-poffset) < DEFAULT_INBUF_SIZE ){ size_to_read = goff[rank+1]-poffset; } else{ size_to_read = DEFAULT_INBUF_SIZE; } // we load the buffer //hold temporary size of SAM //due to limitation in MPI_File_read_at local_data_tmp =(char*)realloc(local_data_tmp, (size_to_read+1)*sizeof(char)); local_data_tmp[size_to_read]=0; // Original reading part is before 18/09/2015 MPI_File_read_at(mpi_filed, (MPI_Offset)poffset, local_data_tmp, size_to_read, MPI_CHAR, MPI_STATUS_IGNORE); size_t local_offset=0; assert(strlen(local_data_tmp) == size_to_read); //we look where is the last line read for updating next poffset size_t offset_last_line = size_to_read-1; size_t extra_char=0; while(local_data_tmp[offset_last_line] != '\n'){ offset_last_line -- ; extra_char++; } local_data_tmp[size_to_read - extra_char]=0; size_t local_data_tmp_sz = strlen(local_data_tmp); //If it s the last line of file, we place a last '\n' for 
the function tokenizer if(rank == num_proc-1 && ((poffset+size_to_read) == goff[num_proc])){ local_data_tmp[offset_last_line]='\n'; } //Now we parse Read in local_data parser_paired(local_data_tmp, rank, poffset, threshold, nbchr, &readNumberByChr, chrNames, &reads); //now we copy local_data_tmp in local_data char *p = local_data_tmp; int pos =0; while (*p && (pos < local_data_tmp_sz)) {*q=*p;p++;q++;pos++;} //we go to the next line poffset+=(offset_last_line+1); local_offset+=(offset_last_line+1); } assert(size_tmp == strlen(local_data)); fprintf(stderr, "%d (%.2lf)::::: *** FINISH PARSING FILE ***\n", rank, MPI_Wtime()-toc); if (local_data_tmp) free(local_data_tmp); malloc_trim(0); MPI_Barrier(MPI_COMM_WORLD); //We set attribute next of the last read and go back to first read of each chromosome for(i = 0; i < nbchr; i++){ reads[i]->next = NULL; reads[i] = anchor[i]; } free(anchor); //We count how many reads we found size_t nb_reads_total =0,nb_reads_global =0; for(j=0;j<nbchr;j++){ nb_reads_total+=readNumberByChr[j]; } MPI_Allreduce(&nb_reads_total, &nb_reads_global, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD); /* * We care for unmapped and discordants reads */ int s = 0; for (s = 1; s < 3; s++){ MPI_File mpi_file_split_comm2; double time_count; size_t total_reads = 0; MPI_Allreduce(&readNumberByChr[nbchr-s], &total_reads , 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD); if ((rank == 0) && (s == 1)) fprintf(stderr, "rank %d :::: total read to sort for unmapped = %zu \n", rank, total_reads); if ((rank == 0) && (s == 2)) fprintf(stderr, "rank %d :::: total read to sort for discordant = %zu \n", rank, total_reads); MPI_Barrier(MPI_COMM_WORLD); if (total_reads == 0){ // nothing to sort for unmapped // maybe write an empty bam file } else{ int i1,i2; size_t *localReadsNum_rank0 = (size_t *)malloc(num_proc*sizeof(size_t)); localReadsNum_rank0[0] = 0; int file_pointer_to_free = 0; int split_comm_to_free = 0; //we build a vector with rank job int val_tmp1 = 0; int val_tmp2 = 0; int chosen_rank = 0; // the color tells in what communicator the rank pertain // color = 0 will be the new communicator color // otherwise the color is 1 int *color_vec_to_send = (int *)malloc(num_proc*sizeof(int)); // the key value tell the order in the new communicator int *key_vec_to_send = (int *)malloc(num_proc*sizeof(int)); //rank 0 gather the vector MPI_Allgather(&readNumberByChr[nbchr-s] , 1, MPI_LONG_LONG_INT, localReadsNum_rank0 , 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); if (rank == 0){ //we must chose the first rank with reads to sort i1=0; while (localReadsNum_rank0[i1] == 0){ chosen_rank++; i1++; } } //we broadcast the chosen rank //task: replace the broadcast with a sendrecieve MPI_Bcast( &chosen_rank, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); //we must chose which rank is going to split the communication if (((rank == chosen_rank) || rank == 0) && (chosen_rank != 0)){ //the rank 0 will recieve the key_vec_to_send and colorvec_to_send //first we exchange the size o if (rank == chosen_rank){ header=(char *)malloc((headerSize + 1)*sizeof(char)); MPI_Recv(header, headerSize + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (rank == 0){ MPI_Send(header, headerSize + 1, MPI_CHAR, chosen_rank, 0, MPI_COMM_WORLD); } } else { //we do nothing here } if (rank == chosen_rank) { int counter = 0; //we compute the number of 0 in the localReadsNum_vec for(i1 = 0; i1 < num_proc; i1++){ if (localReadsNum_rank0[i1] == 0) { counter++; } } // if no jobs without reads 
we do nothing if ( counter == 0 ){ // nothing to do we associate split_comm with split_comm = MPI_COMM_WORLD; for (i2 = 0; i2 < num_proc; i2++) { if (localReadsNum_rank0[i2] == 0) { color_vec_to_send[i2] = 1; key_vec_to_send[i2] = val_tmp2; val_tmp2++; } else { color_vec_to_send[i2] = 0; key_vec_to_send[i2] = val_tmp1; val_tmp1++; } } } else{ // now we compute the color according to // the number of reads to sort for(i2 = 0; i2 < num_proc; i2++){ if (localReadsNum_rank0[i2] == 0){ color_vec_to_send[i2] = 1; key_vec_to_send[i2] = val_tmp2; val_tmp2++; } else{ color_vec_to_send[i2] = 0; key_vec_to_send[i2] = val_tmp1; val_tmp1++; } } // end for loop }// end if }// end if (rank == chosen_rank) MPI_Barrier(MPI_COMM_WORLD); // we scatter the key and color vector // we create key and color variable for each job int local_color = 0; int local_key = 0; // we scatter the color and key MPI_Scatter( color_vec_to_send, 1, MPI_INT, &local_color, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD); MPI_Scatter( key_vec_to_send, 1, MPI_INT, &local_key, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD); // we create a communicator // we group all communicator // with color of zero if (local_color == 0){ MPI_Comm_split( MPI_COMM_WORLD, local_color, local_key, &split_comm); ierr = MPI_File_open(split_comm, file_name, MPI_MODE_RDONLY , finfo, &mpi_file_split_comm2); //we ask to liberate file pointer file_pointer_to_free = 1; //we ask to liberate the split_comm split_comm_to_free = 1; } else{ MPI_Comm_split( MPI_COMM_WORLD, MPI_UNDEFINED, local_key, &split_comm); mpi_file_split_comm2 = mpi_filed; } //now we change the rank in the reads structure if (local_color == 0){ MPI_Comm_rank(split_comm, &split_rank); MPI_Comm_size(split_comm, &split_size); g_rank = split_rank; g_size = split_size; reads[nbchr-s] = reads[nbchr-s]->next; localReadNumberByChr[nbchr-s] = readNumberByChr[nbchr-s]; if (s == 2){ unmapped_start = startOffset(g_rank, g_size, unmappedSize, headerSize, nbchr-s, localReadNumberByChr[nbchr-s], split_comm ); if(!unmapped_start){ fprintf(stderr, "No header was defined for unmapped. 
\n Shutting down.\n"); MPI_Finalize(); return 0; } time_count = MPI_Wtime(); writeSam_discordant_and_unmapped( split_rank, output_dir, header, localReadNumberByChr[nbchr-s], chrNames[nbchr-s], reads[nbchr-s], split_size, split_comm, file_name, mpi_file_split_comm2, finfo, compression_level, local_data, goff[rank], write_sam); if (split_rank == chosen_rank){ fprintf(stderr, "rank %d :::::[MPISORT] Time to write chromosom %s , %f seconds \n\n\n", split_rank, chrNames[nbchr-s], MPI_Wtime() - time_count); } } else{ discordant_start = startOffset(g_rank, g_size, discordantSize, headerSize, nbchr-s, localReadNumberByChr[nbchr-s], split_comm); if(!discordant_start){ fprintf(stderr, "No header was defined for discordant.\n Shutting down.\n"); MPI_Finalize(); return 0; } time_count = MPI_Wtime(); writeSam_discordant_and_unmapped( g_rank, output_dir, header, localReadNumberByChr[nbchr-s], chrNames[nbchr-s], reads[nbchr-s], g_size, split_comm, file_name, mpi_file_split_comm2, finfo, compression_level, local_data, goff[rank], write_sam ); if (split_rank == chosen_rank){ fprintf(stderr, "rank %d :::::[MPISORT] Time to write chromosom %s , %f seconds \n\n\n", split_rank, chrNames[nbchr-s], MPI_Wtime() - time_count); } } while( reads[nbchr-s]->next != NULL){ Read *tmp_chr = reads[nbchr-s]; reads[nbchr-s] = reads[nbchr-s]->next; free(tmp_chr); } free(localReadsNum_rank0); } else{ // we do nothing } //we put a barrier before freeing pointers MPI_Barrier(MPI_COMM_WORLD); //we free the file pointer if (file_pointer_to_free) MPI_File_close(&mpi_file_split_comm2); //we free the split_comm if (split_comm_to_free) MPI_Comm_free(&split_comm); split_comm_to_free = 0; file_pointer_to_free = 0; free(color_vec_to_send); free(key_vec_to_send); } } //end for (s=1; s < 3; s++){ /* * We write the mapped reads in a file named chrX.bam * We loop by chromosoms. */ MPI_Barrier(MPI_COMM_WORLD); for(i = 0; i < (nbchr-2); i++){ /* * First Part of the algorithm * * In this part we elected a rank which is the first rank * to have reads to sort. * * Once elected a rank, we plit the communicator according to * wether the rank has reads to sort for this chromosom. * * The new communicator is COMM_WORLD. 
 *
 * If all jobs have reads to sort there is no need to split the communicator, and then
 * COMM_WORLD = MPI_COMM_WORLD
 *
 */
int i1, i2;
size_t localReadsNum_rank0[num_proc];
localReadsNum_rank0[0] = 0;
int file_pointer_to_free = 0;
int split_comm_to_free = 0;

// we build a vector with the rank of each job
int val_tmp1 = 0;
int val_tmp2 = 0;
int chosen_rank = 0;        // tells which rank is going to compute the color and key
int chosen_split_rank = 0;  // the rank that collects data once the communicator is split; normally this rank is 0

// the color tells in which communicator the rank belongs:
// color = 0 will be the new communicator color,
// otherwise the color is 1;
// the key value tells the order in the new communicator
int *color_vec_to_send = malloc(num_proc * sizeof(int));
int *key_vec_to_send   = malloc(num_proc * sizeof(int));

// first we test whether there are reads to sort:
// every rank receives the sum of all the read counts
size_t total_reads_by_chr = 0;
MPI_Allreduce(&readNumberByChr[i], &total_reads_by_chr, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);

//fprintf(stderr, "rank %d :::: readNumberByChr[i] = %zu \n", rank, readNumberByChr[i]);
//fprintf(stderr, "rank %d :::: total_reads_by_chr = %zu \n", rank, total_reads_by_chr);

if (total_reads_by_chr == 0)
    continue; // pass to the next chromosome

// all ranks gather the vector
MPI_Allgather(&readNumberByChr[i], 1, MPI_LONG_LONG_INT, localReadsNum_rank0, 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD);

if (rank == 0){
    // rank 0 chooses the first rank with reads to sort
    // (bounds check first, to avoid reading past the vector)
    i1 = 0;
    while ((i1 < num_proc) && (localReadsNum_rank0[i1] == 0)){
        chosen_rank++;
        i1++;
    }
    fprintf(stderr, "rank %d :::: Elected rank = %d \n", rank, chosen_rank);
}

// we broadcast the chosen rank
// task: replace the broadcast with a send/receive
MPI_Bcast(&chosen_rank, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Barrier(MPI_COMM_WORLD);

if (((rank == chosen_rank) || rank == 0) && (chosen_rank != 0)){
    // first we exchange the header
    if (rank == chosen_rank){
        header = malloc((headerSize + 1) * sizeof(char));
        header[headerSize] = '\0';
        MPI_Recv(header, headerSize + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    }
    if (rank == 0){
        MPI_Send(header, headerSize + 1, MPI_CHAR, chosen_rank, 0, MPI_COMM_WORLD);
    }
}
else {
    // we do nothing here
}

MPI_Barrier(MPI_COMM_WORLD);

if (rank == chosen_rank) {

    int counter = 0;
    // we count the number of zeros in localReadsNum_rank0
    for (i1 = 0; i1 < num_proc; i1++){
        if (localReadsNum_rank0[i1] == 0) {
            counter++;
        }
    }

    // if there is no job without reads we don't split
    if ( counter == 0 ){
        fprintf(stderr, "rank %d ::::[MPISORT] we don't split the rank \n", rank);
        split_comm = MPI_COMM_WORLD;
        for (i2 = 0; i2 < num_proc; i2++) {
            if (localReadsNum_rank0[i2] == 0) {
                color_vec_to_send[i2] = 1;
                key_vec_to_send[i2]   = val_tmp2;
                val_tmp2++;
            } else {
                color_vec_to_send[i2] = 0;
                key_vec_to_send[i2]   = val_tmp1;
                val_tmp1++;
            }
        }
    }
    else {
        // now we compute the color according to
        // the number of reads to sort
        fprintf(stderr, "rank %d ::::[MPISORT] we split the rank \n", rank);
        for (i2 = 0; i2 < num_proc; i2++){
            if (localReadsNum_rank0[i2] == 0){
                color_vec_to_send[i2] = 1;
                key_vec_to_send[i2]   = val_tmp2;
                val_tmp2++;
            } else {
                color_vec_to_send[i2] = 0;
                key_vec_to_send[i2]   = val_tmp1;
                val_tmp1++;
            }
        } // end for loop
    } // end if
} // end if (rank == chosen_rank)

MPI_Barrier(MPI_COMM_WORLD);

// we create key and color variables for each job
int local_color = 0;
int local_key = 0;

// the chosen rank scatters the color and the key vectors
MPI_Scatter(color_vec_to_send, 1, MPI_INT, &local_color, 1,
            MPI_INT, chosen_rank, MPI_COMM_WORLD);

MPI_Scatter(key_vec_to_send, 1, MPI_INT, &local_key, 1,
            MPI_INT, chosen_rank, MPI_COMM_WORLD);

MPI_Barrier(MPI_COMM_WORLD);

// now we create a communicator:
// we group all ranks with a color of zero
if (local_color == 0){
    MPI_Comm_split(MPI_COMM_WORLD, local_color, local_key, &split_comm);
    ierr = MPI_File_open(split_comm, file_name, MPI_MODE_RDONLY, finfo, &mpi_file_split_comm);
    // remember to release the file pointer
    file_pointer_to_free = 1;
    // remember to release the split_comm
    split_comm_to_free = 1;
}
else {
    MPI_Comm_split(MPI_COMM_WORLD, MPI_UNDEFINED, local_key, &split_comm);
    mpi_file_split_comm = mpi_filed;
}

// now we update the rank in the reads structure
if (local_color == 0){
    MPI_Comm_rank(split_comm, &split_rank);
    MPI_Comm_size(split_comm, &split_size);
    // we update g_rank
    g_rank = split_rank;
    g_size = split_size;
}
else {
    g_rank = split_rank;
    g_size = split_size = num_proc;
}

localReadNumberByChr[i] = readNumberByChr[i];
MPI_Barrier(MPI_COMM_WORLD);

if ((local_color == 0) && (i < (nbchr - 2))) {

    /*
     * Second part of the algorithm
     *
     * First we load the coordinates, offset sources, and read sizes into vectors.
     *
     * Then we sort the coordinates of the reads
     * with a bitonic sorter.
     *
     * Then, according to the read coordinates, we reorder the offset sources and sizes;
     * this is done thanks to the index of the sorting.
     *
     * Afterward we compute the offsets of the reads in
     * the destination file.
     *
     * Finally we dispatch the information to all ranks
     * in the communicator for the next step.
     */

    // we do a local merge sort
    if (reads[i] && reads[i]->next && reads[i]->next->next){
        mergeSort(reads[i], readNumberByChr[i]);
    }

    size_t local_readNum = localReadNumberByChr[i];
    reads[i] = reads[i]->next;

    // first we compute the dimension of the parabitonic sort:
    // the dimension is the number of processors on which we
    // perform the bitonic sort
    // int dimensions = (int)(log2(num_processes));
    // find the next (strictly greater) power of 2, then go one back
    int dimensions = 1;
    while (dimensions <= split_size)
        dimensions <<= 1;
    dimensions >>= 1;

    // we get the maximum number of reads among
    // all the workers

    /*
     * Here we split the program into 2 cases:
     *
     * 1) split_size is a power of 2 (the best case);
     * this case is the simplest: we have no extra communication to dispatch the reads
     * evenly between the jobs
     *
     * 2) split_size is not a power of 2 (the worst case);
     * in this case we have to dispatch the reads between the jobs evenly.
* */ if (split_rank == chosen_split_rank){ fprintf(stderr, "Rank %d :::::[MPISORT] Dimensions for bitonic = %d \n", split_rank, dimensions); fprintf(stderr, "Rank %d :::::[MPISORT] Split size = %d \n", split_rank, split_size); } //we test the computed dimension if (dimensions == split_size ){ size_t max_num_read = 0; MPI_Allreduce(&localReadNumberByChr[i], &max_num_read, 1, MPI_LONG_LONG_INT, MPI_MAX, split_comm); // if the dimension == split_size MPI_Barrier(split_comm); size_t first_local_readNum = local_readNum; /* * Vector creation and allocation fprintf(stderr, "split rank %d :::::[MPISORT] max_num_read = %zu \n", split_rank, max_num_read); */ local_readNum = max_num_read; time_count = MPI_Wtime(); size_t *local_reads_coordinates_unsorted = calloc(local_readNum, sizeof(size_t)); size_t *local_reads_coordinates_sorted = calloc(local_readNum, sizeof(size_t)); size_t *local_offset_source_unsorted = calloc(local_readNum, sizeof(size_t)); size_t *local_offset_source_sorted = calloc(local_readNum, sizeof(size_t)); int *local_dest_rank_sorted = calloc(local_readNum, sizeof(int)); int *local_reads_sizes_unsorted = calloc(local_readNum, sizeof(int)); int *local_reads_sizes_sorted = calloc(local_readNum, sizeof(int)); int *local_source_rank_unsorted = calloc(local_readNum, sizeof(int)); int *local_source_rank_sorted = calloc(local_readNum, sizeof(int)); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT][MALLOC 1] time spent = %f s\n", split_rank, MPI_Wtime() - time_count); local_reads_coordinates_unsorted[0] = 0; local_reads_coordinates_sorted[0] = 0; local_dest_rank_sorted[0] = 0; local_reads_sizes_unsorted[0] = 0; local_reads_sizes_sorted[0] = 0; local_source_rank_unsorted[0] = 0; local_source_rank_sorted[0] = 0; local_offset_source_unsorted[0] = 0; local_offset_source_sorted[0] = 0; //those vectors are the same that local_..._sorted but without zero padding size_t *local_reads_coordinates_sorted_trimmed = NULL; int *local_dest_rank_sorted_trimmed = NULL; int *local_reads_sizes_sorted_trimmed = NULL; size_t *local_offset_source_sorted_trimmed = NULL; size_t *local_offset_dest_sorted_trimmed = NULL; int *local_source_rank_sorted_trimmed = NULL; //vectors used in the bruck just after the parabitonic sort size_t *local_reads_coordinates_sorted_trimmed_for_bruck = NULL; int *local_dest_rank_sorted_trimmed_for_bruck = NULL; int *local_reads_sizes_sorted_trimmed_for_bruck = NULL; size_t *local_offset_source_sorted_trimmed_for_bruck = NULL; size_t *local_offset_dest_sorted_trimmed_for_bruck = NULL; int *local_source_rank_sorted_trimmed_for_bruck = NULL; //task Init offset and size for source - free chr // from mpiSort_utils.c get_coordinates_and_offset_source_and_size_and_free_reads( split_rank, local_source_rank_unsorted, local_reads_coordinates_unsorted, local_offset_source_unsorted, local_reads_sizes_unsorted, reads[i], first_local_readNum ); //init indices for qksort size_t *coord_index = (size_t*)malloc(local_readNum*sizeof(size_t)); for(j = 0; j < local_readNum; j++){ coord_index[j] = j; } //To start we sort locally the reads coordinates. //this is to facilitate the bitonic sorting //if the local coordinates to sort are to big we could get rid of //this step. 
time_count = MPI_Wtime(); base_arr2 = local_reads_coordinates_unsorted; qksort(coord_index, local_readNum, sizeof(size_t), 0, local_readNum - 1, compare_size_t); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT][LOCAL SORT] time spent = %f s\n", split_rank, MPI_Wtime() - time_count); //We index data for(j = 0; j < local_readNum; j++){ local_reads_coordinates_sorted[j] = local_reads_coordinates_unsorted[coord_index[j]]; local_source_rank_sorted[j] = local_source_rank_unsorted[coord_index[j]]; local_reads_sizes_sorted[j] = local_reads_sizes_unsorted[coord_index[j]]; local_offset_source_sorted[j] = local_offset_source_unsorted[coord_index[j]]; local_dest_rank_sorted[j] = rank; //will be updated after sorting the coordinates } /* * FOR DEBUG * for(j = 0; j < local_readNum - 1; j++){ assert( local_reads_coordinates_sorted[j] < local_reads_coordinates_sorted[j+1]); } */ free(coord_index); //ok free(local_source_rank_unsorted); //ok free(local_reads_coordinates_unsorted); //ok free(local_reads_sizes_unsorted); //ok free(local_offset_source_unsorted); //ok // we need the total number of reads. size_t total_num_read = 0; MPI_Allreduce(&localReadNumberByChr[i], &total_num_read, 1, MPI_LONG_LONG_INT, MPI_SUM, split_comm); /* * * In this section the number of bitonic dimension * is equal to the split size. * * In this case there are less communication in preparation * of the sorting. * * We use the parabitonic version 2. */ //we calll the bitonic time_count = MPI_Wtime(); ParallelBitonicSort2( split_comm, split_rank, dimensions, local_reads_coordinates_sorted, local_reads_sizes_sorted, local_source_rank_sorted, local_offset_source_sorted, local_dest_rank_sorted, max_num_read ); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT][BITONIC 2] time spent = %f s\n", split_rank, MPI_Wtime() - time_count); size_t k1; size_t tmp2 = 0; for (k1 = 1; k1 < max_num_read; k1++){ assert(local_reads_coordinates_sorted[k1-1] <= local_reads_coordinates_sorted[k1]); local_dest_rank_sorted[k1]= split_rank; } /* for (k1 = 0; k1 < max_num_read; k1++){ fprintf(stderr, "rank %d :::::[MPISORT][BITONIC 2] local_reads_coordinates_sorted[%zu]= %zu s\n", split_rank, k1, local_reads_coordinates_sorted[k1]); fprintf(stderr, "rank %d :::::[MPISORT][BITONIC 2] local_source_rank_sorted[%zu]= %d s\n", split_rank, k1, local_source_rank_sorted[k1]); } */ size_t *local_offset_dest_sorted = malloc(max_num_read*sizeof(size_t)); size_t last_local_offset = 0; // We compute the local_dest_offsets_sorted size_t local_total_offset = 0; for (k1 = 0; k1 < max_num_read; k1++){ local_offset_dest_sorted[k1] = local_reads_sizes_sorted[k1]; local_total_offset += local_reads_sizes_sorted[k1]; } //we make the cumulative sum of all offsets for (k1 = 1; k1 < max_num_read; k1++){ local_offset_dest_sorted[k1] = local_offset_dest_sorted[k1 - 1] + local_offset_dest_sorted[k1]; } //we exchange the last destination offset last_local_offset = local_offset_dest_sorted[max_num_read-1]; //number of block to send int blocksize = 1; MPI_Offset *y = calloc(split_size, sizeof(MPI_Offset)); MPI_Offset *y2 = calloc(split_size + 1, sizeof(MPI_Offset)); //we wait all processors MPI_Gather(&last_local_offset, 1, MPI_LONG_LONG_INT, y, 1, MPI_LONG_LONG_INT, 0, split_comm); if (split_rank ==0){ for (k1 = 1; k1 < (split_size + 1); k1++) { y2[k1] = y[k1-1]; } } if (split_rank ==0){ for (k1 = 1; k1 < (split_size +1); k1++) { y2[k1] = y2[k1-1] + y2[k1]; } } size_t offset_to_add = 0; MPI_Scatter(y2, 1, MPI_LONG_LONG_INT, 
            &offset_to_add, 1, MPI_LONG_LONG_INT, 0, split_comm);

free(y);
free(y2);

// we add the offset of the previous rank
for (k1 = 0; k1 < max_num_read; k1++){
    if (local_reads_sizes_sorted[k1] != 0)
        local_offset_dest_sorted[k1] += offset_to_add;
    else
        local_offset_dest_sorted[k1] = 0;
}

/*
for (k1 = 0; k1 < max_num_read; k1++){
    fprintf(stderr, "\n");
    fprintf(stderr, "rank %d :::::[MPISORT][BITONIC 2] local_reads_coordinates_sorted[%zu]= %zu s\n", split_rank, k1, local_reads_coordinates_sorted[k1]);
    fprintf(stderr, "rank %d :::::[MPISORT][BITONIC 2] local_source_rank_sorted[%zu]= %d s\n", split_rank, k1, local_source_rank_sorted[k1]);
    fprintf(stderr, "rank %d :::::[MPISORT][BITONIC 2] local_offset_dest_sorted[%zu]= %d s\n", split_rank, k1, local_offset_dest_sorted[k1]);
    fprintf(stderr, "\n");
}
*/

/*
 * we update the destination rank according to
 * the original number of reads read.
 */

// we compute the new destination rank according to max_num_read
size_t previous_num_reads_per_job[dimensions];

// we create a vector of size split_size with the previous reads per job
MPI_Allgather(&first_local_readNum, 1, MPI_LONG_LONG_INT,
              previous_num_reads_per_job, 1, MPI_LONG_LONG_INT, split_comm);

// we compute the position of the read in the first
// reference without the zero padding of the bitonic sort
size_t pos_ref0 = 0;
// we need the number of zeros we add for the padding
size_t N0 = max_num_read*dimensions - total_num_read;
int new_rank = 0;
int previous_rank = 0;

// we compute the new rank for
// the reads sorted by destination offset
size_t h = 0;
for (j = 0; j < max_num_read; j++) {
    if (local_reads_sizes_sorted[j] != 0){
        new_rank = chosen_split_rank;
        // pos_ref0 is unsigned, so guard the subtraction instead of
        // testing pos_ref0 >= 0 (which is always true for a size_t)
        if (max_num_read*split_rank + j >= N0) {
            pos_ref0 = (max_num_read*split_rank + j) - N0;
            size_t tmp2 = 0;
            for (h = 0; h < dimensions; h++){
                tmp2 += previous_num_reads_per_job[h];
                if (pos_ref0 < tmp2) {
                    new_rank = h;
                    break;
                }
            }
            previous_rank = local_dest_rank_sorted[j];
            local_dest_rank_sorted[j] = new_rank;
        }
    }
}

MPI_Barrier(split_comm);

size_t offset = 0;
size_t numItems = 0;
size_t num_read_for_bruck = 0;

// find the first non-zero entry (the zero padding sits at the front)
int *p = local_reads_sizes_sorted;
if (p[0] != 0) { offset = 0; }
if (p[max_num_read - 1] == 0) { offset = max_num_read; }
else { while ((*p == 0) && (offset < max_num_read)){ offset++; p++; } }

/*
 * REMOVE ZERO PADDING BEFORE BRUCK
 */
time_count = MPI_Wtime();
if (offset > 0){

    // we remove zeros from the vectors; there are 2 cases:
    // either offset < max_num_read,
    // or the entire vector is null
    if ( offset < max_num_read ){

        numItems = max_num_read - offset;

        local_reads_coordinates_sorted_trimmed_for_bruck = malloc(numItems * sizeof(size_t));
        local_offset_source_sorted_trimmed_for_bruck     = malloc(numItems * sizeof(size_t));
        local_offset_dest_sorted_trimmed_for_bruck       = malloc(numItems * sizeof(size_t));
        local_reads_sizes_sorted_trimmed_for_bruck       = malloc(numItems * sizeof(int));
        local_dest_rank_sorted_trimmed_for_bruck         = malloc(numItems * sizeof(int));
        local_source_rank_sorted_trimmed_for_bruck       = malloc(numItems * sizeof(int));

        size_t y = 0;
        for (y = 0; y < numItems; y++){
            local_reads_coordinates_sorted_trimmed_for_bruck[y] = local_reads_coordinates_sorted[y+offset];
            local_offset_source_sorted_trimmed_for_bruck[y]     = local_offset_source_sorted[y+offset];
            local_offset_dest_sorted_trimmed_for_bruck[y]       = local_offset_dest_sorted[y+offset];
            local_reads_sizes_sorted_trimmed_for_bruck[y]       = local_reads_sizes_sorted[y+offset];
            local_dest_rank_sorted_trimmed_for_bruck[y]         = local_dest_rank_sorted[y+offset];
            local_source_rank_sorted_trimmed_for_bruck[y] =
local_source_rank_sorted[y+offset]; } num_read_for_bruck = numItems; /* * * FOR DEBUG * for(y = 0; y < num_read_for_bruck; y++){ assert( local_reads_sizes_sorted_trimmed_for_bruck[y] != 0 ); assert( local_source_rank_sorted_trimmed_for_bruck[y] < dimensions); assert( local_dest_rank_sorted_trimmed_for_bruck[y] < dimensions); assert( local_offset_source_sorted_trimmed_for_bruck[y] != 0); assert( local_offset_dest_sorted_trimmed_for_bruck[y] != 0); assert( local_reads_coordinates_sorted_trimmed_for_bruck[y] != 0); } */ } else{ numItems = 0; local_reads_coordinates_sorted_trimmed_for_bruck = malloc(numItems * sizeof(size_t)); local_offset_source_sorted_trimmed_for_bruck = malloc(numItems * sizeof(size_t)); local_offset_dest_sorted_trimmed_for_bruck = malloc(numItems * sizeof(size_t)); local_reads_sizes_sorted_trimmed_for_bruck = malloc(numItems * sizeof(int)); local_dest_rank_sorted_trimmed_for_bruck = malloc(numItems * sizeof(int)); local_source_rank_sorted_trimmed_for_bruck = malloc(numItems * sizeof(int)); num_read_for_bruck = 0; } } else { numItems = local_readNum; local_reads_coordinates_sorted_trimmed_for_bruck = malloc(local_readNum * sizeof(size_t)); local_offset_source_sorted_trimmed_for_bruck = malloc(local_readNum * sizeof(size_t)); local_offset_dest_sorted_trimmed_for_bruck = malloc(local_readNum * sizeof(size_t)); local_reads_sizes_sorted_trimmed_for_bruck = malloc(local_readNum * sizeof(int)); local_dest_rank_sorted_trimmed_for_bruck = malloc(local_readNum * sizeof(int)); local_source_rank_sorted_trimmed_for_bruck = malloc(local_readNum * sizeof(int)); size_t y=0; for (y = 0; y < local_readNum; y++){ local_reads_coordinates_sorted_trimmed_for_bruck[y] = local_reads_coordinates_sorted[y]; local_offset_source_sorted_trimmed_for_bruck[y] = local_offset_source_sorted[y]; local_offset_dest_sorted_trimmed_for_bruck[y] = local_offset_dest_sorted[y]; local_reads_sizes_sorted_trimmed_for_bruck[y] = local_reads_sizes_sorted[y]; local_dest_rank_sorted_trimmed_for_bruck[y] = local_dest_rank_sorted[y]; local_source_rank_sorted_trimmed_for_bruck[y] = local_source_rank_sorted[y]; } num_read_for_bruck = numItems; /* * * FOR DEBUG * for(y = 0; y < num_read_for_bruck; y++){ assert( local_reads_sizes_sorted_trimmed_for_bruck[y] != 0 ); assert( local_source_rank_sorted_trimmed_for_bruck[y] < dimensions); assert( local_dest_rank_sorted_trimmed_for_bruck[y] < dimensions); assert( local_offset_source_sorted_trimmed_for_bruck[y] != 0); assert( local_offset_dest_sorted_trimmed_for_bruck[y] != 0); assert( local_reads_coordinates_sorted_trimmed_for_bruck[y] != 0); } */ } free(local_reads_coordinates_sorted); free(local_offset_source_sorted); free(local_offset_dest_sorted); free(local_reads_sizes_sorted); free(local_dest_rank_sorted); free(local_source_rank_sorted); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT][TRIMMING] time spent = %f s\n", split_rank, MPI_Wtime() - time_count); /* * We do a Bruck on rank of origin reading */ size_t m=0; int num_proc = dimensions; size_t *number_of_reads_by_procs = calloc( dimensions, sizeof(size_t)); //fprintf(stderr, "rank %d :::::[MPISORT] num_read_for_bruck = %zu \n", split_rank, num_read_for_bruck); for(m = 0; m < num_read_for_bruck; m++){ //assert(new_pbs_orig_rank_off_phase1[m] < dimensions); //assert(new_pbs_dest_rank_phase1[m] < dimensions); number_of_reads_by_procs[local_source_rank_sorted_trimmed_for_bruck[m]]++; } int *local_source_rank_sorted_trimmed_for_bruckv2 = malloc( num_read_for_bruck * sizeof(int)); for(m = 0; m < 
num_read_for_bruck; m++){ local_source_rank_sorted_trimmed_for_bruckv2[m] = local_source_rank_sorted_trimmed_for_bruck[m]; } size_t count6 = 0; for(m = 0; m < dimensions; m++){ count6 += number_of_reads_by_procs[m]; } assert( count6 == num_read_for_bruck ); MPI_Barrier(split_comm); size_t **reads_coordinates = malloc(sizeof(size_t *) * dimensions); size_t **local_source_offsets = malloc(sizeof(size_t *) * dimensions); size_t **dest_offsets = malloc(sizeof(size_t *) * dimensions); int **read_size = malloc(sizeof(int *) * dimensions); int **dest_rank = malloc(sizeof(int *) * dimensions); int **source_rank = malloc(sizeof(int *) * dimensions); /* * We send in order * * local_offset_source_sorted_trimmed_for_bruck * local_dest_rank_sorted_trimmed_for_bruck * local_reads_coordinates_sorted_trimmed_for_bruck * local_reads_sizes_sorted_trimmed_for_bruck * */ COMM_WORLD = split_comm; time_count = MPI_Wtime(); bruckWrite3(split_rank, dimensions, count6, number_of_reads_by_procs, local_source_rank_sorted_trimmed_for_bruckv2, local_offset_source_sorted_trimmed_for_bruck, //offset sources &local_source_offsets, local_dest_rank_sorted_trimmed_for_bruck, //destination rank &dest_rank, local_reads_coordinates_sorted_trimmed_for_bruck, //reads coordinates &reads_coordinates, local_reads_sizes_sorted_trimmed_for_bruck, //read size &read_size, local_source_rank_sorted_trimmed_for_bruck, //source rank &source_rank, local_offset_dest_sorted_trimmed_for_bruck, &dest_offsets ); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT][BRUCK 3] time spent = %f s\n", split_rank, MPI_Wtime() - time_count); time_count = MPI_Wtime(); free(local_reads_coordinates_sorted_trimmed_for_bruck); free(local_dest_rank_sorted_trimmed_for_bruck); free(local_reads_sizes_sorted_trimmed_for_bruck); free(local_offset_source_sorted_trimmed_for_bruck); free(local_offset_dest_sorted_trimmed_for_bruck); free(local_source_rank_sorted_trimmed_for_bruck); free(local_source_rank_sorted_trimmed_for_bruckv2); local_reads_coordinates_sorted_trimmed = malloc(first_local_readNum * sizeof(size_t)); local_offset_source_sorted_trimmed = malloc(first_local_readNum * sizeof(size_t)); local_offset_dest_sorted_trimmed = malloc(first_local_readNum * sizeof(size_t)); local_dest_rank_sorted_trimmed = malloc(first_local_readNum * sizeof(int)); local_source_rank_sorted_trimmed = malloc(first_local_readNum * sizeof(int)); local_reads_sizes_sorted_trimmed = malloc(first_local_readNum * sizeof(int)); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT][FREE + MALLOC] time spent = %f s\n", split_rank, MPI_Wtime() - time_count); /* * GET DATA AFTER BRUCK * */ j=0; size_t k = 0; for(m = 0; m < num_proc; m++) { for(k = 0; k < number_of_reads_by_procs[m]; k++) { local_offset_dest_sorted_trimmed[k + j] = dest_offsets[m][k]; local_dest_rank_sorted_trimmed[k + j] = dest_rank[m][k]; local_reads_sizes_sorted_trimmed[k + j] = read_size[m][k]; local_offset_source_sorted_trimmed[k + j] = local_source_offsets[m][k]; local_reads_coordinates_sorted_trimmed[k + j] = reads_coordinates[m][k]; local_source_rank_sorted_trimmed[k + j] = source_rank[m][k]; } free(dest_offsets[m]); free(dest_rank[m]); free(read_size[m]); free(local_source_offsets[m]); free(reads_coordinates[m]); free(source_rank[m]); j += number_of_reads_by_procs[m]; } free(number_of_reads_by_procs); if (dest_rank != NULL) free(dest_rank); if (read_size != NULL) free(read_size); if (local_source_offsets != NULL) free(local_source_offsets); if (reads_coordinates != NULL) 
free(reads_coordinates); if (source_rank != NULL) free(source_rank); if (dest_offsets != NULL) free(dest_offsets); local_readNum = first_local_readNum; /* * * FOR DEBUG * for ( j = 0; j < local_readNum; j++){ assert ( local_reads_coordinates_sorted_trimmed[j] != 0 ); assert ( local_offset_source_sorted_trimmed[j] != 0 ); assert ( local_offset_dest_sorted_trimmed[j] != 0 ); assert ( local_reads_sizes_sorted_trimmed != 0 ); assert ( local_dest_rank_sorted_trimmed[j] < split_size ); assert ( local_source_rank_sorted_trimmed[j] < split_size ); } */ free(local_reads_coordinates_sorted_trimmed); if (split_rank == chosen_split_rank) fprintf(stderr, "rank %d :::::[MPISORT] we call write SAM \n", split_rank); malloc_trim(0); time_count = MPI_Wtime(); writeSam( split_rank, output_dir, header, local_readNum, total_reads_by_chr, chrNames[i], reads[i], split_size, split_comm, chosen_split_rank, file_name, mpi_file_split_comm, finfo, compression_level, local_offset_dest_sorted_trimmed, local_offset_source_sorted_trimmed, local_reads_sizes_sorted_trimmed, local_dest_rank_sorted_trimmed, local_source_rank_sorted_trimmed, local_data, goff[rank], first_local_readNum ); if (split_rank == chosen_split_rank){ fprintf(stderr, "rank %d :::::[MPISORT][WRITESAM] chromosom %s ::: %f seconds\n\n\n", split_rank, chrNames[i], MPI_Wtime() - time_count); } } else{ /* * We are in the case the number of cpu is * not a power of 2 * * */ parallel_sort_any_dim( dimensions, //dimension for parabitonic local_readNum, split_rank, split_size, reads, i, //chromosom number chosen_split_rank, split_comm, localReadNumberByChr, local_data, file_name, output_dir, finfo, compression_level, total_reads_by_chr, goff[rank], headerSize, header, chrNames[i], mpi_file_split_comm ); } //end if dimensions < split_rank } //if ((local_color == 0) && (i < (nbchr - 2))) //in the splitted dimension else{ //we do nothing here } //we put a barrier before freeing pointers MPI_Barrier(MPI_COMM_WORLD); //we free the file pointer if (file_pointer_to_free) MPI_File_close(&mpi_file_split_comm); //we free the split_comm if (split_comm_to_free){ MPI_Comm_free(&split_comm); } free(color_vec_to_send); free(key_vec_to_send); }// end loop upon chromosoms (line 665)
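/*
 * A minimal, self-contained sketch of the color/key communicator split
 * used throughout the sort above: ranks that hold work take color 0 and
 * collectively open the input file on the sub-communicator; idle ranks
 * pass MPI_UNDEFINED and receive MPI_COMM_NULL. The file name and the
 * fake work counts are illustrative assumptions, not taken from the
 * original program.
 */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char *argv[])
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* pretend odd ranks have no reads to sort */
    long long local_count = (rank % 2) ? 0 : 100;

    int color = (local_count > 0) ? 0 : MPI_UNDEFINED;
    MPI_Comm split_comm;
    /* using the world rank as key keeps the original rank order */
    MPI_Comm_split(MPI_COMM_WORLD, color, rank, &split_comm);

    if (split_comm != MPI_COMM_NULL) {
        MPI_File fh;
        /* collective open on the sub-communicator only */
        int ierr = MPI_File_open(split_comm, "input.sam", MPI_MODE_RDONLY,
                                 MPI_INFO_NULL, &fh);
        if (ierr == MPI_SUCCESS)
            MPI_File_close(&fh);
        MPI_Comm_free(&split_comm);
    }

    MPI_Finalize();
    return 0;
}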
int main(int argc, char *argv[])
{
    int proc_num, my_rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // check arguments
    if (argc != 10){
        usage();
    }

    int b, x, y, z, t_start, t_end, t_replay_start, t_replay_end;
    int i, j, t;
    int err;
    MPI_Status status;

    // init
    char *fname = argv[1];
    b = atoi(argv[2]);               // number of variables (ints) per cell
    x = atoi(argv[3]);               // number of rows of the cube
    y = atoi(argv[4]);
    z = atoi(argv[5]);
    t_start = atoi(argv[6]);         // start time step
    t_end = atoi(argv[7]);
    t_replay_start = atoi(argv[8]);  // "replay" start time step
    t_replay_end = atoi(argv[9]);

    if (my_rank == 0)
        printf("b:%d x:%d y:%d z:%d t_start:%d t_end:%d t_replay_start:%d t_replay_end:%d \n",
               b, x, y, z, t_start, t_end, t_replay_start, t_replay_end);

    MPI_File fh;

    /* | b | ____________ / /| ...Proc0 / //| /___________// |z myrows I |__________|/ | | | / | | /y | |/ ...ProcN ------------ x*b T0 */

    // distribute work to the different procs
    int myrows = y / proc_num;
    int myreadsize = b * x * myrows * z * (t_end - t_start);
    // size of one row (in ints)
    int myonereadsize = x * b;

    // allocate a buffer for reading myrows * x * z * b ints
    // for each time step
    int *buf = (int*)malloc(myreadsize * sizeof(int));
    assert(buf != NULL);

    // open the file
    err = MPI_File_open(MPI_COMM_WORLD, fname, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    assert(err == MPI_SUCCESS);

    // MPI_Offset avoids overflowing an int for large volumes
    MPI_Offset start_offset = 0;
    int read_cnt = 0;
    double start, finish;
    double total_io = 0.0;

    for (t = t_start; t < t_end; t++){
        // start together
        MPI_Barrier(MPI_COMM_WORLD);
        start = MPI_Wtime();

        // each time step starts at offset t*b*x*y*z*sizeof(int)
        start_offset = (MPI_Offset)t * b * x * y * z;

        // read a slice of each time step
        for (i = 0; i < z; i++){
            for (j = 0; j < myrows; j++){
                MPI_File_read_at(fh,
                    (start_offset + i*b*x*y + j*b*x + my_rank * myonereadsize * myrows) * sizeof(int),
                    &buf[read_cnt*myonereadsize], myonereadsize, MPI_INT, &status);
                read_cnt++;
            }
        }
        finish = MPI_Wtime();
        if (my_rank == 0) printf("%d: I/O time %lf\n", t, finish - start);
        total_io += finish - start;

        start = MPI_Wtime();
        // do some computation here
        sleep(myrows / 8);
        finish = MPI_Wtime();
        //if(my_rank == 0) printf("%d: Computation time %lf\n", t, finish - start);
    }

    /*
    read_cnt = 0;
    for(t = t_replay_start; t < t_replay_end; t++){
        // start together
        MPI_Barrier(MPI_COMM_WORLD);
        start = MPI_Wtime();
        // each time step starts at offset t*b*x*y*z*sizeof(int)
        start_offset = t * b * x * y * z;
        // read a slice of each time step
        for(i = 0; i < z; i++){
            for(j = 0; j < myrows; j++){
                MPI_File_read_at(fh, (start_offset + i*b*x*y + j*b*x + my_rank * myonereadsize * myrows) * sizeof(int),
                    &buf[read_cnt*myonereadsize], myonereadsize, MPI_INT, &status);
                read_cnt++;
            }
        }
        finish = MPI_Wtime();
        if(my_rank == 0) printf("%d: I/O time %lf\n", t, finish - start);
        total_io += finish - start;
        start = MPI_Wtime();
        // do some computation here
        sleep(myrows / 8);
        finish = MPI_Wtime();
        //if(my_rank == 0) printf("%d: Computation time %lf\n", t, finish - start);
    }
    */

    err = MPI_File_close(&fh);
    assert(err == MPI_SUCCESS);

    // the replay loop above is disabled, so only the first loop's
    // time steps count toward the average
    if (my_rank == 0)
        printf("Avg reading time: %lf\n", total_io/(t_end - t_start));

    /*
    // check the read numbers
    if(my_rank == 1){
        int cnt = 0;
        for(i = 0; i < b*x*myrows*z*(t_end-t_start); i++){
            if(i % (x*b) == 0) printf("\n");
            if(i % (x*b*myrows) == 0) printf("\n==============%d============\n\n", cnt++);
            printf(" %3d", buf[i]);
        }
        printf("\n");
    }
    */

    free(buf);
    MPI_Finalize();
    return 0;
}
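/*
 * Hedged alternative sketch: the per-plane, per-row read loop above can be
 * expressed as one collective read per time step through a vector file
 * view. Each rank owns myrows contiguous rows of every z-plane, so a
 * single MPI_Type_vector (z blocks of myrows*b*x ints, stride b*x*y ints)
 * describes the whole slice. Variable names mirror the program above;
 * whether the collective path is actually faster depends on the MPI-IO
 * layer, so treat this as a sketch, not a drop-in replacement.
 */
#include <mpi.h>

/* reads one time step t into buf, assuming the same layout as above */
static int read_slice_with_view(MPI_File fh, int t, int b, int x, int y,
                                int z, int myrows, int my_rank, int *buf)
{
    MPI_Datatype slice;
    MPI_Type_vector(z, myrows * b * x, b * x * y, MPI_INT, &slice);
    MPI_Type_commit(&slice);

    /* byte displacement of this rank's first row in time step t */
    MPI_Offset disp = ((MPI_Offset)t * b * x * y * z
                       + (MPI_Offset)my_rank * myrows * b * x) * sizeof(int);
    MPI_File_set_view(fh, disp, MPI_INT, slice, "native", MPI_INFO_NULL);

    /* one collective call replaces the z*myrows point reads */
    int err = MPI_File_read_all(fh, buf, z * myrows * b * x, MPI_INT,
                                MPI_STATUS_IGNORE);

    MPI_Type_free(&slice);
    return err;
}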
int main (int argc, char *argv[])
{
    int proc_num, my_rank;
    int i;
    double start_time, elapsed_time;
    double all_time_max, all_time_avg, all_time_min;
    MPI_Status status;
    MPI_File fh;
    MPI_Datatype contig_type;

    MPI_Init(&argc, &argv);

    // get the number of procs and our rank in the comm
    MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    if (argc != 4) {
        printf("Wrong number of arguments!\n");
        printf("Usage: %s filename request_size repeat_times\n", argv[0]);
        MPI_Finalize();
        return 0;
    }

    int req_size = atoi(argv[2]);
    int repeat_time = atoi(argv[3]);

    MPI_Offset stride = (MPI_Offset)proc_num * req_size;
    MPI_Offset tmp_pos = (MPI_Offset)my_rank * req_size;

    char *read_data = (char*)malloc(req_size);

    MPI_Type_contiguous(req_size, MPI_CHAR, &contig_type);
    MPI_Type_commit(&contig_type);

    start_time = MPI_Wtime();

    //MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    // MPI_File_open signals failure through its return code,
    // not by returning a NULL handle
    int err = MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS){
        if (my_rank == 0) printf("File does not exist\n");
        MPI_Finalize();
        return -1;
    }

    for (i = 0; i < repeat_time; i++) {
        // MPI_Barrier(MPI_COMM_WORLD);
        MPI_File_read_at(fh, tmp_pos, read_data, 1, contig_type, &status);
        tmp_pos += stride;
    }

    MPI_File_close(&fh);

    elapsed_time = MPI_Wtime() - start_time;
    MPI_Reduce(&elapsed_time, &all_time_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    all_time_avg /= proc_num;

    MPI_Barrier(MPI_COMM_WORLD);

    double data_in_mb = (proc_num*(double)req_size*repeat_time)/(1024.0*1024.0);
    if (my_rank == 0)
        printf("Total time: %lf Min time: %lf Avg time: %lf Total data: %dM Agg Bandwidth: %lf\n",
               all_time_max, all_time_min, all_time_avg, (int)data_in_mb, data_in_mb/all_time_max);
    // printf("%d: %lf\n", my_rank, elapsed_time);

    free(read_data);
    MPI_Type_free(&contig_type);
    MPI_Finalize();
    return 0;
}
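/*
 * Side note in sketch form: unlike communicator operations, MPI file
 * operations default to MPI_ERRORS_RETURN, so an open failure comes back
 * as an error code while the handle stays undefined; the benchmark above
 * now checks that code. A reusable variant that also turns the code into
 * a readable message is sketched below; the helper name is illustrative.
 */
#include <mpi.h>
#include <stdio.h>

static MPI_File open_rdwr_or_abort(const char *path)
{
    MPI_File fh;
    int err = MPI_File_open(MPI_COMM_WORLD, path, MPI_MODE_RDWR,
                            MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) {
        char msg[MPI_MAX_ERROR_STRING];
        int len;
        /* translate the error code into human-readable text */
        MPI_Error_string(err, msg, &len);
        fprintf(stderr, "MPI_File_open(%s) failed: %s\n", path, msg);
        MPI_Abort(MPI_COMM_WORLD, err);
    }
    return fh;
}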
PIDX_return_code PIDX_generic_rst_buf_read_and_aggregate(PIDX_generic_rst_id generic_rst_id)
{
    PIDX_variable_group var_grp = generic_rst_id->idx->variable_grp[generic_rst_id->group_index];
    PIDX_variable var0 = var_grp->variable[generic_rst_id->first_index];

    // this process does not have any patch to process (after restructuring)
    if (var0->patch_group_count == 0)
        return PIDX_success;

    int v;
    MPI_File fh;
    char *directory_path;
    char *data_set_path;

    directory_path = malloc(sizeof(*directory_path) * PATH_MAX);
    memset(directory_path, 0, sizeof(*directory_path) * PATH_MAX);

    data_set_path = malloc(sizeof(*data_set_path) * PATH_MAX);
    memset(data_set_path, 0, sizeof(*data_set_path) * PATH_MAX);

    strncpy(directory_path, generic_rst_id->idx->filename, strlen(generic_rst_id->idx->filename) - 4);

    char time_template[512];
    sprintf(time_template, "%%s/%s", generic_rst_id->idx->filename_time_template);
    sprintf(data_set_path, time_template, directory_path, generic_rst_id->idx->current_time_step);

    for (v = generic_rst_id->first_index; v <= generic_rst_id->last_index; ++v)
    {
        PIDX_variable var = var_grp->variable[v];
        //int bytes_per_value = var->bpv / 8;

        // copy the size and offset to the output
        Ndim_patch_group patch_group = var->rst_patch_group;
        Ndim_patch out_patch = var->rst_patch_group->reg_patch;

        int nx = out_patch->size[0];
        int ny = out_patch->size[1];
        int nz = out_patch->size[2];

        // check the allocation before touching the buffer
        var->rst_patch_group->reg_patch->buffer = malloc(nx * ny * nz * (var->bpv/8) * var->vps);
        if (var->rst_patch_group->reg_patch->buffer == NULL)
            return PIDX_err_chunk;
        memset(var->rst_patch_group->reg_patch->buffer, 0, nx * ny * nz * (var->bpv/8) * var->vps);

        int data_offset = 0, v1 = 0;
        for (v1 = 0; v1 < v; v1++)
            data_offset = data_offset + (out_patch->size[0] * out_patch->size[1] * out_patch->size[2] * (var_grp->variable[v1]->vps * (var_grp->variable[v1]->bpv/8)));

        int buffer_size = out_patch->size[0] * out_patch->size[1] * out_patch->size[2] * (var->vps * (var->bpv/8));

        char *file_name;
        file_name = malloc(PATH_MAX * sizeof(*file_name));
        memset(file_name, 0, PATH_MAX * sizeof(*file_name));
        sprintf(file_name, "%s/time%09d/%d_0", directory_path, generic_rst_id->idx->current_time_step, generic_rst_id->idx_c->grank);

        MPI_Status status;
        int ret = 0;
        ret = MPI_File_open(MPI_COMM_SELF, file_name, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
        if (ret != MPI_SUCCESS)
        {
            fprintf(stderr, "Line %d File %s File opening %s\n", __LINE__, __FILE__, file_name);
            return PIDX_err_rst;
        }

        ret = MPI_File_read_at(fh, data_offset, out_patch->buffer, buffer_size, MPI_BYTE, &status);
        if (ret != MPI_SUCCESS)
        {
            fprintf(stderr, "Line %d File %s\n", __LINE__, __FILE__);
            return PIDX_err_rst;
        }

        ret = MPI_File_close(&fh);
        if (ret != MPI_SUCCESS)
        {
            fprintf(stderr, "Line %d File %s\n", __LINE__, __FILE__);
            return PIDX_err_rst;
        }
        free(file_name);  // the path is no longer needed once the file is closed

        int k1, j1, i1, r, index = 0, recv_o = 0, send_o = 0, send_c = 0;
        for (r = 0; r < var->rst_patch_group->count; r++)
        {
            // walk the regular patch one x-span (row) at a time
            for (k1 = patch_group->patch[r]->offset[2]; k1 < patch_group->patch[r]->offset[2] + patch_group->patch[r]->size[2]; k1++)
            {
                for (j1 = patch_group->patch[r]->offset[1]; j1 < patch_group->patch[r]->offset[1] + patch_group->patch[r]->size[1]; j1++)
                {
                    for (i1 = patch_group->patch[r]->offset[0]; i1 < patch_group->patch[r]->offset[0] + patch_group->patch[r]->size[0]; i1 = i1 + patch_group->patch[r]->size[0])
                    {
                        index = ((patch_group->patch[r]->size[0]) * (patch_group->patch[r]->size[1]) * (k1 - patch_group->patch[r]->offset[2]))
                              + ((patch_group->patch[r]->size[0]) * (j1 - patch_group->patch[r]->offset[1]))
                              + (i1 - patch_group->patch[r]->offset[0]);
                        send_o = index * var->vps * (var->bpv/8);
                        send_c = (patch_group->patch[r]->size[0]);
                        recv_o = (nx * ny * (k1 - out_patch->offset[2])) + (nx * (j1 - out_patch->offset[1])) + (i1 - out_patch->offset[0]);

                        memcpy(var->rst_patch_group->patch[r]->buffer + send_o, out_patch->buffer + (recv_o * var->vps * (var->bpv/8)), send_c * var->vps * (var->bpv/8));
                    }
                }
            }
        }

        free(var->rst_patch_group->reg_patch->buffer);
        var->rst_patch_group->reg_patch->buffer = 0;
    }

    free(directory_path);
    free(data_set_path);

    return PIDX_success;
}
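/*
 * The read above checks MPI_File_read_at's return code but not how many
 * bytes actually arrived; an implementation can legally return
 * MPI_SUCCESS on a short read at end of file. A hedged helper sketch
 * that also verifies the transferred byte count via MPI_Get_count; the
 * function name is illustrative, not part of PIDX.
 */
#include <mpi.h>
#include <stdio.h>

/* returns 0 on a complete read, -1 otherwise */
static int read_at_exactly(MPI_File fh, MPI_Offset offset, void *buf, int nbytes)
{
    MPI_Status status;
    int count = 0;

    if (MPI_File_read_at(fh, offset, buf, nbytes, MPI_BYTE, &status) != MPI_SUCCESS)
        return -1;

    /* how many bytes did the read actually deliver? */
    MPI_Get_count(&status, MPI_BYTE, &count);
    if (count != nbytes) {
        fprintf(stderr, "short read: wanted %d bytes, got %d\n", nbytes, count);
        return -1;
    }
    return 0;
}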
int main (int argc, char *argv[])
{
    int rank, size;
    MPI_File fh_in, fh_out;
    MPI_Offset offset;
    MPI_Status status;
    MPI_Group origin_group, new_group;
    MPI_Comm custom_world = MPI_COMM_WORLD;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(custom_world, &size);
    MPI_Comm_rank(custom_world, &rank);

    // read the command line
    if (argc < 4) {
        if (rank == MASTER_RANK) {
            fprintf(stderr, "Insufficient args\n");
            fprintf(stderr, "Usage: %s N input_file output_file\n", argv[0]);
        }
        MPI_Finalize();
        return 0;
    }

    const int N = atoi(argv[1]);
    const char *INPUT_NAME = argv[2];
    const char *OUTPUT_NAME = argv[3];

    // deal with the case where N < size
    if (N < size) {
        // obtain the group of processes in the world communicator
        MPI_Comm_group(custom_world, &origin_group);

        // remove the unwanted ranks
        int ranges[][3] = {{N, size-1, 1}};
        MPI_Group_range_excl(origin_group, 1, ranges, &new_group);

        // create a new communicator
        MPI_Comm_create(custom_world, new_group, &custom_world);
        if (custom_world == MPI_COMM_NULL) {
            // terminate the unwanted processes
            MPI_Finalize();
            exit(0);
        }
        size = N;
    }

    // read the file using MPI-IO
    int *local_buf;
    int num_per_node = N / size;
    offset = rank * num_per_node * sizeof(int);
    if (rank == (size - 1)) {
        num_per_node += N % size;
    }
    local_buf = malloc(num_per_node * sizeof(int));

    MPI_File_open(custom_world, INPUT_NAME, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh_in);
    MPI_File_read_at(fh_in, offset, local_buf, num_per_node, MPI_INT, &status);
    MPI_File_close(&fh_in);

    // odd-even sort
    int sorted = false, all_sorted = false;
    int recv;
    while (!sorted || !all_sorted) {
        sorted = true;

        // local sorting
        int i;
        // odd phase
        for (i = 1; i < num_per_node; i += 2) {
            if (local_buf[i] < local_buf[i-1]) {
                swap(&local_buf[i], &local_buf[i-1]);
                sorted = false;
            }
        }
        // even phase (i == 0 has no left neighbour, so start at 2)
        for (i = 2; i < num_per_node; i += 2) {
            if (local_buf[i] < local_buf[i-1]) {
                swap(&local_buf[i], &local_buf[i-1]);
                sorted = false;
            }
        }

        // transportation between neighbouring ranks
        // odd phase
        if (rank % 2) {
            MPI_Send(&local_buf[0], 1, MPI_INT, rank - 1, MSG_RECV, custom_world);
            MPI_Recv(&recv, 1, MPI_INT, rank - 1, MSG_RECV, custom_world, &status);
            if (recv > local_buf[0]) {
                local_buf[0] = recv;
                sorted = false;
            }
        } else if (rank != (size - 1)) {
            MPI_Recv(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world, &status);
            if (recv < local_buf[num_per_node - 1]) {
                swap(&recv, &local_buf[num_per_node - 1]);
                sorted = false;
            }
            MPI_Send(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world);
        }
        // even phase
        if ((rank % 2) == 0 && rank != MASTER_RANK) {
            MPI_Send(&local_buf[0], 1, MPI_INT, rank - 1, MSG_RECV, custom_world);
            MPI_Recv(&recv, 1, MPI_INT, rank - 1, MSG_RECV, custom_world, &status);
            if (recv > local_buf[0]) {
                local_buf[0] = recv;
                sorted = false;
            }
        } else if (rank > MASTER_RANK && rank != (size - 1)) {
            MPI_Recv(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world, &status);
            if (recv < local_buf[num_per_node - 1]) {
                swap(&recv, &local_buf[num_per_node - 1]);
                sorted = false;
            }
            MPI_Send(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world);
        }

        MPI_Allreduce(&sorted, &all_sorted, 1, MPI_INT, MPI_LAND, custom_world);
    }

    // write the file using MPI-IO
    MPI_File_open(custom_world, OUTPUT_NAME, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh_out);
    MPI_File_write_at(fh_out, offset, local_buf, num_per_node, MPI_INT, &status);
    MPI_File_close(&fh_out);

    free(local_buf);

    MPI_Barrier(custom_world);
    MPI_Finalize();
    return 0;
}
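/*
 * The boundary exchange above relies on a careful Send/Recv ordering
 * (odd ranks send first, even ranks receive first) to avoid deadlock.
 * MPI_Sendrecv performs the same swap in one call without depending on
 * that ordering. A hedged sketch of one exchange phase; the helper and
 * its parameters are illustrative, not taken from the original program.
 */
#include <mpi.h>

/* exchange our boundary value with `partner` and keep the min or max
 * locally; partner < 0 means no neighbour in this phase */
static int exchange_boundary(int my_val, int partner, int keep_max, MPI_Comm comm)
{
    if (partner < 0)
        return my_val;

    int recv_val;
    /* combined send+receive cannot deadlock the way two blocking
     * MPI_Send calls posted against each other can */
    MPI_Sendrecv(&my_val, 1, MPI_INT, partner, 0,
                 &recv_val, 1, MPI_INT, partner, 0,
                 comm, MPI_STATUS_IGNORE);

    if (keep_max)
        return (recv_val > my_val) ? recv_val : my_val;
    return (recv_val < my_val) ? recv_val : my_val;
}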