Example #1
int tcio_file_fread(tcio_distributed_fh dist_handle)
{
	MPI_Status status;
	MPI_Offset offset_tmp;
	int count = 0;
	//	tcio_distributed_fh dist_handle = handle->dist_buffer;
	log_debug("rank %d: tcio_file_fread\n",dist_handle->rank);
	MPI_Offset offset = dist_handle->rank * dist_handle->bfsize;
//	MPI_File_seek(dist_handle->fh, offset, MPI_SEEK_CUR);
	MPI_File_read_at(dist_handle->fh,offset, dist_handle->data, dist_handle->bfsize,
			MPI_BYTE, &status);
	MPI_Get_count(&status, MPI_BYTE, &count);

	int i = 1;
	while (count == dist_handle->bfsize)
	{
//		MPI_File_seek(dist_handle->fh, dist_handle->bfsize
//						* dist_handle->num_procs, MPI_SEEK_CUR);
		offset+=dist_handle->bfsize * dist_handle->num_procs;
		MPI_File_read_at(dist_handle->fh, offset, dist_handle->data + i
				* dist_handle->bfsize, dist_handle->bfsize, MPI_BYTE, &status);
		MPI_Get_count(&status, MPI_BYTE, &count);
		i++;
	}
//	offset_tmp = (i - 1) * dist_handle->num_procs * dist_handle->bfsize + count;
	offset_tmp = offset + count;
	//broadcast to find the max offset
	MPI_Allreduce(&offset_tmp, &dist_handle->max_offset, 1, MPI_LONG_LONG_INT,
			MPI_MAX, MPI_COMM_WORLD);
	log_debug("rank %d: tcio_file_fread end\n",dist_handle->rank);
	return 0;
}
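Example #1 dereferences several fields of the tcio_distributed_fh handle that are defined elsewhere in TCIO. The sketch below is a reconstruction of the handle layout inferred only from the usage above; the field names and types are assumptions, not the actual TCIO definition.

/* Hypothetical handle layout, inferred from Example #1 (not the real TCIO struct). */
typedef struct tcio_distributed_file {
	MPI_File   fh;          /* shared file opened elsewhere */
	int        rank;        /* this process's rank in the I/O communicator */
	int        num_procs;   /* number of processes sharing the file */
	int        bfsize;      /* bytes per chunk, used as the read count */
	char      *data;        /* destination buffer, large enough for all of this rank's chunks */
	MPI_Offset max_offset;  /* highest end-of-data offset across all ranks */
} *tcio_distributed_fh;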
Example #2
void read_file_bufferizer::read_buffer() {
	unsigned long to_read;

	buf_pos = 0;
	to_read = buf_size;
	if (to_read > end_file_pos - cur_file_pos_read) to_read = end_file_pos - cur_file_pos_read;
#ifdef FILE_VIA_MPI
	MPI_Status read_status;
	int mpi_bytes_read = 0;
	/* NULL is not a valid status argument; use a real status (or MPI_STATUS_IGNORE)
	   and keep to_read in sync with what was actually read, as the other branches do */
	MPI_File_read_at(fh, cur_file_pos_read, buffer, to_read, MPI_BYTE, &read_status);
	MPI_Get_count(&read_status, MPI_BYTE, &mpi_bytes_read);
	to_read = mpi_bytes_read;
#else
#ifdef WIN32_FILE
	LARGE_INTEGER move;
	move.QuadPart = cur_file_pos_read;
	if (!SetFilePointerEx(fh, move, NULL, FILE_BEGIN)) {
		printf("read_file_bufferizer::read_buffer: cannot set file pointer to the end! file %s, LastError = %d\n", read_file_name, GetLastError());
		ABORT(1);
	}
	unsigned long max_read = to_read;
	if (!ReadFile(fh, buffer, max_read, &to_read, NULL)) {
		printf("read_file_bufferizer::read_buffer: cannot read file to the end! file %s, LastError = %d\n", read_file_name, GetLastError());
		ABORT(1);
	}
#else
	os_lseek64(fh, cur_file_pos_read, SEEK_SET);
	to_read = ::read(fh, buffer, to_read);
#endif
#endif
	cur_file_pos_read += to_read;
	bytes_in_buffer = to_read;
}
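read_buffer() touches several read_file_bufferizer members declared elsewhere. Below is a sketch of the fields implied by this one method; the types are guesses, and the type of fh depends on which I/O backend the macros above select.

// Hypothetical member sketch for read_file_bufferizer, inferred from read_buffer() only.
class read_file_bufferizer {
	// ...
	unsigned long long cur_file_pos_read;  // next file byte to read
	unsigned long long end_file_pos;       // one past the last byte this reader may consume
	char              *buffer;             // staging buffer of buf_size bytes
	unsigned long      buf_size;           // capacity of buffer
	unsigned long      buf_pos;            // consume position inside buffer
	unsigned long      bytes_in_buffer;    // valid bytes currently in buffer
	char              *read_file_name;     // used only in error messages
	MPI_File           fh;                 // or HANDLE / int fd for the WIN32/POSIX backends
};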
Example #3
int MPI_File_iread_at(MPI_File mpi_fh, MPI_Offset offset, void *buf,
                      int count, MPI_Datatype datatype, 
                      MPIO_Request *request)
{
	int error_code;
	MPI_Status *status;

        MPID_CS_ENTER();
        MPIR_Nest_incr();

	status = (MPI_Status *) ADIOI_Malloc(sizeof(MPI_Status));

	/* for now, no threads or anything fancy. 
	 * just call the blocking version */
	error_code = MPI_File_read_at(mpi_fh, offset, buf, count, datatype,
				      status); 
	/* ROMIO-1 doesn't do anything with status.MPI_ERROR */
	status->MPI_ERROR = error_code;

	/* kick off the request */
	MPI_Grequest_start(MPIU_Greq_query_fn, MPIU_Greq_free_fn, 
			   MPIU_Greq_cancel_fn, status, request);
	/* but we did all the work already */
	MPI_Grequest_complete(*request);

        MPIR_Nest_decr();
        MPID_CS_EXIT();

	/* passed the buck to the blocking version...*/
	return MPI_SUCCESS;
}
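Example #3 relies on MPICH-internal generalized-request callbacks (MPIU_Greq_query_fn, MPIU_Greq_free_fn, MPIU_Greq_cancel_fn) that are not shown. The following is only a hypothetical sketch of what such callbacks do in this "blocking call wrapped in a generalized request" pattern, using the standard MPI_Grequest callback signatures; the real ROMIO versions may differ (for instance, they would pair ADIOI_Malloc with ADIOI_Free).

#include <stdlib.h>
#include <mpi.h>

/* Hypothetical stand-ins for the MPIU_Greq_* callbacks. */
static int greq_query_fn(void *extra_state, MPI_Status *status)
{
	/* the blocking MPI_File_read_at already filled in the stashed status */
	*status = *(MPI_Status *) extra_state;
	return MPI_SUCCESS;
}

static int greq_free_fn(void *extra_state)
{
	free(extra_state);	/* release the MPI_Status allocated by the iread_at wrapper */
	return MPI_SUCCESS;
}

static int greq_cancel_fn(void *extra_state, int complete)
{
	/* nothing to cancel: the work finished before MPI_Grequest_start returned */
	return MPI_SUCCESS;
}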
Example #4
	/*!
	Reads the geometry from given open file starting at given offset.

	Returns true on success, false otherwise.
	*/
	bool read(MPI_File file, MPI_Offset offset) const
	{
		int read_geometry_id = No_Geometry::geometry_id + 1;
		const int ret_val = MPI_File_read_at(
			file,
			offset,
			(void*) &read_geometry_id,
			1,
			MPI_INT,
			MPI_STATUS_IGNORE
		);
		if (ret_val != MPI_SUCCESS) {
			std::cerr << __FILE__ << ":" << __LINE__
				<< " Couldn't read geometry data from given file: " << Error_String()(ret_val)
				<< std::endl;
			return false;
		}

		if (read_geometry_id != No_Geometry::geometry_id) {
			std::cerr << __FILE__ << ":" << __LINE__
				<< " Wrong geometry: " << read_geometry_id
				<< ", should be " << No_Geometry::geometry_id
				<< std::endl;
			return false;
		}

		return true;
	}
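Example #4 calls an Error_String functor that is not shown. Here is a minimal sketch of such a helper built on MPI_Error_string; it is a plausible reconstruction, not necessarily the class used by the original project.

#include <mpi.h>
#include <string>

// Hypothetical helper: converts an MPI return code into a readable message.
struct Error_String {
	std::string operator()(const int mpi_return_value) const
	{
		char error_string[MPI_MAX_ERROR_STRING + 1];
		int length = 0;
		MPI_Error_string(mpi_return_value, error_string, &length);
		return std::string(error_string, length);
	}
};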
Example #5
inline bool SpParHelper::FetchBatch(MPI_File & infile, MPI_Offset & curpos, MPI_Offset end_fpos, bool firstcall, vector<string> & lines, int myrank)
{
    size_t bytes2fetch = ONEMILLION;    // we might read more than needed but no problem as we won't process them
    char * buf = new char[bytes2fetch];
    char * originalbuf = buf;   // so that we can delete it later because "buf" will move
    MPI_Status status;
    int bytes_read;
    if(firstcall)
    {
        curpos -= 1;    // first byte is to check whether we started at the beginning of a line
        bytes2fetch += 1;
    }
    
    MPI_File_read_at(infile, curpos, buf, bytes2fetch, MPI_CHAR, &status);
    MPI_Get_count(&status, MPI_CHAR, &bytes_read);  // MPI_Get_count can only return 32-bit integers
    if(!bytes_read)
    {
        delete [] originalbuf;
        return true;    // done
    }
    SpParHelper::check_newline(&bytes_read, bytes2fetch, buf);
    if(firstcall)
    {
        if(buf[0] == '\n')  // we got super lucky and hit the line break
        {
            buf += 1;
            bytes_read -= 1;
            curpos += 1;
        }
        else    // skip to the next line and let the preceding processor take care of this partial line
        {
            char *c = (char*)memchr(buf, '\n', MAXLINELENGTH); //  returns a pointer to the matching byte or NULL if the character does not occur
            if (c == NULL) {
                cout << "Unexpected line without a break" << endl;
                delete [] originalbuf;
                return true;    // bail out instead of dereferencing a NULL pointer below
            }
            int n = c - buf + 1;
            bytes_read -= n;
            buf += n;
            curpos += n;
        }
    }
    while(bytes_read > 0 && curpos < end_fpos)  // this will also finish the last line
    {
        char *c = (char*)memchr(buf, '\n', bytes_read); //  return a pointer to the matching byte or NULL if the character does not occur
        if (c == NULL) {
            delete [] originalbuf;
            return false;  // if bytes_read stops in the middle of a line, that line will be re-read next time since curpos has not been moved forward yet
        }
        int n = c - buf + 1;
        
        // string constructor from char * buffer: copies the first n characters from the array of characters pointed by s
        lines.push_back(string(buf, n-1));  // no need to copy the newline character
        bytes_read -= n;   // reduce remaining bytes
        buf += n;   // move forward the buffer
        curpos += n;
    }
    delete [] originalbuf;
    if (curpos >= end_fpos) return true;  // don't call it again, nothing left to read
    else    return false;
}
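FetchBatch() advances curpos by reference and returns true once the caller's byte range is exhausted, so a driver typically calls it in a loop until it reports completion. A hypothetical usage sketch; the surrounding variable names are assumptions, not CombBLAS code.

// infile is an already opened MPI_File; curpos/end_fpos delimit this rank's byte range.
vector<string> lines;
bool finished = false;
bool firstcall = true;
while (!finished)
{
    finished = SpParHelper::FetchBatch(infile, curpos, end_fpos, firstcall, lines, myrank);
    firstcall = false;
    // ... process the complete lines gathered so far ...
    lines.clear();
}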
Example #6
FORT_DLL_SPEC void FORT_CALL mpi_file_read_at_ ( MPI_Fint *v1, MPI_Offset *v2, void*v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *ierr ){
#ifdef MPI_MODE_RDONLY
    *ierr = MPI_File_read_at( MPI_File_f2c(*v1), *v2, v3, *v4, (MPI_Datatype)(*v5), (MPI_Status *)(v6) );
#else
*ierr = MPI_ERR_INTERN;
#endif
}
Example #7
FORTRAN_API void FORT_CALL mpi_file_read_at_(MPI_Fint *fh,MPI_Offset *offset,void *buf,
      MPI_Fint *count,MPI_Fint *datatype,MPI_Status *status, MPI_Fint *ierr )
{
    MPI_File fh_c;
    
    fh_c = MPI_File_f2c(*fh);
    *ierr = MPI_File_read_at(fh_c,*offset,buf,*count,(MPI_Datatype)*datatype,status);
}
Example #8
// nrItems need to be exactly specified
vector<vector<float> > Storage::LoadDataFloatMPIBin(char* filename, int nrItems, int startColumn, int endColumn, MPI_Comm comm)//vector<int> indexes, MPI_Comm comm)
{
	double timeStart;
	if(m_mpiRank == 0)
	{
		cout<<"Loading "<<filename<<"...";cout.flush();
		timeStart = MPI_Wtime();
	}

	vector<vector<float> > data(nrItems);

	MPI_File fh;
	MPI_File_open(comm,filename,MPI_MODE_RDONLY,MPI_INFO_NULL,&fh);

	vector<float> tempData(nrItems*(endColumn-startColumn));

	if(endColumn-startColumn == 0)
	{
		cout<<"(E) endColumn-startColumn == 0\n";cout.flush();
	}

	MPI_Status status;
	// Element size is sizeof(float): sizeof(MPI_REAL4) measures the size of the datatype
	// handle, not of the data, and is not portable across MPI implementations.
	MPI_File_read_at(fh,(MPI_Offset)startColumn*nrItems*sizeof(float),&tempData[0],(endColumn-startColumn)*nrItems,MPI_REAL4,&status);
	//MPI_File_read_at(fh,(MPI_Offset)startColumn*nrItems*sizeof(float),&tempData[0],(endColumn-startColumn)*nrItems,MPI_FLOAT,&status);

	
	//for(int i=0;i<(endColumn-startColumn);i++)
	for(int i=0;i<nrItems;i++)
	{
		vector<float> f(endColumn-startColumn);
		data[i] = f;
	}

	int index = 0;
	for(int j=0;j<(endColumn-startColumn);j++)
	{
		for(int i=0;i<nrItems;i++)
		{
			data[i][j] = tempData[index];
			index++;
		}
	}

	MPI_File_close(&fh);

	if(m_mpiRank == 0)
	{
		if(data.size()>0)
			cout<<"Loaded "<<data.size()<<" items of size "<<data[0].size()<<". (Time (process 0): "<<MPI_Wtime()-timeStart<<")\n";
		else
			cout<<"Warning: Loaded no items from filename: "<<filename<<"\n";
		
		cout.flush();
	}

	return data;
}
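The unpacking loops above imply the file stores the matrix column by column: all nrItems values of column startColumn come first, then the next column, and so on. As a reference, here is a hypothetical sketch of the matching flattening a writer would perform; it is not part of the Storage class.

// Hypothetical writer-side flattening that mirrors LoadDataFloatMPIBin's read order.
vector<float> flat((size_t)nrItems*(endColumn-startColumn));
int index = 0;
for(int j=0;j<(endColumn-startColumn);j++)
	for(int i=0;i<nrItems;i++)
		flat[index++] = data[i][j];
// MPI_File_write_at(fh,(MPI_Offset)startColumn*nrItems*sizeof(float),
//                   &flat[0],(endColumn-startColumn)*nrItems,MPI_FLOAT,&status);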
Example #9
int main( int argc, char *argv[] )
{
    int            rank, errs = 0, rc;
    MPI_Errhandler ioerr_handler;
    MPI_Status     status;
    MPI_File       fh;
    char           inbuf[80];

    MTest_Init( &argc, &argv );

    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Create a file to which to attach the handler */
    rc = MPI_File_open( MPI_COMM_WORLD, (char*)"test.txt",
                        MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
                        MPI_INFO_NULL, &fh );
    if (rc) {
        errs ++;
        printf( "Unable to open test.txt for writing\n" );
    }

    rc = MPI_File_create_errhandler( user_handler, &ioerr_handler );
    if (rc) {
        errs++;
        printf("MPI_File_create_Errhandler returned an error code: %d\n", rc);
    }

    rc = MPI_File_set_errhandler( fh, ioerr_handler );
    if (rc) {
        errs++;
        printf("MPI_File_set_errhandler returned an error code: %d\n", rc);
    }

    /* avoid leaking the errhandler, safe because they have refcount semantics */
    rc = MPI_Errhandler_free(&ioerr_handler);
    if (rc) {
        errs++;
        printf("MPI_Errhandler_free returned an error code: %d\n", rc);
    }

    /* This should generate an error because the file mode is WRONLY */
    rc = MPI_File_read_at( fh, 0, inbuf, 80, MPI_BYTE, &status );
    if (handlerCalled != 1) {
        errs++;
        printf( "User-defined error handler was not called\n" );
    }

    rc = MPI_File_close( &fh );
    if (rc) {
        errs++;
        printf("MPI_File_close returned an error code: %d\n",rc);
    }

    MTest_Finalize( errs );
    MPI_Finalize( );
    return 0;
}
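The test above assumes a user_handler function and a handlerCalled flag defined elsewhere in the file. Below is a minimal sketch following the MPI_File_errhandler_function signature; it is a reconstruction, not the exact MPICH test code.

#include <mpi.h>

static int handlerCalled = 0;

/* Hypothetical handler: records that it fired; attached to the file in main(). */
static void user_handler(MPI_File *fh, int *errcode, ...)
{
    if (*errcode != MPI_SUCCESS)
        handlerCalled++;
}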
Example #10
static int verify_type(char *filename, MPI_Datatype type,
	int64_t expected_extent, int do_coll)
{
    int rank, canary;
    MPI_Count tsize;
    int compare=-1;
    int errs=0, toterrs=0;
    MPI_Status status;
    MPI_File fh;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    CHECK( MPI_File_open(MPI_COMM_WORLD, filename,
		MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh));
    CHECK( MPI_File_set_view(fh, rank*sizeof(int),
	    MPI_BYTE, type, "native", MPI_INFO_NULL));

    MPI_Type_size_x(type, &tsize);

    canary=rank+1000000;

    /* skip over first instance of type */
    if (do_coll) {
	CHECK( MPI_File_write_at_all(fh, tsize, &canary, 1, MPI_INT, &status));
    } else {
	CHECK( MPI_File_write_at(fh, tsize, &canary, 1, MPI_INT, &status));
    }

    CHECK( MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native",
		MPI_INFO_NULL));

    if (do_coll) {
	CHECK( MPI_File_read_at_all(fh, expected_extent/sizeof(int)+rank,
		&compare, 1, MPI_INT, &status));
    } else {
	CHECK( MPI_File_read_at(fh, expected_extent/sizeof(int)+rank,
		&compare, 1, MPI_INT, &status));
    }

    if (compare != canary)
	errs=1;
    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    MPI_File_close(&fh);

    if (toterrs) {
	printf("%d: got %d expected %d\n", rank, compare, canary);
	/* keep file if there's an error */
    } else {
	if (rank == 0) MPI_File_delete(filename, MPI_INFO_NULL);
    }

    return (toterrs);

}
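verify_type() wraps each MPI-IO call in a CHECK macro that is not shown. Here is a plausible minimal version that aborts on failure; the original test harness defines its own variant.

#include <stdio.h>
#include <mpi.h>

/* Hypothetical CHECK: abort the job if an MPI call does not return MPI_SUCCESS. */
#define CHECK(fn)                                                         \
    do {                                                                  \
        int errcode_ = (fn);                                              \
        if (errcode_ != MPI_SUCCESS) {                                    \
            fprintf(stderr, "%s:%d: %s failed (err=%d)\n",                \
                    __FILE__, __LINE__, #fn, errcode_);                   \
            MPI_Abort(MPI_COMM_WORLD, errcode_);                          \
        }                                                                 \
    } while (0)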
Example #11
void mpi_file_read_at_(MPI_Fint * fh, MPI_Offset * offset, void *buf,
                       MPI_Fint * count, MPI_Fint * datatype, MPI_Status * status, MPI_Fint * ierr)
{
    MPI_File fh_c;
    MPI_Datatype datatype_c;

    fh_c = MPI_File_f2c(*fh);
    datatype_c = MPI_Type_f2c(*datatype);

    *ierr = MPI_File_read_at(fh_c, *offset, buf, *count, datatype_c, status);
}
Example #12
char get_xy_cell(long long x,long long y,MPI_File file,long long mapxsize,long long mapysize) { // get the value of a cell from a file

  char temp;
  MPI_Status status;

  MPI_Offset offset=y*mapxsize+x; // this is the location in the file to read from

  MPI_File_read_at(file,offset,&temp,1,MPI_CHAR,&status);
 
  return temp;
}
Example #13
int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
	try {
				
		MPI_Status status; 
		int pid;
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
		
		for(int i=0;i<num;i++){
			
			if (m->control_pressed) {	return 1;	}
			
			//read next sequence
			int length = MPIPos[start+i+1] - MPIPos[start+i];
	
			char* buf4 = new char[length+1];
			MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
			buf4[length] = '\0';	//terminate before constructing the string below
			
			string tempBuf = buf4;
			if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
			istringstream iss (tempBuf,istringstream::in);
			delete [] buf4;	//array delete to match new[]

			Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
				
			if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
				
				if	(candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
					m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
				}else{
					//find chimeras
					chimera->getChimeras(candidateSeq);
					
					if (m->control_pressed) {	delete candidateSeq; return 1;	}
		
					//print results
					chimera->print(outMPI, outAccMPI);
				}
			}
			delete candidateSeq;
			
			//report progress
			if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;			}
		}
		//report progress
		if(num % 100 != 0){		cout << "Processing sequence: " << num << endl;	 	}
		
				
		return 0;
	}
	catch(exception& e) {
		m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
		exit(1);
	}
}
Example #14
/*
 * mpi_io_shared
 *
 * creates a single-shared-file
 * writes with independent-io
 * reads with independent-io
 * writes with collective-io
 * reads with collective-io
 */
int mpi_io_shared (char *path, int size, int rank)
{
    MPI_File fh;
    char filepath[512];
    MPI_Offset offset;
    MPI_Status status;
    void *buf;
    int bufcount = BYTES_PER_RANK;
    int rc;

    buf = malloc(bufcount);
    if (!buf) { return 0; }

    memset(buf, 0xa, bufcount);

    sprintf(filepath, "%s/%s", path, "cp-bench-mpio-shared");
    rc = MPI_File_open(MPI_COMM_WORLD,
                       filepath,
                       (MPI_MODE_CREATE|MPI_MODE_RDWR|MPI_MODE_DELETE_ON_CLOSE),
                       MPI_INFO_NULL,
                       &fh);
    MPI_CHECK(rc,"MPI_File_open");

    /* Indep Write */
    offset = rank * bufcount;
    rc = MPI_File_write_at(fh,offset,buf,bufcount,MPI_BYTE,&status);
    MPI_CHECK(rc,"MPI_File_write_at");

    MPI_Barrier(MPI_COMM_WORLD);

    /* Indep Read */
    offset = ((rank+1)%size) * bufcount;
    rc = MPI_File_read_at(fh,offset,buf,bufcount,MPI_BYTE,&status);
    MPI_CHECK(rc,"MPI_File_read_at");

    /* Collective Write */
    offset = rank * bufcount;
    rc = MPI_File_write_at_all(fh, offset, buf, bufcount, MPI_BYTE, &status);
    MPI_CHECK(rc,"MPI_File_write_at_all");

    /* Collective Read */
    offset = ((rank+1)%size) * bufcount;
    rc = MPI_File_read_at_all(fh, offset, buf, bufcount, MPI_BYTE, &status);
    MPI_CHECK(rc,"MPI_File_read_at_all");

    rc = MPI_File_close(&fh);
    MPI_CHECK(rc,"MPI_File_close");

    free(buf);

    return 1;
}
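mpi_io_shared() depends on a BYTES_PER_RANK constant and an MPI_CHECK macro defined elsewhere in the benchmark. A hypothetical minimal version of both follows; the 1 MiB per rank is an arbitrary assumed value.

#include <stdio.h>
#include <mpi.h>

#define BYTES_PER_RANK (1 << 20)   /* assumed: 1 MiB written/read by each rank */

/* Hypothetical MPI_CHECK: report and abort when an MPI-IO call fails. */
#define MPI_CHECK(rc, msg)                                                \
    do {                                                                  \
        if ((rc) != MPI_SUCCESS) {                                        \
            fprintf(stderr, "%s failed (rc=%d)\n", (msg), (rc));          \
            MPI_Abort(MPI_COMM_WORLD, (rc));                              \
        }                                                                 \
    } while (0)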
Example #15
FORT_DLL_SPEC void FORT_CALL mpi_file_read_at_ ( MPI_Fint *v1, MPI_Offset *v2, void*v3, MPI_Fint *v4, MPI_Fint *v5, MPI_Fint *v6, MPI_Fint *ierr ){
#ifdef MPI_MODE_RDONLY

#ifndef HAVE_MPI_F_INIT_WORKS_WITH_C
    if (MPIR_F_NeedInit){ mpirinitf_(); MPIR_F_NeedInit = 0; }
#endif

    if (v6 == MPI_F_STATUS_IGNORE) { v6 = (MPI_Fint*)MPI_STATUS_IGNORE; }
    *ierr = MPI_File_read_at( MPI_File_f2c(*v1), (MPI_Offset)*v2, v3, (int)*v4, (MPI_Datatype)(*v5), (MPI_Status *)v6 );
#else
*ierr = MPI_ERR_INTERN;
#endif
}
Example #16
//load m*n matrix
int loadmatrix_rows(MPI_File *fh, float *rbuf, int numrows, int rank, int numtasks, int m, int n)
{
	MPI_Offset offset = 0;
	int i = 0, j = 0;
	MPI_Status status;
	MPI_Datatype rowtype;
	int result = 0;
	
	MPI_Type_contiguous(n, MPI_FLOAT, &rowtype);
	MPI_Type_commit(&rowtype);
	
	if(rank < m%numtasks)
	{
		offset = numrows*rank;
	}
	else if(rank == m%numtasks)
	{
		offset = (numrows+1)*rank;
	}
	else
	{
		offset = numrows*rank+m%numtasks;
	}
		
	MPI_File_set_view(*fh, 0, rowtype, rowtype, "native", MPI_INFO_NULL);
	
	result = MPI_File_read_at(*fh, offset, rbuf, numrows, rowtype, &status);
	if(result != MPI_SUCCESS)
	{
		printf("Proc %d read at %d error!\n", rank, offset);
	}
	
	/*
	for(i = 0; i < numrows; i++)
	{
		printf("Proc %d row %d: ", rank, i);
		for(j = 0; j < n; j++)
		{
			printf("%f, ", rbuf[i*n+j]);
		}
		printf("\n");
	}
	*/
	
	//MPI_Barrier(MPI_COMM_WORLD);
	
	MPI_Type_free(&rowtype);
	
	return n*numrows;
}
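The offset arithmetic in loadmatrix_rows() matches a block distribution in which the first m % numtasks ranks hold one extra row. A hypothetical caller sketch under that assumption (fh is an already opened MPI_File; none of this is part of the original program):

	/* Deal rows out so that ranks below m % numtasks get one extra row. */
	int base = m / numtasks, extra = m % numtasks;
	int numrows = base + (rank < extra ? 1 : 0);
	float *rbuf = (float *) malloc((size_t)numrows * n * sizeof(float));
	int elems_read = loadmatrix_rows(&fh, rbuf, numrows, rank, numtasks, m, n);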
Example #17
int main(int argc, char **argv){
	int rank, size, bufsize, nints;
	MPI_File fh;
	MPI_Status status;
	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	MPI_Comm_size(MPI_COMM_WORLD, &size);
	bufsize = FILESIZE/size;
	nints = bufsize/sizeof(int);
	int buf[nints];
	MPI_File_open(MPI_COMM_WORLD,"binaryfile",MPI_MODE_RDONLY,MPI_INFO_NULL,&fh);
	MPI_File_read_at(fh, rank*bufsize, buf, nints, MPI_INT, &status);
	printf("\nrank: %d, buf[%d]: %d \n", rank, rank*bufsize, buf[0]);
	MPI_File_close(&fh);
	MPI_Finalize();
	return 0;
}
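Example #17 assumes a FILESIZE constant giving the total size of "binaryfile" in bytes; with the default file view, the byte offset rank*bufsize places each rank at its own contiguous slice. A hypothetical definition (any value divisible by the number of ranks and by sizeof(int) works):

/* Assumed total size of "binaryfile" in bytes. */
#define FILESIZE (1024 * 1024)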
Example #18
raf_t MPI_Load_raf(char *name,MPI_Comm comm){
	raf_t raf=(raf_t)RTmalloc(sizeof(struct raf_struct_s));
	raf_init(raf,name);
	raf->blocksize=65536;
	MPI_File f;
	MPI_Comm_size(comm,&(raf->workers));
	MPI_Comm_rank(comm,&(raf->rank));
	int e=MPI_File_open(comm,name,MPI_MODE_RDONLY,MPI_INFO_NULL,&f);
	if(e){
		int i=1024;
		char msg[1024];
		MPI_Error_string(e,msg,&i);
		Fatal(0,error,"err is %s\n",msg);
	}
	MPI_File_set_errhandler(f,MPI_ERRORS_ARE_FATAL);
	MPI_File_get_size(f,&(raf->size));
	if ((raf->size)%(raf->blocksize)) Fatal(0,error,"file not multiple of block size");
	if (((raf->size)/(raf->blocksize))%(raf->workers)) Fatal(0,error,"block count not multiple of worker count");
	//Warning(info,"my share is %d",(raf->size)/(raf->workers));
	raf->data=RTmalloc((raf->size)/(raf->workers));
	if (1) {
		Warning(info,"using MPI_File_read_all");
		MPI_Datatype ftype;
		MPI_Type_vector((raf->size)/(raf->blocksize),(raf->blocksize),(raf->blocksize)*(raf->workers),MPI_CHAR,&ftype);
		MPI_Type_commit(&ftype);
		MPI_File_set_view(f,(raf->blocksize)*(raf->rank),MPI_CHAR,ftype,"native",MPI_INFO_NULL);
		MPI_File_read_all(f,raf->data,(raf->size)/(raf->workers),MPI_CHAR,MPI_STATUS_IGNORE);
		MPI_File_close(&f);
		MPI_Type_free(&ftype);
	} else {
		Warning(info,"using MPI_File_read_at");
		int blockcount=((raf->size)/(raf->blocksize))/(raf->workers);
		for(int i=0;i<blockcount;i++){
			MPI_File_read_at(f,((i*(raf->workers)+(raf->rank))*(raf->blocksize)),
				(raf->data)+(i*(raf->blocksize)),(raf->blocksize),MPI_CHAR,MPI_STATUS_IGNORE);
		}
		MPI_File_close(&f);
	}
	raf->rq_tag=core_add(raf,request_service);
	raf->ack_tag=core_add(raf,receive_service);
	raf->shared.read=read_at;
	raf->shared.size=mpi_size;
	raf->shared.close=mpi_close;
	//Warning(info,"file loaded");
	return raf;
}
Example #19
JNIEXPORT void JNICALL Java_mpi_File_readAt(
        JNIEnv *env, jobject jthis, jlong fh, jlong fileOffset,
        jobject buf, jboolean db, jint off, jint count,
        jlong jType, jint bType, jlongArray stat)
{
    MPI_Datatype type = (MPI_Datatype)jType;
    void *ptr;
    ompi_java_buffer_t *item;
    ompi_java_getWritePtr(&ptr, &item, env, buf, db, count, type);
    MPI_Status status;

    int rc = MPI_File_read_at((MPI_File)fh, (MPI_Offset)fileOffset,
                              ptr, count, type, &status);

    ompi_java_exceptionCheck(env, rc);
    ompi_java_releaseWritePtr(ptr, item, env, buf, db, off, count, type, bType);
    ompi_java_status_set(env, stat, &status);
}
Example #20
int loadmatrix_cross_rows(MPI_File *fh, float *rbuf, int numrows, int rank, int numtasks, int m, int n)
{
	MPI_Offset offset = 0;
	int i = 0, j = 0;
	MPI_Status status;
	MPI_Datatype rowtype;
	MPI_Datatype filetype;
	int result = 0;
	
	MPI_Type_contiguous(n, MPI_FLOAT, &rowtype);
	MPI_Type_commit(&rowtype);
	
	MPI_Type_vector(numrows, 1, numtasks, rowtype, &filetype);
	MPI_Type_commit(&filetype);
	
	offset = rank;
	
	MPI_File_set_view(*fh, offset*n*sizeof(float), rowtype, filetype, "native", MPI_INFO_NULL);
	
	result = MPI_File_read_at(*fh, 0, rbuf, numrows, rowtype, &status);
	if(result != MPI_SUCCESS)
	{
		printf("Proc %d read at %d error!\n", rank, offset);
	}
	
	if(rank == 2)
	{
		for(i = 0; i < numrows; i++)
		{
			printf("Proc %d read row %d: ", rank, i);
			for(j = 0; j < n; j++)
			{
				printf("%f, ", rbuf[i*n+j]);
			}
			printf("\n");
		}
	}
	
	MPI_Type_free(&rowtype);
	MPI_Type_free(&filetype);
	
	return n*numrows;
}
Example #21
void ompi_file_read_at_f(MPI_Fint *fh, MPI_Offset *offset, char *buf, 
			MPI_Fint *count, MPI_Fint *datatype,
			MPI_Fint *status, MPI_Fint *ierr)
{
    int c_ierr;
    MPI_File c_fh = MPI_File_f2c(*fh);
    MPI_Datatype c_type = MPI_Type_f2c(*datatype);
    OMPI_FORTRAN_STATUS_DECLARATION(c_status,c_status2)

    OMPI_FORTRAN_STATUS_SET_POINTER(c_status,c_status2,status)

    c_ierr = MPI_File_read_at(c_fh, 
                              (MPI_Offset) *offset,
                              buf, 
                              OMPI_FINT_2_INT(*count),
                              c_type, c_status);
    if (NULL != ierr) *ierr = OMPI_INT_2_FINT(c_ierr);

    OMPI_FORTRAN_STATUS_RETURN(c_status,c_status2,status,c_ierr)
}
Example #22
int main(int argc,char *argv[])
{
	int	np,Rank,i;
	int	*Buffer,Buffer_size;
	MPI_Status	status;
	MPI_File	 Fp;
	
	MPI_Init(&argc,&argv);
	MPI_Comm_rank(MPI_COMM_WORLD,&Rank);
	MPI_Comm_size(MPI_COMM_WORLD,&np);
//	Buffer_size	= (File_size)/np;
	Buffer_size=1;
	Buffer	=(int *)malloc(Buffer_size*(sizeof(int)));
	MPI_File_open(MPI_COMM_WORLD,"Readfile",MPI_MODE_RDONLY,MPI_INFO_NULL,&Fp);
	MPI_File_read_at(Fp,0,Buffer,Buffer_size,MPI_INT,&status);
	printf("process: %d read \n",Rank);
	for(i=0;i<Buffer_size;i++)
	{
		printf("%d\n",Buffer[i]);
	}
	MPI_File_close(&Fp);
	free(Buffer);
	MPI_Finalize();
	return 0;
}
Example #23
int main( int argc, char *argv[] )
{
    unsigned int itr;

    int operacao;
    int verbose;
    int juntar;
    char * chave_file;
    char * entrada_file;
    char * saida_file;

    octeto Nb,Nk,Nr;
    octeto bloco[4*8];
    octeto chave[4*8*15];

    int worldsize, rank;
    MPI_Status status;
    MPI_File chave_handle;
    MPI_File entrada_handle;
    MPI_File saida_handle;

    MPI_Offset entrada_bytes;
    unsigned int numero_blocos;
    unsigned int blocos_processo;
    MPI_Offset bloco_byte_inicio;
    MPI_Offset bloco_byte_fim;
    MPI_Offset iterador;

    Tabela * tabela;
    octeto * tabelaEmpacotada;
    unsigned int proc;
    unsigned int tamanho_tabela;
    Tabela * tabela2;
    unsigned int no_proc;
    unsigned int no_resto;
    unsigned int i;
    BTreeNode * node;
    Indice * indice;


    MPI_Init(&argc,&argv);

    MPI_Comm_size(MPI_COMM_WORLD,&worldsize);
    MPI_Comm_rank(MPI_COMM_WORLD,&rank);

    operacao = INDEFINIDA;
    verbose = 0;
    juntar = 0;
    chave_file = NULL;
    entrada_file = NULL;
    saida_file = NULL;
    for(itr = 1;itr < (unsigned int)argc;itr++)
    {
/* Usage instructions */
        if( strcmp(argv[itr],"-a") == 0 || strcmp(argv[itr],"--ajuda") == 0 || 
            strcmp(argv[itr],"-h") == 0 || strcmp(argv[itr],"--help") == 0 )
        {
            if(rank == 0)
            {
                printf(" Uso: mpiexec -n [PROCESSOS] ./sm-rijndael [ARGUMENTO VALOR].\n");
                printf(" Encripta/Decripta um arquivo usando o algoritmo Rijndael(AES) extendido,\n");
                printf(" realizando um pre-processamento de blocos repetidos.\n");
                printf("  Argumentos opcionais:\n");
                printf("   -v,--verbose: Exibe mensagens de conclusao da operacao.\n");
                printf("   -j,--juntar: Concatena as tabelas de cada processo em um mestre.\n");
                printf("  Argumentos obrigatorios:\n");
                printf("   -op,--operacao: Informa se o objetivo da execucao eh encriptar ou decriptar.\n");
                printf("                    * Os valores possiveis sao: \'encriptar\' e \'decriptar\'.\n");
                printf("   -e,-i,--entrada,--input: Caminho e nome do arquivo a ser criptografado.\n");
                printf("   -s,-o,--saida,--output: Caminho e nome do arquivo resultante do processo de criptografia da entrada.\n");
                printf("   -c,-k,--chave,--key: Caminho e nome do arquivo contendo a chave.\n");
                printf("  O arquivo contendo a chave eh em formato binario de acordo com a seguinte especificacao:\n");
                printf("   - O primeiro byte deve conter o tamanho do bloco (em palavras de 4 bytes).\n");
                printf("      * O bloco pode possuir tamanho: 4, 5, 6, 7 ou 8.\n");
                printf("   - O segundo byte deve conter o tamanho da chave (em palavras de 4 bytes).\n");
                printf("      * Esta aplicacao aceita chaves com tamanho: 4, 5, 6, 7 ou 8.\n");
                printf("   - Os proximos 4*[tamanho da chave] bytes do arquivo sao os bytes componentes da chave, que\n");
                printf("     devem estar (obrigatoriamente) escritos no formato hexadecimal da linguagem C (0xff).\n");
                printf("   * Eh recomendavel o uso de um editor hexadecimal na construcao do arquivo chave.\n");
            }
            goto finalizando;
        }

/* Juntar: concatenate each process's table into a master table */
        else
        if( strcmp(argv[itr],"-j") == 0 || strcmp(argv[itr],"--juntar") == 0)
        {
            juntar = 1;
        }

/* Verbose: print completion messages */
        else
        if( strcmp(argv[itr],"-v") == 0 || strcmp(argv[itr],"--verbose") == 0)
        {
            verbose = 1;
        }

/* Operation to perform */
        else
        if( strcmp(argv[itr],"-op") == 0 || strcmp(argv[itr],"--operacao") == 0 )
        {
            if( itr+1 < argc )
            {
                if( strcmp(argv[itr+1],"encriptar") == 0 )
                {
                    operacao = ENCRIPTAR;
                }
                else
                if( strcmp(argv[itr+1],"decriptar") == 0 )
                {
                    operacao = DECRIPTAR;
                }
                itr++;
            }
            else
            {
                goto sempar;
            }
        }

/* Key file */
        else
        if( strcmp(argv[itr],"-c") == 0 || strcmp(argv[itr],"--chave") == 0 || 
            strcmp(argv[itr],"-k") == 0 || strcmp(argv[itr],"--key") == 0 )
        {
            if(itr+1 < argc)
            {
                chave_file = argv[itr+1];
                itr++;
            }
            else
            {
                goto sempar;
            }
        }

/* Input file */
        else
        if( strcmp(argv[itr],"-e") == 0 || strcmp(argv[itr],"--entrada") == 0 || 
            strcmp(argv[itr],"-i") == 0 || strcmp(argv[itr],"--input") == 0 )
        {
            if(itr+1 < argc)
            {
                entrada_file = argv[itr+1];
                itr++;
            }
            else
            {
                goto sempar;
            }
        }

/* Output file */
        else 
        if( strcmp(argv[itr],"-s") == 0 || strcmp(argv[itr],"--saida") == 0 || 
            strcmp(argv[itr],"-o") == 0 || strcmp(argv[itr],"--output") == 0 )
        {
            if(itr+1 < argc)
            {
                saida_file = argv[itr+1];
                itr++;
            }
            else
            {
                goto sempar;
            }
        }
/* Unknown argument */
        else
        {
            if(rank == 0)
            {
                printf("Erro nos argumentos passados.\n");
            }
            goto help;
        }
    }
/* End of argument parsing */

    if( operacao == INDEFINIDA || chave_file == NULL || entrada_file == NULL || saida_file == NULL )
    {
        if(rank == 0)
        {
            if( operacao == INDEFINIDA )
                printf("A operacao a ser realizada eh invalida ou nao foi especificada.\n");
            if( chave_file == NULL )
                printf("Esta faltando especificar o arquivo com a chave.\n");
            if( entrada_file == NULL )
                printf("Esta faltando especificar o arquivo de entrada.\n");
            if( saida_file == NULL )
                printf("Esta faltando especificar o arquivo de saida.\n");
        }
        goto help;
    }
/* End of argument handling */

    if( MPI_File_open( MPI_COMM_WORLD, chave_file, MPI_MODE_RDONLY, MPI_INFO_NULL, &chave_handle ) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Erro na abertura do arquivo com a chave (%s).\n",chave_file);
        }
        goto help;
    }

    if( MPI_File_read(chave_handle,&Nb,1, MPI_BYTE,&status) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Erro na leitura do tamanho de um bloco no arquivo com a chave (%s).\n",chave_file);
        }
        goto help;
    }
    if( Nb< 4 || Nb > 8 )
    {
        if( rank == 0 )
        {
            printf("Tamanho de bloco invalido no arquivo com a chave (%s).\n",chave_file);
        }
        goto help;
    }

    if( MPI_File_read(chave_handle,&Nk,1, MPI_BYTE,&status) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Erro na leitura do tamanho da chave no arquivo com a chave (%s).\n",chave_file);
        }
        goto help;
    }
    if( Nk< 4 || Nk > 8 )
    {
        if( rank == 0 )
        {
            printf("Tamanho de chave invalido no arquivo com a chave (%s).\n",chave_file);
        }
        goto help;
    }

    if( MPI_File_read(chave_handle,chave,4*Nk,MPI_BYTE,&status) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Erro na leitura da chave no arquivo com a chave (%s).\n",chave_file);
        }
        goto help;
    }

    MPI_File_close( &chave_handle );
    Nr = numero_rodadas(Nb,Nk);
    KeyExpansion(chave,Nb,Nk);

    if( MPI_File_open( MPI_COMM_WORLD, entrada_file, 
            MPI_MODE_RDONLY, 
            MPI_INFO_NULL, &entrada_handle ) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Erro na abertura do arquivo de entrada (%s).\n",entrada_file);
        }
        goto help;
    }

    MPI_File_get_size(entrada_handle,&entrada_bytes);


    if( MPI_File_open( MPI_COMM_WORLD, saida_file, 
            MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_EXCL, 
            MPI_INFO_NULL, &saida_handle ) != MPI_SUCCESS )
    {
        if( rank == 0 )
        {
            printf("Erro na criacao do arquivo de saida (%s).\n",saida_file);
            printf("Uma possivel causa eh que o arquivo ja exista.\n");
        }
        goto help;
    }

    numero_blocos = ( entrada_bytes / (Nb*4) );
    blocos_processo = numero_blocos / worldsize;

    if( operacao == ENCRIPTAR || operacao == DECRIPTAR )
    {
        bloco_byte_inicio = 4*Nb*blocos_processo*rank;
        bloco_byte_fim = 4*Nb*blocos_processo*(rank+1);

        tabela = novaTabela(Nb*4);

        for( iterador = bloco_byte_inicio ; iterador < bloco_byte_fim ; iterador += (4*Nb) )
        {
            if( MPI_File_read_at(entrada_handle,iterador,bloco,(4*Nb),MPI_BYTE,&status) != MPI_SUCCESS )
            {
                if( rank == 0 )
                {
                    printf("Erro ao ler do arquivo de entrada (%s).\n",entrada_file);
                }
                goto help;
            }

            novaOcorrenciaTabela(tabela,bloco,iterador);
        }
        
        iterador = 4*Nb*blocos_processo*worldsize + 4*Nb*rank;
        if( iterador < numero_blocos*4*Nb )
        {
            if( MPI_File_read_at(entrada_handle,iterador,bloco,(4*Nb),MPI_BYTE,&status) != MPI_SUCCESS )
            {
                if( rank == 0 )
                {
                    printf("Erro ao ler do arquivo de entrada (%s).\n",entrada_file);
                }
                goto help;
            }

            novaOcorrenciaTabela(tabela,bloco,iterador);
        }
        else if( operacao == ENCRIPTAR  &&  iterador == numero_blocos*4*Nb )
        {
            if( MPI_File_read_at(entrada_handle,iterador,bloco,(4*Nb),MPI_BYTE,&status) != MPI_SUCCESS )
            {
                if( rank == 0 )
                {
                    printf("Erro ao ler do arquivo de entrada (%s).\n",entrada_file);
                }
                goto help;
            }
            bloco[ 4*Nb - 1 ] = (octeto)(entrada_bytes - numero_blocos*4*Nb);
            novaOcorrenciaTabela(tabela,bloco,iterador);
        }


        if( juntar == 1 )
        {
            tabelaEmpacotada = (octeto*)malloc( entrada_bytes );
            if( rank == 0 ) /* Master that will concatenate all the trees */
            {
                for(proc=1;proc<worldsize;proc++)
                {
                    MPI_Recv( tabelaEmpacotada, entrada_bytes, MPI_BYTE, MPI_ANY_SOURCE, TAG_TABELA_EMPACOTADA, MPI_COMM_WORLD, &status );
                    desempacotarInserindo(tabelaEmpacotada,tabela);
                }
                
                tamanho_tabela = numeroBlocosTabela(tabela);

                no_proc = (tamanho_tabela / worldsize);
                no_resto = (tamanho_tabela % worldsize);
                
                tabela2 = novaTabela(Nb*4);
                for(proc=1;proc<worldsize;proc++)
                {
                    for(i=0;i<no_proc;i++)
                    {
                        soInsiraTabela(tabela2, popLastTabelaNode(tabela) );
                    }
                    if( no_resto > 1 )
                    {
                        soInsiraTabela(tabela2, popLastTabelaNode(tabela) );
                        no_resto--;
                    }
                    empacotarTabela(tabela2,tabelaEmpacotada);

                    MPI_Send(tabelaEmpacotada,numeroBytesTabela(tabela2), MPI_BYTE, proc, TAG_TABELA_EMPACOTADA_2, MPI_COMM_WORLD );

                    destruirArvore(tabela2->root);
                    tabela2->root = NULL;
                }
                destruirTabela(tabela2);
            }
            else
            {
                empacotarTabela(tabela,tabelaEmpacotada);
                MPI_Send(tabelaEmpacotada,numeroBytesTabela(tabela), MPI_BYTE, 0, TAG_TABELA_EMPACOTADA, MPI_COMM_WORLD );
                destruirArvore(tabela->root);
                tabela->root = NULL;

                MPI_Recv( tabelaEmpacotada, entrada_bytes, MPI_BYTE, 0, TAG_TABELA_EMPACOTADA_2, MPI_COMM_WORLD, &status );
                desempacotarInserindo(tabelaEmpacotada,tabela);
            }
            free(tabelaEmpacotada);
        }

        if( operacao == ENCRIPTAR )
            MPI_File_set_size(saida_handle,(MPI_Offset)( (numero_blocos+1)*(Nb*4) ) );
        else if( operacao == DECRIPTAR )
            MPI_File_set_size(saida_handle,entrada_bytes);

        tamanho_tabela = numeroBlocosTabela(tabela);
        for( i=0 ; i<tamanho_tabela ; i++ )
        {
            node = popLastTabelaNode(tabela);
//          memcpy (bloco,node->bloco,4*Nb);

            if( operacao == ENCRIPTAR )
                AES_encriptar_bloco(node->bloco,Nb,chave,Nr);
            else if( operacao == DECRIPTAR )
                AES_decriptar_bloco(node->bloco,Nb,chave,Nr);

            indice = node->ocorrencias;
            while( indice != NULL )
            {
                if( MPI_File_write_at(saida_handle,indice->indice,node->bloco,(4*Nb),MPI_BYTE,&status) != MPI_SUCCESS )
                {
                    if( rank == 0 )
                    {
                        printf("Erro ao escrever no arquivo de saida (%s).\n",saida_file);
                    }
                    goto help;
                }
                indice = indice->next;
            }
            destruirArvore(node);
        }
        destruirTabela(tabela);

        if( operacao == DECRIPTAR )
        {
            MPI_Barrier( MPI_COMM_WORLD ); /* Barrier that keeps any process from reading before the decrypted value is written */

            if( MPI_File_read_at(saida_handle,entrada_bytes-1,bloco,1,MPI_BYTE,&status) != MPI_SUCCESS )
            {
                if( rank == 0 )
                {
                    printf("Erro ao realizar leitura no arquivo de saida (%s).\n",saida_file);
                }
                goto help;
            }

            MPI_Barrier( MPI_COMM_WORLD ); /* Barrier that keeps any process from truncating the file before another process has read */

            MPI_File_set_size(saida_handle,entrada_bytes - 4*Nb + bloco[0]);
        }

        if( rank == 0 && verbose==1)
        {
            if( operacao == ENCRIPTAR )
                printf("A encriptacao do arquivo foi realizada com sucesso.\n");
            else if( operacao == DECRIPTAR )
                printf("A decriptacao do arquivo foi realizada com sucesso.\n");
        }
    }

    goto finalizando;

sempar:
    if( rank == 0 )
    {
        printf("Sem par correspondente para a opcao %s.\n",argv[itr]);
    }

help:
    if( rank == 0 )
    {
        printf("Use a opcao --help para melhor entendimento do uso da aplicacao.\n");
    }

finalizando:
    MPI_Finalize( );
    return 0;
}
Example #24
int main(int argc, char **argv)
{
    char *buf, *tmp, *buf2, *tmp2, *check;
    int i, j, mynod=0, nprocs=1, err, my_correct = 1, correct, myerrno;
    double stim, etim;
    double write_tim = 0;
    double read_tim = 0;
    double read_bw, write_bw;
    double max_read_tim, max_write_tim;
    double min_read_tim, min_write_tim;
    double ave_read_tim, ave_write_tim;
    int64_t iter_jump = 0;
    int64_t seek_position = 0;
    MPI_File fh;
    MPI_Status status;
    int nchars;

    /* startup MPI and determine the rank of this process */
    MPI_Init(&argc,&argv);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);

    /* parse the command line arguments */
    parse_args(argc, argv);

    if (mynod == 0) printf("# Using mpi-io calls.\n");


    /* kindof a weird hack- if the location of the pvfstab file was
     * specified on the command line, then spit out this location into
     * the appropriate environment variable: */

#if H5_HAVE_SETENV
/* no setenv or unsetenv */
    if (opt_pvfstab_set) {
            if((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0){
                    perror("setenv");
                    goto die_jar_jar_die;
            }
    }
#endif

    /* this is how much of the file data is covered on each iteration of
     * the test.  used to help determine the seek offset on each
     * iteration */
    iter_jump = nprocs * opt_block;

    /* setup a buffer of data to write */
    if (!(tmp = (char *) malloc(opt_block + 256))) {
            perror("malloc");
            goto die_jar_jar_die;
    }
    buf = tmp + 128 - (((long)tmp) % 128);  /* align buffer */

    if (opt_correct) {
            /* do the same buffer setup for verifiable data */
            if (!(tmp2 = (char *) malloc(opt_block + 256))) {
                    perror("malloc2");
                    goto die_jar_jar_die;
             }
            buf2 = tmp2 + 128 - (((long)tmp2) % 128);  /* align the verification buffer (was aliasing buf) */
    }

    /* open the file for writing */
    err = MPI_File_open(MPI_COMM_WORLD, opt_file,
    MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (err < 0) {
            fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno));
            goto die_jar_jar_die;
    }

    /* now repeat the write operations the number of times
     * specified on the command line */
    for (j=0; j < opt_iter; j++) {

            /* calculate the appropriate position depending on the iteration
             * and rank of the current process */
            seek_position = (j*iter_jump)+(mynod*opt_block);

            if (opt_correct) /* fill in buffer for iteration */ {
                    for (i=mynod+j, check=buf; i<opt_block; i++,check++) *check=(char)i;
            }

            /* discover the starting time of the operation */
       MPI_Barrier(MPI_COMM_WORLD);
       stim = MPI_Wtime();

            /* write out the data */
            nchars = opt_block/sizeof(char);
            err = MPI_File_write_at(fh, seek_position, buf, nchars, MPI_CHAR, &status);
            if(err){
                    fprintf(stderr, "node %d, write error: %s\n", mynod,
                    strerror(errno));
            }

            /* discover the ending time of the operation */
       etim = MPI_Wtime();

       write_tim += (etim - stim);

            /* we are done with this "write" iteration */
    }

    err = MPI_File_close(&fh);
    if(err){
            fprintf(stderr, "node %d, close error after write\n", mynod);
    }

    /* wait for everyone to synchronize at this point */
    MPI_Barrier(MPI_COMM_WORLD);

    /* reopen the file to read the data back out */
    err = MPI_File_open(MPI_COMM_WORLD, opt_file,
    MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (err < 0) {
            fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno));
            goto die_jar_jar_die;
    }


    /* we are going to repeat the read operation the number of iterations
     * specified */
    for (j=0; j < opt_iter; j++) {
            /* calculate the appropriate spot give the current iteration and
             * rank within the MPI processes */
            seek_position = (j*iter_jump)+(mynod*opt_block);

            /* discover the start time */
       MPI_Barrier(MPI_COMM_WORLD);
       stim = MPI_Wtime();

            /* read in the file data */
            if (!opt_correct){
                    err = MPI_File_read_at(fh, seek_position, buf, nchars, MPI_CHAR, &status);
            }
            else{
                    err = MPI_File_read_at(fh, seek_position, buf2, nchars, MPI_CHAR, &status);
            }
            myerrno = errno;

            /* discover the end time */
       etim = MPI_Wtime();
       read_tim += (etim - stim);

       if (err < 0) fprintf(stderr, "node %d, read error, loc = %Ld: %s\n",
                    mynod, mynod*opt_block, strerror(myerrno));

            /* if the user wanted to check correctness, compare the write
             * buffer to the read buffer */
            if (opt_correct && memcmp(buf, buf2, opt_block)) {
                    fprintf(stderr, "node %d, correctness test failed\n", mynod);
                    my_correct = 0;
                    MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN,
                            MPI_COMM_WORLD);
            }

            /* we are done with this read iteration */
    }

    /* close the file */
    err = MPI_File_close(&fh);
    if(err){
            fprintf(stderr, "node %d, close error after write\n", mynod);
    }

    /* compute the read and write times */
    MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX,
            MPI_COMM_WORLD);
    MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN,
            MPI_COMM_WORLD);
    MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM,
            MPI_COMM_WORLD);

    /* calculate the average from the sum */
    ave_read_tim = ave_read_tim / nprocs;

    MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX,
            MPI_COMM_WORLD);
    MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN,
            MPI_COMM_WORLD);
    MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM,
            MPI_COMM_WORLD);

    /* calculate the average from the sum */
    ave_write_tim = ave_write_tim / nprocs;

    /* print out the results on one node */
    if (mynod == 0) {
       read_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_read_tim*1000000.0);
       write_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_write_tim*1000000.0);

                    printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs,
            opt_iter, (long)opt_block);

                    printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter));

                    printf("# Write:  min_time = %f, max_time = %f, mean_time = %f\n",
                            min_write_tim, max_write_tim, ave_write_tim);
                    printf("# Read:  min_time = %f, max_time = %f, mean_time = %f\n",
                            min_read_tim, max_read_tim, ave_read_tim);

       printf("Write bandwidth = %f Mbytes/sec\n", write_bw);
       printf("Read bandwidth = %f Mbytes/sec\n", read_bw);

            if (opt_correct) {
                    printf("Correctness test %s.\n", correct ? "passed" : "failed");
            }
    }


die_jar_jar_die:

#if H5_HAVE_SETENV
/* no setenv or unsetenv */
    /* clear the environment variable if it was set earlier */
    if	(opt_pvfstab_set){
            unsetenv("PVFSTAB_FILE");
    }
#endif

    free(tmp);
    if (opt_correct) free(tmp2);
    MPI_Finalize();
    return(0);
}
Example #25
void readlines(MPI_File *in, const int rank, const int size, const int overlap,
               /*char ***lines,*/ int *nlines) {
    MPI_Offset filesize;
    MPI_Offset localsize;
    MPI_Offset start;
    MPI_Offset end;
    char *chunk;
    MPI_Offset bytesRead=0;
    MPI_Offset myBytesRead = 4000000, myActualRead = 0;
    MPI_File_get_size(*in, &filesize);
    int control = 0;

    printf("Rank %d started\n", rank);

    while(bytesRead < filesize){
   // while (0){
        /* figure out who reads what */
        start = (rank * myBytesRead) + bytesRead;
        end   = start + myBytesRead - 1;
        
        /* add overlap to the end of everyone's chunk... */
        if (end > filesize || (end + overlap) > filesize)
            end = filesize;
        else
            end += overlap;

        if(start < filesize){
            localsize =  end - start + 1;
      
        }else{
            start = filesize - 1;
            localsize = 0;
        }
        /* allocate memory */
        chunk = (char *)malloc( (localsize + 1)*sizeof(char));

        /* everyone reads in their part */
        MPI_File_read_at(*in, start, chunk, localsize, MPI_CHAR, MPI_STATUS_IGNORE);
        chunk[localsize] = '\0';

        /*
         *  everyone calculate what their start and end *really* are by going 
         *  from the first newline after start to the first newline after the
         *  overlap region starts (eg, after end - overlap + 1)
         */
        int locstart=0, locend=localsize;
        if (localsize != 0)
        {
            if (rank != 0) {
                while(chunk[locstart] != '\n' || chunk[locstart+1] != '+' || chunk[locstart+2] != '\n'){
                    locstart++;
                }
                locstart += 3;

                while(chunk[locstart] != '\n'){
                    locstart++;
                }
                locstart++;
            }

           if (end != filesize) {
                locend -= overlap;
            
                while(chunk[locend] != '\n' || chunk[locend+1] != '+' || chunk[locend+2] != '\n'){
                    locend++;
                }

                locend += 3;

                while(chunk[locend] != '\n'){
                    locend++;
                }
                locend++; 
            }
        }

        // what was actually read by Pi
        myActualRead = locend-locstart;
        if(rank == 0)
            myActualRead += bytesRead;

        /* Now we'll count the number of lines */
        /************************/
        // This part represents the processing:
        // while (fastq_file.ReadNextRecord(rec))

        std::string s = std::string(&chunk[locstart], &chunk[locend]);
        size_t n = std::count(s.begin(), s.end(), '\n');

        uint32 varSuperblockSize = (unsigned int) n/4;

        //printf("Rank %d's superblock has %ld records. #%d\n", rank, varSuperblockSize, control);

       // int varBlockSize = 32;
      //  for (;;)
       // {
         //   if(n % varBlockSize != 0)
           //     varBlockSize++;
         //   else
           //     break;
        //}
        //printf("  ==>Rank %d's block has %d records. #%d\n", rank, varBlockSize, control);

        FastqRecord rec;
       // DsrcFile dsrc_file(varSuperblockSize);

        //dsrc_file.StartCompress("test");

        // who variable decides if processing title or DNA or plus or QS
        int who = 0;
        bool errorFree[4] = {false, false, false, false};
        int64 rec_no = 0;
        // ** READING A RECORD (TITLE, DNA SEQ, PLUS, QUALITY SCORE)
        int j = locstart; 
        while (j < locend){

            switch (who){
              // Read title  
              case 0: {
                uint32 i = 0;
                for (;;){
                    int32 c = chunk[j++];

                    if (c != '\n' && c != '\r'){
                        if (i >= rec.title_size){
                            rec.Extend(rec.title, rec.title_size);
                        }
                        rec.title[i++] = (uchar) c;
                    } else if (i > 0){
                        break;
                    }
                }
                rec.title[i] = 0;
                rec.title_len = i;      
                errorFree[who++] = i > 0 && rec.title[0] == '@';
                break;
              }
              // Read DNA sequence
              case 1:{
                uint32 i = 0;
                int32 c;

                if (rec.sequence_breaks){
                    delete rec.sequence_breaks;
                    rec.sequence_breaks = NULL;
                }
                uint32 last_eol_pos = 0;
                uint32 sequence_break = 0;

                for (;;){
                    c = chunk[j++];

                    if (c == '+'){
                        j--;
                        break;
                    }

                    //if (c == FILE_EOF)
                     //   break;

                    if (c != '\n' && c != '\r'){
                        if (i >= rec.sequence_size){
                            rec.Extend(rec.sequence, rec.sequence_size);
                        }
                        rec.sequence[i++] = (uchar) c;
                    } else{
                        if (last_eol_pos != i){
                            if (sequence_break){
                                if (!rec.sequence_breaks){
                                    rec.sequence_breaks = new std::vector<int>;
                                }
                                rec.sequence_breaks->push_back(sequence_break);
                            } else{
                                sequence_break = i - last_eol_pos;
                            }
                            last_eol_pos = i;
                        }
                    }
                }
                rec.sequence[i] = 0;
                rec.sequence_len = i;
                errorFree[who++] = true;
                break;
              }
              // Read "+"  
              case 2:{
                uint32 i = 0;
                int32 c;
                for (;;){
                    c = chunk[j++];
                    //if (c == FILE_EOF)
                      //  break;

                    if (c != '\n' && c != '\r'){
                        if (i >= rec.plus_size){
                            rec.Extend(rec.plus, rec.plus_size);
                        }
                        rec.plus[i++] = (uchar) c;
                    }
                    else if (i > 0){
                        break;
                    }
                }
                rec.plus[i] = 0;
                rec.plus_len = i;
                errorFree[who++] = i > 0;            
                break;
              }
              // Read quality score
              case 3:{
                uint32 i;
                uint32 last_eol_pos = 0;

                if (rec.quality_breaks){
                    delete rec.quality_breaks;
                    rec.quality_breaks = NULL;
                }

                if (rec.sequence_size > rec.quality_size)
                    rec.ExtendTo(rec.quality, rec.quality_size, rec.sequence_size);

                for (i = 0; i < rec.sequence_len;){
                    int32 c = chunk[j++];
                    //if (c == FILE_EOF)
                    //    break;

                    if (c != '\n' && c != '\r'){
                        rec.quality[i++] = (uchar)c;
                    } else{
                        if (last_eol_pos != i){
                            if (!rec.quality_breaks){
                                rec.quality_breaks = new std::vector<int>;
                            }
                            rec.quality_breaks->push_back(i - last_eol_pos);
                            last_eol_pos = i;
                        }
                    }
                }
                j++; // get the newline

                rec.quality[i] = 0;
                rec.quality_len = i;
                errorFree[who++] = (i == rec.sequence_len);
                break;
              }  
            }
           // If a full record has been read
            if(who == 4){
            if(errorFree[0] && errorFree[1] && errorFree[2] && errorFree[3]){
                   // dsrc_file.WriteRecord(rec);
                    //printf("Rank %d has %ld processed\n", rank, rec_no);
                    ++rec_no;
                    who = 0;
                } else{
                    printf("Rank %d has an error\n", rank);
                    break;
                } 
            }
            //if (chunk[i] == '\n'){
              //  (*nlines)++;
           // }
        }

        //dsrc_file.FinishCompress();
        free(chunk);

        //printf("Rank %d's superblock has %ld records. #%d\n", rank, (*nlines)/4, control);
        //*nlines = 0;
        
        MPI_Reduce(&myActualRead, &bytesRead, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Bcast(&bytesRead, 1, MPI_LONG_LONG_INT, 0, MPI_COMM_WORLD);

       // if(rank == 0)
            //printf("%d.>> bytesRead: %ld\n", control, bytesRead);

        control++;


    } // ReadNextRecord
    if(rank == 0)
        printf("%d.***    bytesRead: %ld | filesize: %ld     ***\n", control, bytesRead, filesize);

    return;
}
Example #26
int main (int argc, char *argv[]){

	char *x, *y, *z, *xbuf, *hbuf, *chrNames[MAXNBCHR];
	int fd;
	off_t hsiz;
	struct stat st;

	MPI_File mpi_filed;
	MPI_File mpi_file_split_comm;

	MPI_Offset fileSize, unmapped_start, discordant_start;
	int num_proc, rank;
	int res, nbchr, i, paired, write_sam;
	int ierr, errorcode = MPI_ERR_OTHER;
	char *file_name, *output_dir;

	char *header;

	unsigned int headerSize;
	unsigned char threshold;

	size_t input_file_size;
	size_t unmappedSize = 0;
	size_t discordantSize = 0;
	size_t *readNumberByChr = NULL, *localReadNumberByChr = NULL;
	Read **reads;

	double time_count;
	double time_count1;
	int g_rank, g_size;
	MPI_Comm split_comm; //used to split communication when jobs have no reads to sort
	int split_rank, split_size; //after split communication we update the rank and the size
	double tic, toc;
	int compression_level;
	size_t fsiz, lsiz, loff;
	const char *sort_name;
	MPI_Info finfo;

	/* Set default values */
	compression_level = 3;
	parse_mode = MODE_OFFSET;
	sort_name = "coordinate";
	paired = 0;
	threshold = 0;
	write_sam = 0;
	/* Check command line */
	while ((i = getopt(argc, argv, "c:hnpq:")) != -1) {
		switch(i) {
			case 'c': /* Compression level */
				compression_level = atoi(optarg);
				break;
			case 'h': /* Usage display */
				usage(basename(*argv));
				return 0;
			case 'n':
				parse_mode = MODE_NAME;
				sort_name = "queryname";
				break;
			case 'p': /* Paired reads */
				paired = 1;
				break;
			case 'q': /* Quality threshold */
				threshold = atoi(optarg);
				break;
			default:
				usage(basename(*argv));
				return 1;
		}
	}
	if (argc - optind != 2) {
		usage(basename(*argv));
		return 1;
	}
	file_name = argv[optind];
	output_dir = argv[optind+1];

	/* Check arguments */
	res = access(file_name, F_OK|R_OK);
	if (res == -1)
		err(1, "%s", file_name);
	res = access(output_dir, F_OK|W_OK);
	if (res == -1)
		err(1, "%s", output_dir);

	/* MPI inits */
	res = MPI_Init(&argc, &argv);
	assert(res == MPI_SUCCESS);
	res = MPI_Comm_rank(MPI_COMM_WORLD, &rank);
	assert(res == MPI_SUCCESS);
	res = MPI_Comm_size(MPI_COMM_WORLD, &num_proc);
	assert(res == MPI_SUCCESS);

	g_rank = rank;
	g_size = num_proc;

	/* Small summary */
	if (rank == 0) {
		fprintf(stderr, "Number of processes : %d\n", num_proc);
		fprintf(stderr, "Reads' quality threshold : %d\n", threshold);
		fprintf(stderr, "Compression Level is : %d\n", compression_level);
		fprintf(stderr, "SAM file to read : %s\n", file_name);
		fprintf(stderr, "Output directory : %s\n", output_dir);
	}

	/* Process input file */
	fd = open(file_name, O_RDONLY, 0666);
	assert(fd != -1);
	assert(fstat(fd, &st) != -1);
	xbuf = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0);
	assert(xbuf != MAP_FAILED);

	/* Parse SAM header */
	memset(chrNames, 0, sizeof(chrNames));
	x = xbuf; nbchr = 0;
	while (*x == '@') {
		y = strchr(x, '\n');
		z = x; x = y + 1;
		if (strncmp(z, "@SQ", 3) != 0) continue;
		/* Save reference names */
		y = strstr(z, "SN:");
		assert(y != NULL);
		z = y + 3;
		while (*z && !isspace((unsigned char)*z)) z++;
		chrNames[nbchr++] = strndup(y + 3, z - y - 3);
		assert(nbchr < MAXNBCHR - 2);
	}
	chrNames[nbchr++] = strdup(UNMAPPED);
	chrNames[nbchr++] = strdup(DISCORDANT);

	hsiz = x - xbuf;
	hbuf = strndup(xbuf, hsiz);

	if (rank == 0) {
		fprintf(stderr, "The size of the file is %zu bytes\n", (size_t)st.st_size);
		fprintf(stderr, "Header has %d+2 references\n", nbchr - 2);
	}
	asprintf(&header, "@HD\tVN:1.0\tSO:%s\n%s", sort_name, hbuf);

	free(hbuf);

	assert(munmap(xbuf, (size_t)st.st_size) != -1);
	assert(close(fd) != -1);

	//task FIRST FINE TUNING FINFO FOR READING OPERATIONS


	MPI_Info_create(&finfo);
	/*
	 * In this part you shall adjust the striping factor and unit according
	 * to the underlying filesystem.
	 * Harmless for other file system.
	 *
	 */
	MPI_Info_set(finfo,"striping_factor", STRIPING_FACTOR);
	MPI_Info_set(finfo,"striping_unit", STRIPING_UNIT); //2G striping
	MPI_Info_set(finfo,"ind_rd_buffer_size", STRIPING_UNIT); //2gb buffer
	MPI_Info_set(finfo,"romio_ds_read",DATA_SIEVING_READ);

	/*
	 * for collective reading and writing
	 * should be adapted too and tested according to the file system
	 * Harmless for other file system.
	 */
	MPI_Info_set(finfo,"nb_proc", NB_PROC);
	MPI_Info_set(finfo,"cb_nodes", CB_NODES);
	MPI_Info_set(finfo,"cb_block_size", CB_BLOCK_SIZE);
	MPI_Info_set(finfo,"cb_buffer_size", CB_BUFFER_SIZE);


	//we open the input file
	ierr = MPI_File_open(MPI_COMM_WORLD, file_name,  MPI_MODE_RDONLY , finfo, &mpi_filed);
	//assert(in != -1);
	if (ierr){
		if (rank == 0) fprintf(stderr, "%s: Failed to open file %s\n", argv[0], file_name);
		MPI_Abort(MPI_COMM_WORLD, errorcode);
		exit(2);
	}
	ierr = MPI_File_get_size(mpi_filed, &fileSize);
	assert(ierr == MPI_SUCCESS);
	input_file_size = (long long)fileSize;

	/* Get chunk offset and size */
	fsiz = input_file_size;
	lsiz = fsiz / num_proc;
	loff = rank * lsiz;

	tic = MPI_Wtime();

	headerSize = unmappedSize = discordantSize = strlen(header);

	//We place each process's file offset at the beginning of a read line
	size_t *goff =(size_t*)calloc((size_t)(num_proc+1), sizeof(size_t));
	init_goff(mpi_filed,hsiz,input_file_size,num_proc,rank,goff);

	//We calculate the size to read for each process
	lsiz = goff[rank+1]-goff[rank];
	//NOW WE WILL PARSE
	size_t j=0;
	size_t poffset = goff[rank]; //Current offset in file sam

	//nbchr because we add the discordant reads in the structure
	reads = (Read**)malloc((nbchr)*sizeof(Read *));//We allocate a linked list of structs for each chromosome (last chr = unmapped reads)
	readNumberByChr = (size_t*)malloc((nbchr)*sizeof(size_t));//Array with the number of reads found in each chromosome
	localReadNumberByChr = (size_t*)malloc((nbchr)*sizeof(size_t));//Array with the number of reads found in each chromosome
	Read ** anchor = (Read**)malloc((nbchr)*sizeof(Read *));//Pointer to the first read of each chromosome

	//Init first read
	for(i = 0; i < (nbchr); i++){
		reads[i] = malloc(sizeof(Read));
		reads[i]->coord = 0;
		anchor[i] = reads[i];
		readNumberByChr[i]=0;
	}

	toc = MPI_Wtime();

	char *local_data_tmp = malloc(1024*1024);
	char *local_data =(char*)malloc(((goff[rank+1]-poffset)+1)*sizeof(char));
	size_t size_tmp= goff[rank+1]-poffset;
	local_data[goff[rank+1]-poffset] = 0;
	char *q=local_data;

	//We read the SAM file and parse it
	while(poffset < goff[rank+1]){

		size_t size_to_read = 0;

		if( (goff[rank+1]-poffset) < DEFAULT_INBUF_SIZE ){
			size_to_read = goff[rank+1]-poffset;
		}
		else{
			size_to_read = DEFAULT_INBUF_SIZE;
		}

		// we load the buffer
		// local_data_tmp holds a temporary chunk of the SAM file,
		// because MPI_File_read_at is limited to an int count per call
		local_data_tmp =(char*)realloc(local_data_tmp, (size_to_read+1)*sizeof(char));
		local_data_tmp[size_to_read]=0;

		// Original reading part is before 18/09/2015
		MPI_File_read_at(mpi_filed, (MPI_Offset)poffset, local_data_tmp, size_to_read, MPI_CHAR, MPI_STATUS_IGNORE);
		size_t local_offset=0;
		assert(strlen(local_data_tmp) == size_to_read);

		//we find where the last complete line ends, to update the next poffset
		size_t offset_last_line = size_to_read-1;

		size_t extra_char=0;
		while(local_data_tmp[offset_last_line] != '\n'){
			offset_last_line -- ;
			extra_char++;
		}

		local_data_tmp[size_to_read - extra_char]=0;
		size_t local_data_tmp_sz = strlen(local_data_tmp);

		//If it's the last line of the file, we put back a final '\n' for the tokenizer
		if(rank == num_proc-1 && ((poffset+size_to_read) == goff[num_proc])){
			local_data_tmp[offset_last_line]='\n';
		}

		//Now we parse Read in local_data
		parser_paired(local_data_tmp, rank, poffset, threshold, nbchr, &readNumberByChr, chrNames, &reads);

		//now we copy local_data_tmp in local_data
		char *p = local_data_tmp;
		int pos =0;
		while (*p && (pos < local_data_tmp_sz)) {*q=*p;p++;q++;pos++;}

		//we go to the next line
		poffset+=(offset_last_line+1);
		local_offset+=(offset_last_line+1);

	}

	assert(size_tmp == strlen(local_data));

	fprintf(stderr, "%d (%.2lf)::::: *** FINISH PARSING FILE ***\n", rank, MPI_Wtime()-toc);

	if (local_data_tmp) free(local_data_tmp);
	malloc_trim(0);

	MPI_Barrier(MPI_COMM_WORLD);

	//We set attribute next of the last read and go back to first read of each chromosome
	for(i = 0; i < nbchr; i++){
		reads[i]->next = NULL;
		reads[i] = anchor[i];
	}
	free(anchor);

	//We count how many reads we found
	size_t nb_reads_total =0,nb_reads_global =0;
	for(j=0;j<nbchr;j++){
		nb_reads_total+=readNumberByChr[j];
	}

	MPI_Allreduce(&nb_reads_total, &nb_reads_global, 1, MPI_UNSIGNED_LONG, MPI_SUM, MPI_COMM_WORLD);

	/*
	 * We handle unmapped and discordant reads
	 */

	int s = 0;
	for (s = 1; s < 3; s++){

		MPI_File mpi_file_split_comm2;
		double time_count;

		size_t total_reads = 0;
		MPI_Allreduce(&readNumberByChr[nbchr-s], &total_reads , 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);

		if ((rank == 0) && (s == 1))
			fprintf(stderr, "rank %d :::: total read to sort for unmapped = %zu \n", rank, total_reads);

		if ((rank == 0) && (s == 2))
			fprintf(stderr, "rank %d :::: total read to sort for discordant = %zu \n", rank, total_reads);

		MPI_Barrier(MPI_COMM_WORLD);

		if (total_reads == 0){
			// nothing to sort for unmapped
			// maybe write an empty bam file
		}
		else{
			int i1,i2;
			size_t *localReadsNum_rank0 = (size_t *)malloc(num_proc*sizeof(size_t));
			localReadsNum_rank0[0] = 0;
			int file_pointer_to_free = 0;
			int split_comm_to_free = 0;
			//we build a vector with rank job
			int val_tmp1 = 0;
			int val_tmp2 = 0;
			int chosen_rank = 0;
			// the color tells which communicator the rank belongs to:
			// color = 0 will be the new communicator color,
			// otherwise the color is 1
			int *color_vec_to_send =  (int *)malloc(num_proc*sizeof(int));
			// the key value tells the order in the new communicator
			int *key_vec_to_send =  (int *)malloc(num_proc*sizeof(int));

			//every rank gathers the per-rank read counts
			MPI_Allgather(&readNumberByChr[nbchr-s] , 1, MPI_LONG_LONG_INT, localReadsNum_rank0 , 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD);
			MPI_Barrier(MPI_COMM_WORLD);

			if (rank == 0){
				//we must choose the first rank with reads to sort
				i1=0;
				while (localReadsNum_rank0[i1] == 0){
					chosen_rank++;
					i1++;
				}
			}

			//we broadcast the chosen rank
			//task: replace the broadcast with a send/receive
			MPI_Bcast( &chosen_rank, 1, MPI_INT, 0, MPI_COMM_WORLD);
			MPI_Barrier(MPI_COMM_WORLD);

			//we must choose which rank is going to split the communication
			if (((rank == chosen_rank) || rank == 0) && (chosen_rank != 0)){
				//the chosen rank will need the header later,
				//so rank 0 sends it the header here
				if (rank == chosen_rank){
					header=(char *)malloc((headerSize + 1)*sizeof(char));
					MPI_Recv(header, headerSize + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
				}
				if (rank == 0){
					MPI_Send(header, headerSize + 1, MPI_CHAR, chosen_rank,  0, MPI_COMM_WORLD);
				}
			}
			else {
				//we do nothing here
			}

			if (rank == chosen_rank) {

				int counter = 0;
				//we compute the number of 0 in the localReadsNum_vec
				for(i1 = 0; i1 < num_proc; i1++){
					if (localReadsNum_rank0[i1] == 0) {
						counter++;
					}
				}
				// if no jobs without reads we do nothing
				if ( counter == 0 ){
					// nothing to do: we associate split_comm with MPI_COMM_WORLD
					split_comm = MPI_COMM_WORLD;
					for (i2 = 0; i2 < num_proc; i2++) {

						if (localReadsNum_rank0[i2] == 0) {
							color_vec_to_send[i2] = 1;
							key_vec_to_send[i2] = val_tmp2;
							val_tmp2++;
						} else {
							color_vec_to_send[i2] = 0;
							key_vec_to_send[i2] = val_tmp1;
							val_tmp1++;
						}
					}
				}
				else{
					// now we compute the color according to
					// the number of reads to sort
					for(i2 = 0; i2 < num_proc; i2++){
						if (localReadsNum_rank0[i2] == 0){
							color_vec_to_send[i2] = 1;
							key_vec_to_send[i2] = val_tmp2;
							val_tmp2++;
						} else{
							color_vec_to_send[i2] = 0;
							key_vec_to_send[i2] = val_tmp1;
							val_tmp1++;
						}
					} // end for loop
				}// end if
			}// end if (rank == chosen_rank)

			MPI_Barrier(MPI_COMM_WORLD);
			// we scatter the key and color vector
			// we create key and color variable for each job
			int local_color = 0;
			int local_key = 0;
			// we scatter the color and key
			MPI_Scatter( color_vec_to_send, 1, MPI_INT, &local_color, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
			MPI_Scatter( key_vec_to_send, 1, MPI_INT, &local_key, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
			// we create a communicator
			// we group all communicator
			// with color of zero
			if (local_color == 0){

				MPI_Comm_split( MPI_COMM_WORLD, local_color, local_key, &split_comm);
				ierr = MPI_File_open(split_comm, file_name,  MPI_MODE_RDONLY , finfo, &mpi_file_split_comm2);
				//we ask to liberate file pointer
				file_pointer_to_free = 1;
				//we ask to liberate the split_comm
				split_comm_to_free = 1;
			}
			else{
				MPI_Comm_split( MPI_COMM_WORLD, MPI_UNDEFINED, local_key, &split_comm);
				mpi_file_split_comm2 = mpi_filed;
			}

			//now we change the rank in the reads structure
			if (local_color == 0){
				MPI_Comm_rank(split_comm, &split_rank);
				MPI_Comm_size(split_comm, &split_size);

				g_rank = split_rank;
				g_size = split_size;

				reads[nbchr-s] = reads[nbchr-s]->next;
				localReadNumberByChr[nbchr-s] = readNumberByChr[nbchr-s];
				if (s == 2){
					unmapped_start = startOffset(g_rank,
												 g_size,
												 unmappedSize,
												 headerSize,
												 nbchr-s,
												 localReadNumberByChr[nbchr-s],
												 split_comm
												 );

					if(!unmapped_start){
						fprintf(stderr, "No header was defined for unmapped. \n Shutting down.\n");
						MPI_Finalize();
						return 0;
					}

					time_count = MPI_Wtime();
					writeSam_discordant_and_unmapped(
							split_rank,
							output_dir,
							header,
							localReadNumberByChr[nbchr-s],
							chrNames[nbchr-s],
							reads[nbchr-s],
							split_size,
							split_comm,
							file_name,
							mpi_file_split_comm2,
							finfo,
							compression_level,
							local_data,
							goff[rank],
							write_sam);

					if (split_rank == chosen_rank){
							fprintf(stderr,	"rank %d :::::[MPISORT] Time to write chromosom %s ,  %f seconds \n\n\n", split_rank,
									chrNames[nbchr-s], MPI_Wtime() - time_count);
					}
				}
				else{
					discordant_start = startOffset(g_rank,
												   g_size,
												   discordantSize,
												   headerSize,
												   nbchr-s,
												   localReadNumberByChr[nbchr-s],
												   split_comm);

					if(!discordant_start){
						fprintf(stderr, "No header was defined for discordant.\n Shutting down.\n");
						MPI_Finalize();
						return 0;
					}
					time_count = MPI_Wtime();

					writeSam_discordant_and_unmapped(
							g_rank,
							output_dir,
							header,
							localReadNumberByChr[nbchr-s],
							chrNames[nbchr-s],
							reads[nbchr-s],
							g_size,
							split_comm,
							file_name,
							mpi_file_split_comm2,
							finfo,
							compression_level,
							local_data,
							goff[rank],
							write_sam
							);


					if (split_rank == chosen_rank){
							fprintf(stderr,	"rank %d :::::[MPISORT] Time to write chromosom %s ,  %f seconds \n\n\n", split_rank,
								chrNames[nbchr-s], MPI_Wtime() - time_count);
					}

				}
				while( reads[nbchr-s]->next != NULL){
						Read *tmp_chr = reads[nbchr-s];
						reads[nbchr-s] = reads[nbchr-s]->next;
						free(tmp_chr);
				}
				free(localReadsNum_rank0);
			}
			else{
				// we do nothing
			}

			//we put a barrier before freeing pointers
			MPI_Barrier(MPI_COMM_WORLD);
			//we free the file pointer

			if  (file_pointer_to_free)
				MPI_File_close(&mpi_file_split_comm2);

			//we free the split_comm
			if (split_comm_to_free)
				MPI_Comm_free(&split_comm);

			split_comm_to_free = 0;
			file_pointer_to_free = 0;

			free(color_vec_to_send);
			free(key_vec_to_send);

		}
	} //end for (s=1; s < 3; s++){

	/*
	 *  We write the mapped reads in a file named chrX.bam
	 *	We loop over chromosomes.
	 */

	MPI_Barrier(MPI_COMM_WORLD);
	for(i = 0; i < (nbchr-2); i++){

		/*
		 * First Part of the algorithm
		 *
		 * In this part we elect a rank: the first rank
		 * that has reads to sort.
		 *
		 * Once a rank is elected, we split the communicator according to
		 * whether each rank has reads to sort for this chromosome.
		 *
		 * The new communicator is COMM_WORLD.
		 *
		 * If all jobs have reads to sort there is no need to split the
		 * communicator, and then COMM_WORLD = MPI_COMM_WORLD.
		 *
		 */

		int i1,i2;
		size_t localReadsNum_rank0[num_proc];
		localReadsNum_rank0[0]=0;
		int file_pointer_to_free = 0;
		int split_comm_to_free = 0;
		//we build a vector with rank job
		int val_tmp1 = 0;
		int val_tmp2 = 0;
		int chosen_rank = 0; //needed to tell what rank is going to compute the color and key
		int chosen_split_rank= 0; //the rank that collects data once the communicator is split; normally this rank is 0

		// the color tells which communicator the rank belongs to:
		// color = 0 will be the new communicator color,
		// otherwise the color is 1
		// the key value tells the order in the new communicator
		int *color_vec_to_send 	=  malloc(num_proc * sizeof(int));
		int *key_vec_to_send 	=  malloc(num_proc * sizeof(int));

		// first we test whether there are reads to sort
		// every rank receives the sum of all the read counts
		size_t total_reads_by_chr = 0;
		MPI_Allreduce(&readNumberByChr[i], &total_reads_by_chr, 1, MPI_LONG_LONG_INT, MPI_SUM, MPI_COMM_WORLD);

		//fprintf(stderr, "rank %d :::: readNumberByChr[i] = %zu \n", rank, readNumberByChr[i]);
		//fprintf(stderr, "rank %d :::: total_reads_by_chr = %zu \n", rank, total_reads_by_chr);

		if (total_reads_by_chr == 0)
			continue; //pass to next chromosome

		//every rank gathers the per-rank read counts
		MPI_Allgather(&readNumberByChr[i] , 1, MPI_LONG_LONG_INT, localReadsNum_rank0 , 1, MPI_LONG_LONG_INT, MPI_COMM_WORLD);


		if (rank == 0){
			//the rank 0 chooses the first rank with reads to sort
			i1=0;
			while ((i1 < num_proc) && (localReadsNum_rank0[i1] == 0)){
				chosen_rank++;
				i1++;
			}
			fprintf(stderr, "rank %d :::: Elected rank = %d \n", rank, chosen_rank);
		}

		//we broadcast the chosen rank
		//task: replace the broadcast with a send/receive
		MPI_Bcast( &chosen_rank, 1, MPI_INT, 0, MPI_COMM_WORLD);
		MPI_Barrier(MPI_COMM_WORLD);

		if (((rank == chosen_rank) || rank == 0) && (chosen_rank != 0)){

			//rank 0 sends the header to the chosen rank
			if (rank == chosen_rank){
				header = malloc((headerSize + 1)*sizeof(char));
				header[headerSize] = '\0';
				MPI_Recv(header, headerSize + 1, MPI_CHAR, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			}
			if (rank == 0){
				MPI_Send(header, headerSize + 1, MPI_CHAR, chosen_rank,  0, MPI_COMM_WORLD);
			}
		}
		else {
			//we do nothing here
		}

		MPI_Barrier(MPI_COMM_WORLD);

		if (rank == chosen_rank) {
			int counter = 0;
			//we compute the number of 0 in the localReadsNum_vec
			for(i1 = 0; i1 < num_proc; i1++){
				if (localReadsNum_rank0[i1] == 0) {
						counter++;
					}
			}
			// if no jobs without reads we do nothing
			if ( counter == 0 ){
				// nothing to do: we associate split_comm with MPI_COMM_WORLD
				fprintf(stderr, "rank %d ::::[MPISORT] we don't split the rank \n", rank);
				split_comm = MPI_COMM_WORLD;
				for (i2 = 0; i2 < num_proc; i2++) {
					if (localReadsNum_rank0[i2] == 0) {
						color_vec_to_send[i2] = 1;
						key_vec_to_send[i2] = val_tmp2;
						val_tmp2++;
					} else {
						color_vec_to_send[i2] = 0;
						key_vec_to_send[i2] = val_tmp1;
						val_tmp1++;
					}
				}
			}
			else{
				// now we compute the color according to
				// the number of reads to sort
				fprintf(stderr, "rank %d ::::[MPISORT] we split the rank \n", rank);
				for(i2 = 0; i2 < num_proc; i2++){
					if (localReadsNum_rank0[i2] == 0){
						color_vec_to_send[i2] = 1;
						key_vec_to_send[i2] = val_tmp2;
						val_tmp2++;
					} else{
						color_vec_to_send[i2] = 0;
						key_vec_to_send[i2] = val_tmp1;
						val_tmp1++;
					}
				} // end for loop
			}// end if
		}// end if (rank == chosen_rank)

		MPI_Barrier(MPI_COMM_WORLD);
		//we create key and color variable for each job
		int local_color = 0;
		int local_key = 0;
		// chosen_rank scatters the color and the key vectors
		MPI_Scatter( color_vec_to_send, 1, MPI_INT, &local_color, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
		MPI_Scatter( key_vec_to_send, 1, MPI_INT, &local_key, 1, MPI_INT, chosen_rank, MPI_COMM_WORLD);
		MPI_Barrier(MPI_COMM_WORLD);
		// now we create a communicator
		// we group all communicator
		// with color of zero
		if (local_color == 0){
			MPI_Comm_split( MPI_COMM_WORLD, local_color, local_key, &split_comm);
			ierr = MPI_File_open(split_comm, file_name,  MPI_MODE_RDONLY, finfo, &mpi_file_split_comm);
			//we ask to liberate file pointer
			file_pointer_to_free = 1;
			//we ask to liberate the split_comm
			split_comm_to_free = 1;
		}
		else{
			MPI_Comm_split( MPI_COMM_WORLD, MPI_UNDEFINED, local_key, &split_comm);
			mpi_file_split_comm = mpi_filed;
		}

		//now we change the rank in the reads structure
		if (local_color == 0){

			MPI_Comm_rank(split_comm, &split_rank);
			MPI_Comm_size(split_comm, &split_size);
			
			//we update g_rank
			g_rank = split_rank;
			g_size = split_size;
		}
		else{
			g_rank = rank; //ranks outside the split communicator keep their world rank
			g_size = split_size = num_proc;
		}

		localReadNumberByChr[i] = readNumberByChr[i];
		MPI_Barrier(MPI_COMM_WORLD);

		if ((local_color == 0) && (i < (nbchr - 2))) {

			/*
			 * Second part of the algorithm
			 *
			 * First we load coordinates, offset sources, and read size in vector
			 *
			 * Then we sort the coordinates of the reads
			 * with a bitonic sorter
			 *
			 * Then according to the reads coordinates we reoder the offset sources, and size
			 * this is done thanks to the index of the sorting.
			 *
			 * Afterward we compute the offsets of the reads in
			 * the destination file.
			 *
			 * Finally we dispatch the information to all ranks
			 * in the communicator for the next step.
			 */

			//we do a local merge sort
			if(reads[i] && reads[i]->next && reads[i]->next->next){
				mergeSort(reads[i], readNumberByChr[i]);
			}

			size_t local_readNum = localReadNumberByChr[i];

			reads[i] = reads[i]->next;

			//first we compute the dimension of the parabitonic sort
			// dimension is the number of processors where we
			// perform the bitonic sort
			// int dimensions = (int)(log2(num_processes));
			// i.e. the largest power of two <= split_size:
			// find the next greater power of two, then step back once
			int dimensions = 1;
			while (dimensions <= split_size)
				dimensions <<= 1;

			dimensions >>= 1;

			// we get the maximum number of reads among
			// all the workers

			/*
			 * Here we split the program into 2 cases
			 *
			 * 1) split_size is a power of 2 (the best case):
			 * 		the simplest case, no extra communication is needed to
			 * 		dispatch the reads evenly between the jobs
			 *
			 * 2) split_size is not a power of 2 (the worst case):
			 * 		in this case we must dispatch the reads evenly between the jobs.
			 *
			 */

			if (split_rank == chosen_split_rank){

				fprintf(stderr,	"Rank %d :::::[MPISORT] Dimensions for bitonic = %d \n", split_rank, dimensions);
				fprintf(stderr,	"Rank %d :::::[MPISORT] Split size 			   = %d \n", split_rank, split_size);

			}
			//we test the computed dimension
			if (dimensions == split_size ){

				size_t max_num_read = 0;
				MPI_Allreduce(&localReadNumberByChr[i], &max_num_read, 1, MPI_LONG_LONG_INT, MPI_MAX, split_comm);

				// if the dimension == split_size
				MPI_Barrier(split_comm);

				size_t first_local_readNum = local_readNum;

				/*
				 * Vector creation and allocation
				 */
				local_readNum = max_num_read;

				time_count = MPI_Wtime();

				size_t *local_reads_coordinates_unsorted 	= calloc(local_readNum, sizeof(size_t));
				size_t *local_reads_coordinates_sorted 		= calloc(local_readNum, sizeof(size_t));
				size_t *local_offset_source_unsorted 		= calloc(local_readNum, sizeof(size_t));
				size_t *local_offset_source_sorted 			= calloc(local_readNum, sizeof(size_t));
				int *local_dest_rank_sorted 				= calloc(local_readNum, sizeof(int));
				int *local_reads_sizes_unsorted 			= calloc(local_readNum, sizeof(int));
				int *local_reads_sizes_sorted 				= calloc(local_readNum, sizeof(int));
				int *local_source_rank_unsorted 			= calloc(local_readNum, sizeof(int));
				int *local_source_rank_sorted 				= calloc(local_readNum, sizeof(int));

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][MALLOC 1] time spent = %f s\n", split_rank, MPI_Wtime() - time_count);

				local_reads_coordinates_unsorted[0] = 0;
				local_reads_coordinates_sorted[0] 	= 0;
				local_dest_rank_sorted[0] 			= 0;
				local_reads_sizes_unsorted[0] 		= 0;
				local_reads_sizes_sorted[0] 		= 0;
				local_source_rank_unsorted[0] 		= 0;
				local_source_rank_sorted[0] 		= 0;
				local_offset_source_unsorted[0] 	= 0;
				local_offset_source_sorted[0] 		= 0;

				//these vectors are the same as the local_..._sorted ones but without zero padding
				size_t *local_reads_coordinates_sorted_trimmed = NULL;
				int *local_dest_rank_sorted_trimmed = NULL;
				int *local_reads_sizes_sorted_trimmed = NULL;
				size_t *local_offset_source_sorted_trimmed = NULL;
				size_t *local_offset_dest_sorted_trimmed = NULL;
				int *local_source_rank_sorted_trimmed = NULL;

				//vectors used in the bruck just after the parabitonic sort
				size_t *local_reads_coordinates_sorted_trimmed_for_bruck = NULL;
				int *local_dest_rank_sorted_trimmed_for_bruck = NULL;
				int *local_reads_sizes_sorted_trimmed_for_bruck = NULL;
				size_t *local_offset_source_sorted_trimmed_for_bruck = NULL;
				size_t *local_offset_dest_sorted_trimmed_for_bruck = NULL;
				int *local_source_rank_sorted_trimmed_for_bruck = NULL;


				//task Init offset and size for source - free chr
				// from mpiSort_utils.c
				get_coordinates_and_offset_source_and_size_and_free_reads(
						split_rank,
						local_source_rank_unsorted,
						local_reads_coordinates_unsorted,
						local_offset_source_unsorted,
						local_reads_sizes_unsorted,
						reads[i],
						first_local_readNum
				);

				//init indices for qksort
				size_t *coord_index = (size_t*)malloc(local_readNum*sizeof(size_t));

				for(j = 0; j < local_readNum; j++){
					coord_index[j] = j;
				}

				//To start, we sort the read coordinates locally.
				//This facilitates the bitonic sorting.
				//If the local coordinates to sort were too big we could skip
				//this step.
				time_count = MPI_Wtime();

				base_arr2 = local_reads_coordinates_unsorted;
				qksort(coord_index, local_readNum, sizeof(size_t), 0, local_readNum - 1, compare_size_t);

				if (split_rank == chosen_split_rank)
						fprintf(stderr,	"rank %d :::::[MPISORT][LOCAL SORT] time spent = %f s\n", split_rank, MPI_Wtime() - time_count);

				//We index data
				for(j = 0; j < local_readNum; j++){
					local_reads_coordinates_sorted[j] 			= local_reads_coordinates_unsorted[coord_index[j]];
					local_source_rank_sorted[j] 				= local_source_rank_unsorted[coord_index[j]];
					local_reads_sizes_sorted[j] 				= local_reads_sizes_unsorted[coord_index[j]];
					local_offset_source_sorted[j] 				= local_offset_source_unsorted[coord_index[j]];
					local_dest_rank_sorted[j] 					= rank; //will be updated after sorting the coordinates
				}

				/*
				*   FOR DEBUG
				*  
					

				for(j = 0; j < local_readNum - 1; j++){
					assert( local_reads_coordinates_sorted[j] < local_reads_coordinates_sorted[j+1]);
				}
				*/

				free(coord_index); 				 		//ok
				free(local_source_rank_unsorted); 	    //ok
				free(local_reads_coordinates_unsorted); //ok
				free(local_reads_sizes_unsorted); 		//ok
				free(local_offset_source_unsorted); 	//ok

				// we need the total number of reads.
				size_t total_num_read = 0;
				MPI_Allreduce(&localReadNumberByChr[i], &total_num_read, 1, MPI_LONG_LONG_INT, MPI_SUM, split_comm);

				/*
				 *
				 * In this section the number of bitonic dimensions
				 * is equal to the split size.
				 *
				 * In this case there is less communication in preparation
				 * for the sorting.
				 *
				 * We use the parabitonic version 2.
				 */

				//we call the bitonic sort

				time_count = MPI_Wtime();

				ParallelBitonicSort2(
					split_comm,
					split_rank,
					dimensions,
					local_reads_coordinates_sorted,
					local_reads_sizes_sorted,
					local_source_rank_sorted,
					local_offset_source_sorted,
					local_dest_rank_sorted,
					max_num_read
					);

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2] time spent = %f s\n",
											split_rank, MPI_Wtime() - time_count);
				size_t k1;
				size_t tmp2 = 0;
				for (k1 = 1; k1 < max_num_read; k1++){
					assert(local_reads_coordinates_sorted[k1-1] <= local_reads_coordinates_sorted[k1]);
					local_dest_rank_sorted[k1]= split_rank;
				}
				/*
				for (k1 = 0; k1 < max_num_read; k1++){
					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_reads_coordinates_sorted[%zu]= %zu s\n",
											split_rank, k1, local_reads_coordinates_sorted[k1]);

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_source_rank_sorted[%zu]= %d s\n",
											split_rank, k1, local_source_rank_sorted[k1]);							
				}
				*/
				size_t *local_offset_dest_sorted = malloc(max_num_read*sizeof(size_t));
				size_t last_local_offset = 0;


				// We compute the local_dest_offsets_sorted
				size_t local_total_offset = 0;

				for (k1 = 0; k1 <  max_num_read; k1++){
					local_offset_dest_sorted[k1] = local_reads_sizes_sorted[k1];
					local_total_offset += local_reads_sizes_sorted[k1];
				}

				//we make the cumulative sum of all offsets
				for (k1 = 1; k1 < max_num_read; k1++){
					local_offset_dest_sorted[k1] = local_offset_dest_sorted[k1 - 1] + local_offset_dest_sorted[k1];
				}

				//we exchange the last destination offset
				last_local_offset = local_offset_dest_sorted[max_num_read-1];


				//number of block to send
				int blocksize = 1;

				MPI_Offset *y  = calloc(split_size, sizeof(MPI_Offset));
				MPI_Offset *y2 = calloc(split_size + 1, sizeof(MPI_Offset));

				//we gather every rank's last offset on rank 0

				MPI_Gather(&last_local_offset, 1, MPI_LONG_LONG_INT, y, 1, MPI_LONG_LONG_INT, 0, split_comm);

				if (split_rank ==0){
					for (k1 = 1; k1 < (split_size + 1); k1++) {
						y2[k1] = y[k1-1];
					}
				}

				if (split_rank ==0){
					for (k1 = 1; k1 < (split_size +1); k1++) {
						y2[k1] = y2[k1-1] + y2[k1];
					}
				}

				size_t offset_to_add = 0;
				MPI_Scatter(y2, 1, MPI_LONG_LONG_INT, &offset_to_add, 1, MPI_LONG_LONG_INT, 0, split_comm);

				free(y);
				free(y2);

				//we add offset of the previous rank
				for (k1 = 0; k1 < max_num_read; k1++){
					if (local_reads_sizes_sorted[k1] != 0)
						local_offset_dest_sorted[k1] += offset_to_add;
					else
						local_offset_dest_sorted[k1] = 0;
				}


				/*
				for (k1 = 0; k1 < max_num_read; k1++){

					fprintf(stderr, "\n");

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_reads_coordinates_sorted[%zu]= %zu s\n",
											split_rank, k1, local_reads_coordinates_sorted[k1]);

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_source_rank_sorted[%zu]= %d s\n",
											split_rank, k1, local_source_rank_sorted[k1]);							
				

					fprintf(stderr,	"rank %d :::::[MPISORT][BITONIC 2]  local_offset_dest_sorted[%zu]= %d s\n",
											split_rank, k1, local_offset_dest_sorted[k1]);							
				
					fprintf(stderr, "\n");
				}
				*/

				/*
				 * we update destination rank according to
				 * original number of reads read.
				 *
				 */

				//we compute the new rank dest according to max_num_read
				size_t previous_num_reads_per_job[dimensions];
				//we create a vector of size split_size with previous reads per job
				MPI_Allgather(&first_local_readNum , 1, MPI_LONG_LONG_INT, previous_num_reads_per_job , 1, MPI_LONG_LONG_INT, split_comm);

				// we compute the position of the read in the first
				// reference without the zero padding of the bitonic sort
				size_t pos_ref0 = 0;

				//we need the number of zeros we add for the padding
				size_t N0 = max_num_read*dimensions - total_num_read;

				int new_rank = 0;
				int previous_rank = 0;
				// we compute the new rank for
				// the reads sorted by offset destination
				size_t h = 0;


				pos_ref0 = max_num_read*split_rank - N0;
				for(j = 0; j < max_num_read; j++) {
					if ( local_reads_sizes_sorted[j] != 0){
						int new_rank = chosen_split_rank;
						pos_ref0 = (max_num_read*split_rank +j) - N0;
						if (pos_ref0 >= 0) {
							size_t tmp2 = 0;
							for (h = 0; h < dimensions; h++){
								tmp2 += previous_num_reads_per_job[h];
								if ( pos_ref0 < tmp2)  {
									new_rank = h;
									break;
									}
								}
							previous_rank = local_dest_rank_sorted[j];
							local_dest_rank_sorted[j] = new_rank;
						}
					}
				}

				MPI_Barrier(split_comm);

				size_t offset  = 0;
				size_t numItems = 0;
				size_t num_read_for_bruck = 0;
				int *p = local_reads_sizes_sorted;
				if (p[0] != 0) {offset = 0;};
				if (p[max_num_read -1] == 0){offset = max_num_read;}
				else {while ((*p == 0) && (offset < max_num_read )){ offset++; p++;}}

				/*
				 * REMOVE ZERO PADDING BEFORE BRUCK
				 *
				 */

				time_count = MPI_Wtime();

				if (offset > 0){

					// we remove the zeros from the vectors; there are 2 cases:
					// either offset < max_num_read (some real reads remain)
					// or the entire vector is padding
					if ( offset < max_num_read ){

						numItems = max_num_read - offset;

						local_reads_coordinates_sorted_trimmed_for_bruck    = malloc(numItems * sizeof(size_t));
						local_offset_source_sorted_trimmed_for_bruck        = malloc(numItems * sizeof(size_t));
						local_offset_dest_sorted_trimmed_for_bruck			= malloc(numItems * sizeof(size_t));
						local_reads_sizes_sorted_trimmed_for_bruck          = malloc(numItems * sizeof(int));
						local_dest_rank_sorted_trimmed_for_bruck            = malloc(numItems * sizeof(int));
						local_source_rank_sorted_trimmed_for_bruck 		    = malloc(numItems * sizeof(int));
						size_t y=0;

						for (y = 0; y < numItems; y++){

							local_reads_coordinates_sorted_trimmed_for_bruck[y]    = local_reads_coordinates_sorted[y+offset];
							local_offset_source_sorted_trimmed_for_bruck[y]        = local_offset_source_sorted[y+offset];
							local_offset_dest_sorted_trimmed_for_bruck[y]		   = local_offset_dest_sorted[y+offset];
							local_reads_sizes_sorted_trimmed_for_bruck[y]          = local_reads_sizes_sorted[y+offset];
							local_dest_rank_sorted_trimmed_for_bruck[y]            = local_dest_rank_sorted[y+offset];
							local_source_rank_sorted_trimmed_for_bruck[y] 		   = local_source_rank_sorted[y+offset];
						}

						num_read_for_bruck = numItems;

						/*
						 *
						 * FOR DEBUG
						 *

						for(y = 0; y < num_read_for_bruck; y++){
							assert( local_reads_sizes_sorted_trimmed_for_bruck[y] 		!= 0 );
							assert( local_source_rank_sorted_trimmed_for_bruck[y] 		< dimensions);
							assert( local_dest_rank_sorted_trimmed_for_bruck[y]   		< dimensions);
							assert( local_offset_source_sorted_trimmed_for_bruck[y] 	!= 0);
							assert( local_offset_dest_sorted_trimmed_for_bruck[y] 	    != 0);
							assert( local_reads_coordinates_sorted_trimmed_for_bruck[y] != 0);
						}
						*/

					}
					else{

						numItems = 0;
						local_reads_coordinates_sorted_trimmed_for_bruck    = malloc(numItems * sizeof(size_t));
						local_offset_source_sorted_trimmed_for_bruck        = malloc(numItems * sizeof(size_t));
						local_offset_dest_sorted_trimmed_for_bruck          = malloc(numItems * sizeof(size_t));
						local_reads_sizes_sorted_trimmed_for_bruck          = malloc(numItems * sizeof(int));
						local_dest_rank_sorted_trimmed_for_bruck            = malloc(numItems * sizeof(int));
						local_source_rank_sorted_trimmed_for_bruck 		    = malloc(numItems * sizeof(int));
						num_read_for_bruck = 0;
					}
				}
				else {

					numItems = local_readNum;
					local_reads_coordinates_sorted_trimmed_for_bruck    = malloc(local_readNum * sizeof(size_t));
					local_offset_source_sorted_trimmed_for_bruck        = malloc(local_readNum * sizeof(size_t));
					local_offset_dest_sorted_trimmed_for_bruck          = malloc(local_readNum * sizeof(size_t));
					local_reads_sizes_sorted_trimmed_for_bruck          = malloc(local_readNum * sizeof(int));
					local_dest_rank_sorted_trimmed_for_bruck            = malloc(local_readNum * sizeof(int));
					local_source_rank_sorted_trimmed_for_bruck 		    = malloc(local_readNum * sizeof(int));

					size_t y=0;
					for (y = 0; y < local_readNum; y++){

						local_reads_coordinates_sorted_trimmed_for_bruck[y]    = local_reads_coordinates_sorted[y];
						local_offset_source_sorted_trimmed_for_bruck[y]        = local_offset_source_sorted[y];
						local_offset_dest_sorted_trimmed_for_bruck[y]          = local_offset_dest_sorted[y];
						local_reads_sizes_sorted_trimmed_for_bruck[y]          = local_reads_sizes_sorted[y];
						local_dest_rank_sorted_trimmed_for_bruck[y]            = local_dest_rank_sorted[y];
						local_source_rank_sorted_trimmed_for_bruck[y] 		   = local_source_rank_sorted[y];
					}

					num_read_for_bruck = numItems;

					/*
					 *
					 * FOR DEBUG
					 *
					for(y = 0; y < num_read_for_bruck; y++){
						assert( local_reads_sizes_sorted_trimmed_for_bruck[y] 		!= 0 );
						assert( local_source_rank_sorted_trimmed_for_bruck[y] 		< dimensions);
						assert( local_dest_rank_sorted_trimmed_for_bruck[y]   		< dimensions);
						assert( local_offset_source_sorted_trimmed_for_bruck[y] 	!= 0);
						assert( local_offset_dest_sorted_trimmed_for_bruck[y] 	    != 0);
						assert( local_reads_coordinates_sorted_trimmed_for_bruck[y] != 0);
					}
					*/
				}

				free(local_reads_coordinates_sorted);
				free(local_offset_source_sorted);
				free(local_offset_dest_sorted);
				free(local_reads_sizes_sorted);
				free(local_dest_rank_sorted);
				free(local_source_rank_sorted);


				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][TRIMMING] time spent = %f s\n", split_rank, MPI_Wtime() - time_count);

				/*
				 * We do a Bruck exchange keyed on the reads' rank of origin
				 */

				size_t m=0;
				int num_proc = dimensions;
				size_t *number_of_reads_by_procs = calloc( dimensions, sizeof(size_t));

				//fprintf(stderr,	"rank %d :::::[MPISORT] num_read_for_bruck = %zu \n", split_rank, num_read_for_bruck);

				for(m = 0; m < num_read_for_bruck; m++){
					 //assert(new_pbs_orig_rank_off_phase1[m] < dimensions);
					 //assert(new_pbs_dest_rank_phase1[m] < dimensions);
					 number_of_reads_by_procs[local_source_rank_sorted_trimmed_for_bruck[m]]++;
				}

				int *local_source_rank_sorted_trimmed_for_bruckv2 = malloc( num_read_for_bruck * sizeof(int));

				for(m = 0; m < num_read_for_bruck; m++){
					local_source_rank_sorted_trimmed_for_bruckv2[m] = local_source_rank_sorted_trimmed_for_bruck[m];
				}

				size_t count6 = 0;
				for(m = 0; m < dimensions; m++){
					count6 += number_of_reads_by_procs[m];
				}

				assert( count6 == num_read_for_bruck );
				MPI_Barrier(split_comm);

				size_t **reads_coordinates 		= malloc(sizeof(size_t *) * dimensions);
				size_t **local_source_offsets 	= malloc(sizeof(size_t *) * dimensions);
				size_t **dest_offsets 			= malloc(sizeof(size_t *) * dimensions);
				int **read_size 				= malloc(sizeof(int *) * dimensions);
				int **dest_rank 				= malloc(sizeof(int *) * dimensions);
				int **source_rank				= malloc(sizeof(int *) * dimensions);

				/*
				 * We send in order
				 *
				 * local_offset_source_sorted_trimmed_for_bruck
				 * local_dest_rank_sorted_trimmed_for_bruck
				 * local_reads_coordinates_sorted_trimmed_for_bruck
				 * local_reads_sizes_sorted_trimmed_for_bruck
				 *
				 */

				COMM_WORLD = split_comm;
				time_count = MPI_Wtime();

				bruckWrite3(split_rank,
							dimensions,
							count6,
							number_of_reads_by_procs,
							local_source_rank_sorted_trimmed_for_bruckv2,
							local_offset_source_sorted_trimmed_for_bruck,     //offset sources
							&local_source_offsets,
							local_dest_rank_sorted_trimmed_for_bruck,     	  //destination rank
							&dest_rank,
							local_reads_coordinates_sorted_trimmed_for_bruck, //reads coordinates
							&reads_coordinates,
							local_reads_sizes_sorted_trimmed_for_bruck,       //read size
							&read_size,
							local_source_rank_sorted_trimmed_for_bruck,		  //source rank
							&source_rank,
							local_offset_dest_sorted_trimmed_for_bruck,
							&dest_offsets
				);

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][BRUCK 3] time spent = %f s\n",
							split_rank, MPI_Wtime() - time_count);


				time_count = MPI_Wtime();

				free(local_reads_coordinates_sorted_trimmed_for_bruck);
				free(local_dest_rank_sorted_trimmed_for_bruck);
				free(local_reads_sizes_sorted_trimmed_for_bruck);
				free(local_offset_source_sorted_trimmed_for_bruck);
				free(local_offset_dest_sorted_trimmed_for_bruck);
				free(local_source_rank_sorted_trimmed_for_bruck);
				free(local_source_rank_sorted_trimmed_for_bruckv2);

				local_reads_coordinates_sorted_trimmed 	  = malloc(first_local_readNum * sizeof(size_t));
				local_offset_source_sorted_trimmed   	  = malloc(first_local_readNum * sizeof(size_t));
				local_offset_dest_sorted_trimmed   	  	  = malloc(first_local_readNum * sizeof(size_t));
				local_dest_rank_sorted_trimmed   		  = malloc(first_local_readNum * sizeof(int));
				local_source_rank_sorted_trimmed		  = malloc(first_local_readNum * sizeof(int));
				local_reads_sizes_sorted_trimmed		  = malloc(first_local_readNum * sizeof(int));

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT][FREE + MALLOC] time spent = %f s\n",
											split_rank, MPI_Wtime() - time_count);
				/*
				 * GET DATA AFTER BRUCK
				 *
				 */

				j=0;
				size_t k = 0;

				for(m = 0; m < num_proc; m++)
				{
					for(k = 0; k < number_of_reads_by_procs[m]; k++)
					{
						
						local_offset_dest_sorted_trimmed[k + j] 		= dest_offsets[m][k];
						local_dest_rank_sorted_trimmed[k + j] 			= dest_rank[m][k];
						local_reads_sizes_sorted_trimmed[k + j] 		= read_size[m][k];
						local_offset_source_sorted_trimmed[k + j] 		= local_source_offsets[m][k];
						local_reads_coordinates_sorted_trimmed[k + j] 	= reads_coordinates[m][k];
						local_source_rank_sorted_trimmed[k + j] 		= source_rank[m][k];

					}
					free(dest_offsets[m]);
					free(dest_rank[m]);
					free(read_size[m]);
					free(local_source_offsets[m]);
					free(reads_coordinates[m]);
					free(source_rank[m]);
					j += number_of_reads_by_procs[m];
				}


				free(number_of_reads_by_procs);
				if (dest_rank != NULL)
					free(dest_rank);
				if (read_size != NULL)
					free(read_size);
				if (local_source_offsets != NULL)
					free(local_source_offsets);
				if (reads_coordinates != NULL)
					free(reads_coordinates);
				if (source_rank != NULL)
					free(source_rank);
				if (dest_offsets != NULL)
					free(dest_offsets);

				local_readNum = first_local_readNum;


				/*
				 *
				 * FOR DEBUG
				 *
				for ( j = 0; j < local_readNum; j++){
					assert ( local_reads_coordinates_sorted_trimmed[j]    != 0 );
					assert ( local_offset_source_sorted_trimmed[j]        != 0 );
					assert ( local_offset_dest_sorted_trimmed[j]   		  != 0 );
					assert ( local_reads_sizes_sorted_trimmed 			  != 0 );
					assert ( local_dest_rank_sorted_trimmed[j]            < split_size );
					assert ( local_source_rank_sorted_trimmed[j] 		  < split_size );
				}
				*/

				free(local_reads_coordinates_sorted_trimmed);

				if (split_rank == chosen_split_rank)
					fprintf(stderr,	"rank %d :::::[MPISORT] we call write SAM \n", split_rank);

				malloc_trim(0);

				time_count = MPI_Wtime();

				writeSam(
					split_rank,
					output_dir,
					header,
					local_readNum,
					total_reads_by_chr,
					chrNames[i],
					reads[i],
					split_size,
					split_comm,
					chosen_split_rank,
					file_name,
					mpi_file_split_comm,
					finfo,
					compression_level,
					local_offset_dest_sorted_trimmed,
					local_offset_source_sorted_trimmed,
					local_reads_sizes_sorted_trimmed,
					local_dest_rank_sorted_trimmed,
					local_source_rank_sorted_trimmed,
					local_data,
					goff[rank],
					first_local_readNum
				);

				if (split_rank == chosen_split_rank){
					fprintf(stderr,	"rank %d :::::[MPISORT][WRITESAM] chromosom %s :::  %f seconds\n\n\n",
							split_rank, chrNames[i], MPI_Wtime() - time_count);

				}
			}
			else{

				/*
				 * We are in the case the number of cpu is
				 * not a power of 2
				 *
				 *
				 */

				parallel_sort_any_dim(
						dimensions, 				//dimension for parabitonic
						local_readNum,
						split_rank,
						split_size,
						reads,
						i, 							//chromosom number
						chosen_split_rank,
						split_comm,
						localReadNumberByChr,
						local_data,
						file_name,
						output_dir,
						finfo,
						compression_level,
						total_reads_by_chr,
						goff[rank],
						headerSize,
						header,
						chrNames[i],
						mpi_file_split_comm
					);

			} //end else: split_size is not a power of 2

		} //end if ((local_color == 0) && (i < (nbchr - 2))) //in the split communicator
		else{
			//we do nothing here
		}

		//we put a barrier before freeing pointers
		MPI_Barrier(MPI_COMM_WORLD);
		//we free the file pointer
		if  (file_pointer_to_free)
			MPI_File_close(&mpi_file_split_comm);
		//we free the split_comm
		if (split_comm_to_free){
			MPI_Comm_free(&split_comm);
		}

		free(color_vec_to_send);
		free(key_vec_to_send);

	}// end loop over chromosomes
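
The parsing phase of the snippet above reads DEFAULT_INBUF_SIZE-sized chunks with MPI_File_read_at and then backs up to the last newline so that only whole SAM lines are handed to the parser. A condensed sketch of that chunk-and-trim pattern, with a hypothetical CHUNK constant and helper name (it assumes, as the loop above does, that every chunk contains at least one newline and that 'buf' holds CHUNK + 1 bytes):

#include <mpi.h>
#include <stddef.h>

#define CHUNK (8 * 1024 * 1024)   /* hypothetical chunk size */

/* Read up to CHUNK bytes at an explicit offset, then trim back to the last
 * '\n' so only complete lines are parsed. Returns the number of bytes
 * consumed, so the caller can advance its file offset by that amount. */
static size_t read_whole_lines(MPI_File fh, MPI_Offset off, MPI_Offset end, char *buf)
{
    size_t want = (size_t)(end - off);
    if (want > CHUNK)
        want = CHUNK;

    /* MPI_File_read_at takes an int count, hence the chunking */
    MPI_File_read_at(fh, off, buf, (int)want, MPI_CHAR, MPI_STATUS_IGNORE);

    size_t last = want;                /* back up to the last newline */
    while (last > 0 && buf[last - 1] != '\n')
        last--;
    buf[last] = '\0';                  /* buf now ends on a line boundary */
    return last;                       /* caller advances: off += last */
}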
Beispiel #27
0
int main(int argc, char *argv[])
{
    int proc_num, my_rank;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    // check arguments
    if (argc != 10){ usage(); }

    int b, x, y, z, t_start, t_end, t_replay_start, t_replay_end, replay_time;
    int i, j, k, t, m, n, tmp;
    int err;
    MPI_Status status;

    // init
    char *fname = argv[1];
    b = atoi(argv[2]);				// number of variables(int)
    x = atoi(argv[3]);				// number of rows of cubic
    y = atoi(argv[4]);
    z = atoi(argv[5]);
    t_start = atoi(argv[6]);		// start time step
    t_end = atoi(argv[7]);
    t_replay_start = atoi(argv[8]); // "replay" start time step
    t_replay_end = atoi(argv[9]);

    if(my_rank == 0)
        printf("b:%d x:%d y:%d z:%d t_start:%d t_end:%d t_replay_start:%d t_replay_end:%d \n",
    		b, x, y, z, t_start, t_end, t_replay_start, t_replay_end);

    MPI_Info info = MPI_INFO_NULL;
    MPI_File fh;

/*              | b |
 			    ____________
			   /           /|  ...Proc0
			  /           //|
			 /___________// |z
	myrows I |__________|/  |
			 |			|  /
			 |          | /y
			 |          |/    ...ProcN
			 ------------
				  x*b
				  T0
*/

    // distribute work to different procs
    int myrows = y / proc_num;
    int myreadsize = b * x * myrows * z * (t_end - t_start);
    // size of one row (in ints)
    int myonereadsize = x * b;

    // allocate a buffer of b * x * myrows * z ints per time step,
    // for all (t_end - t_start) time steps
    int *buf = (int*)malloc(myreadsize * sizeof(int));
    assert(buf != NULL);


    // Open file
    err = MPI_File_open(MPI_COMM_WORLD, fname, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    assert(err == MPI_SUCCESS);

    int start_offset = 0;
    int read_cnt = 0;
    double start, finish;
    double total_io = 0.0;

    for(t = t_start; t < t_end; t++){

        // start together
        MPI_Barrier(MPI_COMM_WORLD);

        start = MPI_Wtime();

    	// each time step start offset is t*b*x*y*z*sizeof(int)
    	start_offset = t * b * x * y * z;

    	// read a slice of each time step
    	for(i = 0; i < z; i++){
        	for(j = 0; j < myrows; j++){
        		MPI_File_read_at(fh, (start_offset + i*b*x*y + j*b*x + my_rank * myonereadsize * myrows) * sizeof(int)
        				, &buf[read_cnt*myonereadsize], myonereadsize, MPI_INT, &status);
        		read_cnt++;
        	}
    	}

        finish = MPI_Wtime();
        if(my_rank == 0) printf("%d: I/O time %lf\n", t, finish - start);
        total_io += finish - start;

        start = MPI_Wtime();

        // do some computation here
        sleep(myrows / 8); 
        
        finish = MPI_Wtime();
        //if(my_rank == 0) printf("%d: Computation time %lf\n", t, finish - start);

    }

/*
    read_cnt = 0;
    for(t = t_replay_start; t < t_replay_end; t++){

        // start together
        MPI_Barrier(MPI_COMM_WORLD);

        start = MPI_Wtime();

    	// each time step start offset is t*b*x*y*z*sizeof(int)
    	start_offset = t * b * x * y * z;

    	// read a slice of each time step
    	for(i = 0; i < z; i++){
        	for(j = 0; j < myrows; j++){
        		MPI_File_read_at(fh, (start_offset + i*b*x*y + j*b*x + my_rank * myonereadsize * myrows) * sizeof(int)
        				, &buf[read_cnt*myonereadsize], myonereadsize, MPI_INT, &status);
        		read_cnt++;
        	}
    	}

        finish = MPI_Wtime();
        if(my_rank == 0) printf("%d: I/O time %lf\n", t, finish - start);
        total_io += finish - start;

        start = MPI_Wtime();

        // do some computation here
        sleep(myrows / 8);
    
        finish = MPI_Wtime();
        //if(my_rank == 0) printf("%d: Computation time %lf\n", t, finish - start);

    }

*/
    err = MPI_File_close(&fh);
    assert(err == MPI_SUCCESS);
    if(my_rank == 0)
        printf("Avg reading time: %lf\n",total_io/(t_end-t_start+ t_replay_end-t_replay_start));

    /*
    // check read numbers
    if(my_rank == 1){
		int cnt = 0;
		for(i = 0; i < b*x*myrows*z*(t_end-t_start); i++){
			if(i % (x*b) == 0)
				printf("\n");
			if(i % (x*b*myrows) == 0)
				printf("\n==============%d============\n\n",cnt++);
			printf(" %3d",buf[i]);
		}
		printf("\n");
    }

    */
	free(buf);

	MPI_Finalize();
    return 0;
}
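The benchmark above reads its y-slab one row at a time with MPI_File_read_at. Under the same layout (z planes of y rows of x*b ints per time step, y divisible by proc_num), the whole slab can be described once as a subarray and read with a single collective call; a minimal sketch with an illustrative helper name, not part of the original program (the final set_view restore is only needed if explicit-offset byte reads follow, and the int count must not overflow):

#include <mpi.h>

/* Sketch: read one time step's y-slab collectively via a subarray file view.
 * 'buf' must hold z * myrows * x * b ints. */
static void read_slab_collective(MPI_File fh, int t, int b, int x, int y, int z,
                                 int myrows, int my_rank, int *buf)
{
    int sizes[3]    = { z, y,      x * b };           /* full array, in ints, C order */
    int subsizes[3] = { z, myrows, x * b };           /* this rank's slab */
    int starts[3]   = { 0, my_rank * myrows, 0 };
    MPI_Datatype slab;

    MPI_Type_create_subarray(3, sizes, subsizes, starts, MPI_ORDER_C, MPI_INT, &slab);
    MPI_Type_commit(&slab);

    /* the byte displacement selects the time step; the view exposes only the slab */
    MPI_Offset disp = (MPI_Offset)t * b * x * y * z * sizeof(int);
    MPI_File_set_view(fh, disp, MPI_INT, slab, "native", MPI_INFO_NULL);
    MPI_File_read_all(fh, buf, z * myrows * x * b, MPI_INT, MPI_STATUS_IGNORE);

    /* restore a plain byte view before further explicit-offset reads */
    MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL);
    MPI_Type_free(&slab);
}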
Beispiel #28
0
int main (int argc, char *argv[])
{
    
    int proc_num, my_rank, len;
    int i, j;
    double start_time, elapsed_time, all_time;
    double all_time_max, all_time_avg, all_time_min;
    MPI_Status status;
    MPI_File fh;
    MPI_Datatype contig_type;

    MPI_Init(&argc, &argv);
    
    // get the number of procs and rank in the comm
    MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    
    if(argc != 4) {
        printf("Wrong argument number!\n");
        printf("Use %s filename request_size repeat_times\n", argv[0]);
        MPI_Finalize();
        return 0;
    }

    int req_size       = atoi(argv[2]);
    int repeat_time    = atoi(argv[3]);
    MPI_Offset stride  = (MPI_Offset)proc_num * req_size;
    MPI_Offset tmp_pos = (MPI_Offset)my_rank * req_size;

    char *read_data    = (char*)malloc(req_size);
     
    MPI_Type_contiguous( req_size, MPI_CHAR, &contig_type);
    MPI_Type_commit(&contig_type);


    start_time = MPI_Wtime();
    //MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    int rc = MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (rc != MPI_SUCCESS) {
    	printf("File does not exist or cannot be opened\n");
    	MPI_Finalize();
    	return -1;
    }
       
    for(i = 0; i < repeat_time; i++) {
    
    //    MPI_Barrier(MPI_COMM_WORLD);
        MPI_File_read_at( fh, tmp_pos, read_data, 1, contig_type, &status );
        tmp_pos += stride;
    }
   

    MPI_File_close(&fh);

    elapsed_time = MPI_Wtime() - start_time;

    MPI_Reduce(&elapsed_time, &all_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

    all_time_avg /= proc_num;



    MPI_Barrier(MPI_COMM_WORLD);
    
    double data_in_mb = (proc_num*(double)req_size*repeat_time)/(1024.0*1024.0);
    if(my_rank == 0)
        printf("Total time: %lf Min time: %lf Avg time: %lf Total data: %dM Agg Bandwidth: %lf\n", all_time, all_time_min, all_time_avg, (int)data_in_mb, data_in_mb/all_time);
    

//    printf("%d: %lf\n",my_rank, elapsed_time);
    free(read_data);
    MPI_Type_free(&contig_type);
    
    MPI_Finalize();

    return 0;
}
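The benchmark above issues blocking MPI_File_read_at calls in a loop. A nonblocking variant with MPI_File_iread_at lets the next offset (or other work) be prepared while the read is in flight; a minimal sketch mirroring the benchmark's stride arithmetic (the helper name is illustrative, not from the original):

#include <mpi.h>

/* Sketch: the same strided access pattern, issued nonblockingly. */
static void strided_iread(MPI_File fh, int my_rank, int proc_num,
                          int req_size, int repeat_time, char *read_data)
{
    MPI_Offset pos    = (MPI_Offset)my_rank * req_size;
    MPI_Offset stride = (MPI_Offset)proc_num * req_size;
    int i;

    for (i = 0; i < repeat_time; i++) {
        MPI_Request req;
        MPI_File_iread_at(fh, pos, read_data, req_size, MPI_CHAR, &req);
        pos += stride;                    /* overlappable work goes here */
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    }
}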
Beispiel #29
0
PIDX_return_code PIDX_generic_rst_buf_read_and_aggregate(PIDX_generic_rst_id generic_rst_id)
{
  PIDX_variable_group var_grp = generic_rst_id->idx->variable_grp[generic_rst_id->group_index];
  PIDX_variable var0 = var_grp->variable[generic_rst_id->first_index];

  // This process does not have any patch to process (after restructuring)
  if (var0->patch_group_count == 0)
      return PIDX_success;

  int v;
  MPI_File fh;
  char *directory_path;
  char *data_set_path;

  directory_path = malloc(sizeof(*directory_path) * PATH_MAX);
  memset(directory_path, 0, sizeof(*directory_path) * PATH_MAX);

  data_set_path = malloc(sizeof(*data_set_path) * PATH_MAX);
  memset(data_set_path, 0, sizeof(*data_set_path) * PATH_MAX);

  strncpy(directory_path, generic_rst_id->idx->filename, strlen(generic_rst_id->idx->filename) - 4);
  char time_template[512];
  sprintf(time_template, "%%s/%s", generic_rst_id->idx->filename_time_template);
  sprintf(data_set_path, time_template, directory_path, generic_rst_id->idx->current_time_step);

  for (v = generic_rst_id->first_index; v <= generic_rst_id->last_index; ++v)
  {
    PIDX_variable var = var_grp->variable[v];
    //int bytes_per_value = var->bpv / 8;

    // copy the size and offset to output
    Ndim_patch_group patch_group = var->rst_patch_group;
    Ndim_patch out_patch = var->rst_patch_group->reg_patch;

    int nx = out_patch->size[0];
    int ny = out_patch->size[1];
    int nz = out_patch->size[2];

    var->rst_patch_group->reg_patch->buffer = malloc(nx * ny * nz * (var->bpv/8) * var->vps);

    if (var->rst_patch_group->reg_patch->buffer == NULL)
      return PIDX_err_chunk;

    memset(var->rst_patch_group->reg_patch->buffer, 0, nx * ny * nz * (var->bpv/8) * var->vps);

    int data_offset = 0, v1 = 0;
    for (v1 = 0; v1 < v; v1++)
      data_offset = data_offset + (out_patch->size[0] * out_patch->size[1] * out_patch->size[2] * (var_grp->variable[v1]->vps * (var_grp->variable[v1]->bpv/8)));

    int buffer_size =  out_patch->size[0] * out_patch->size[1] * out_patch->size[2] * (var->vps * (var->bpv/8));
    char *file_name;
    file_name = malloc(PATH_MAX * sizeof(*file_name));
    memset(file_name, 0, PATH_MAX * sizeof(*file_name));

    sprintf(file_name, "%s/time%09d/%d_0", directory_path, generic_rst_id->idx->current_time_step, generic_rst_id->idx_c->grank);

    MPI_Status status;
    int ret = 0;
    ret = MPI_File_open(MPI_COMM_SELF, file_name, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    if (ret != MPI_SUCCESS)
    {
      fprintf(stderr, "Line %d File %s File opening %s\n", __LINE__, __FILE__, file_name);
      return PIDX_err_rst;
    }

    ret = MPI_File_read_at(fh, data_offset, out_patch->buffer, (buffer_size), MPI_BYTE, &status);
    if (ret != MPI_SUCCESS)
    {
      fprintf(stderr, "Line %d File %s\n", __LINE__, __FILE__);
      return PIDX_err_rst;
    }

    ret = MPI_File_close(&fh);
    if (ret != MPI_SUCCESS)
    {
      fprintf(stderr, "Line %d File %s\n", __LINE__, __FILE__);
      return PIDX_err_rst;
    }

    int k1, j1, i1, r, index = 0, recv_o = 0, send_o = 0, send_c = 0;
    for (r = 0; r < var->rst_patch_group->count; r++)
    {
      for (k1 = patch_group->patch[r]->offset[2]; k1 < patch_group->patch[r]->offset[2] + patch_group->patch[r]->size[2]; k1++)
      {
        for (j1 = patch_group->patch[r]->offset[1]; j1 < patch_group->patch[r]->offset[1] + patch_group->patch[r]->size[1]; j1++)
        {
          for (i1 = patch_group->patch[r]->offset[0]; i1 < patch_group->patch[r]->offset[0] + patch_group->patch[r]->size[0]; i1 = i1 + patch_group->patch[r]->size[0])
          {
            index = ((patch_group->patch[r]->size[0])* (patch_group->patch[r]->size[1]) * (k1 - patch_group->patch[r]->offset[2])) + ((patch_group->patch[r]->size[0]) * (j1 - patch_group->patch[r]->offset[1])) + (i1 - patch_group->patch[r]->offset[0]);
            send_o = index * var->vps * (var->bpv/8);
            send_c = (patch_group->patch[r]->size[0]);
            recv_o = (nx * ny * (k1 - out_patch->offset[2])) + (nx * (j1 - out_patch->offset[1])) + (i1 - out_patch->offset[0]);

            memcpy(var->rst_patch_group->patch[r]->buffer + send_o, out_patch->buffer + (recv_o * var->vps * (var->bpv/8)), send_c * var->vps * (var->bpv/8));
          }
        }
      }
    }

    free(var->rst_patch_group->reg_patch->buffer);
    var->rst_patch_group->reg_patch->buffer = 0;

  }

  return PIDX_success;
}
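The function above passes a status to MPI_File_read_at but never checks how many bytes were actually delivered. MPI_Get_count on that status exposes short reads (for example when reading past end of file); a small sketch with a hypothetical helper, not part of PIDX:

#include <mpi.h>
#include <stdio.h>

/* Sketch: read exactly nbytes at the given offset, or report a short read. */
static int read_exact(MPI_File fh, MPI_Offset offset, void *buf, int nbytes)
{
    MPI_Status status;
    int got = 0;

    int ret = MPI_File_read_at(fh, offset, buf, nbytes, MPI_BYTE, &status);
    if (ret != MPI_SUCCESS)
        return ret;

    MPI_Get_count(&status, MPI_BYTE, &got);
    if (got != nbytes) {
        fprintf(stderr, "short read: wanted %d bytes, got %d\n", nbytes, got);
        return MPI_ERR_IO;
    }
    return MPI_SUCCESS;
}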
int main (int argc, char *argv[]) {
	int rank, size;

	MPI_File fh_in, fh_out;
	MPI_Offset offset;
	MPI_Status status;
	MPI_Group origin_group, new_group;
	MPI_Comm custom_world = MPI_COMM_WORLD;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(custom_world, &size);
	MPI_Comm_rank(custom_world, &rank);

	// read command
	if (argc < 4) {
		if (rank == MASTER_RANK) {
			fprintf(stderr, "Insufficient args\n");
			fprintf(stderr, "Usage: %s N input_file output_file\n", argv[0]);
		}
		MPI_Finalize();
		return 0;
	}

	const int N = atoi(argv[1]);
	const char *INPUT_NAME = argv[2];
	const char *OUTPUT_NAME = argv[3];

	// Deal with the case where (N < size)
	if (N < size) {
		// obtain the group of proc. in the world communicator
		MPI_Comm_group(custom_world, &origin_group);

		// remove unwanted ranks
		int ranges[][3] = {{N, size-1, 1}};
		MPI_Group_range_excl(origin_group, 1, ranges, &new_group);

		// create a new communicator
		MPI_Comm_create(custom_world, new_group, &custom_world);

		if (custom_world == MPI_COMM_NULL) {
			// terminate those unwanted processes
			MPI_Finalize();
			exit(0);
		}

		size = N;
	}

	// Read file using MPI-IO
	int *local_buf;
	int num_per_node = N / size;
	offset = rank * num_per_node * sizeof(int);

	if (rank == (size - 1)) {
		num_per_node += N % size;
	}

	local_buf = malloc(num_per_node * sizeof(int));

	MPI_File_open(custom_world, INPUT_NAME, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh_in);
	MPI_File_read_at(fh_in, offset, local_buf, num_per_node, MPI_INT, &status);
	MPI_File_close(&fh_in);

	// Odd-even sort
	int sorted = false, all_sorted = false;
	int recv;
	while (!sorted || !all_sorted) {
		sorted = true;
		// local sorting
		int i;
		// odd-phase
		for (i = 1; i < num_per_node; i += 2) {
			if (local_buf[i] < local_buf[i-1]) {
				swap(&local_buf[i], &local_buf[i-1]);
				sorted = false;
			}
		}
		// even-phase
		for (i = 0; i < num_per_node; i += 2) {
			if(i == 0) { continue; }
			if (local_buf[i] < local_buf[i-1]) {
				swap(&local_buf[i], &local_buf[i-1]);
				sorted = false;
			}
		}

		// transportation
		// odd phase
		if (rank % 2) {
			MPI_Send(&local_buf[0], 1, MPI_INT, rank - 1, MSG_RECV, custom_world);
			MPI_Recv(&recv, 1, MPI_INT, rank - 1, MSG_RECV, custom_world, &status);
			if (recv > local_buf[0]) {
				local_buf[0] = recv;
				sorted = false;
			}
		} else if (rank != (size - 1)) {
			MPI_Recv(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world, &status);
			if(recv < local_buf[num_per_node - 1]) {
				swap(&recv, &local_buf[num_per_node - 1]);
				sorted = false;
			}
			MPI_Send(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world);
		}

		// even phase
		if ((rank % 2) == 0 && rank != MASTER_RANK) {
			MPI_Send(&local_buf[0], 1, MPI_INT, rank - 1, MSG_RECV, custom_world);
			MPI_Recv(&recv, 1, MPI_INT, rank - 1, MSG_RECV, custom_world, &status);
			if (recv > local_buf[0]) {
				local_buf[0] = recv;
				sorted = false;
			}
		} else if(rank > MASTER_RANK && rank != (size - 1)) {
			MPI_Recv(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world, &status);
			if(recv < local_buf[num_per_node - 1]) {
				swap(&recv, &local_buf[num_per_node - 1]);
				sorted = false;
			}
			MPI_Send(&recv, 1, MPI_INT, rank + 1, MSG_RECV, custom_world);
		}

		MPI_Allreduce(&sorted, &all_sorted, 1, MPI_INT, MPI_LAND, custom_world);
	}

	// Write file using MPI-IO
	MPI_File_open(custom_world, OUTPUT_NAME, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh_out);
	MPI_File_write_at(fh_out, offset, local_buf, num_per_node, MPI_INT, &status);
	MPI_File_close(&fh_out);

	free(local_buf);

	MPI_Barrier(custom_world);
	MPI_Finalize();

	return 0;
}
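The sort above gives each rank N / size integers, lets the last rank absorb the remainder, and derives the MPI_File_read_at offset from the base block size only. The same decomposition as a small helper, for reference (a minimal sketch; the helper name is illustrative, not from the original):

#include <mpi.h>

/* Sketch of the block decomposition used by the sort above. */
static void block_decompose(int N, int size, int rank,
                            MPI_Offset *offset, int *count)
{
    int base = N / size;
    *count  = (rank == size - 1) ? base + N % size : base;   /* last rank takes the remainder */
    *offset = (MPI_Offset)rank * base * (MPI_Offset)sizeof(int);
}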