예제 #1
0
	stream_offset seek(stream_offset off, std::ios_base::seekdir way) {
		// Moves the individual file pointer of mpifile by `off`,
		// measured from the origin selected by `way`:
		//  - ios_base::beg -> MPI_SEEK_SET (start of the sequence)
		//  - ios_base::cur -> MPI_SEEK_CUR (current position)
		//  - ios_base::end -> MPI_SEEK_END (end of the sequence)
		// Returns the position reported by MPI_File_get_position after
		// the seek and increments the `seeks` counter.

		int whence = MPI_SEEK_SET;
		if (way == std::ios::beg) {
			whence = MPI_SEEK_SET;
		} else if (way == std::ios::end) {
			whence = MPI_SEEK_END;
		} else if (way == std::ios::cur) {
			whence = MPI_SEEK_CUR;
		} else {
			abort(); // Should never happen
		}

		MPI_File_seek(mpifile, (MPI_Offset)(off), whence);

		MPI_Offset new_position;
		MPI_File_get_position(mpifile, &new_position);

		++seeks;
		return new_position;
	}
예제 #2
0
파일: seekf.c 프로젝트: davidheryanto/sc14
FORTRAN_API void FORT_CALL mpi_file_seek_(MPI_Fint *fh,MPI_Offset *offset,int *whence, int *ierr )
{
    /* Fortran entry point: translate the Fortran file handle into a C
       MPI_File, forward the by-reference arguments to MPI_File_seek and
       store its return code in *ierr. */
    MPI_File c_handle = MPI_File_f2c(*fh);

    *ierr = MPI_File_seek(c_handle, *offset, *whence);
}
예제 #3
0
int lemonFinishReading(LemonReader *reader)
{
  /* Completes a pending read on the reader: calls
   * MPI_File_read_at_all_end, advances reader->pos by the requested byte
   * count, re-establishes a byte view at reader->off and seeks to the new
   * position. Returns LEMON_SUCCESS when no read was pending or when the
   * expected number of bytes arrived, LEMON_ERR_READ otherwise. */
  char MPImode[] = "native";
  MPI_Status status;
  int nodes;
  int bytes_read;
  int short_read;

  if (reader->is_busy == 0)
    return LEMON_SUCCESS;

  MPI_Comm_size(reader->cartesian, &nodes);

  MPI_File_read_at_all_end(*reader->fp, reader->buffer, &status);

  reader->pos += reader->bytes_wanted;
  MPI_File_set_view(*reader->fp, reader->off, MPI_BYTE, MPI_BYTE, MPImode, MPI_INFO_NULL);
  MPI_File_seek(*reader->fp, reader->pos, MPI_SEEK_SET);

  MPI_Get_count(&status, MPI_BYTE, &bytes_read);

  /* A striped read is expected to deliver an equal share per node,
   * an unstriped read the full amount. */
  if (reader->is_striped)
    short_read = (bytes_read != reader->bytes_wanted / nodes);
  else
    short_read = (bytes_read != reader->bytes_wanted);

  /* Doing a data read should never get us to EOF, only header scanning */
  if (short_read)
  {
    fprintf(stderr, "[LEMON] Node %d reports in lemonFinishReading:\n"
                    "        Could not read the required amount of data.\n", reader->my_rank);
    return LEMON_ERR_READ;
  }

  /* The request is complete: reset the bookkeeping fields. */
  reader->is_striped = 0;
  reader->is_busy = 0;
  reader->buffer = NULL;
  reader->bytes_wanted = 0;

  return LEMON_SUCCESS;
}
예제 #4
0
/*
 * Positions this process's individual file pointer so that every process in
 * `communicator` writes its `sizeToWrite` bytes back to back, ordered by rank.
 *
 * The start offset is the sum of the sizes announced by all lower ranks,
 * obtained from an inclusive MPI_Scan minus the local contribution. Sizes are
 * accumulated in 64 bits so that the combined output may exceed 4 GiB.
 *
 * Returns False when the stream has no file, the file is not an MPI file, or
 * an MPI call reports an error; True otherwise.
 */
Bool MPIStream_SetOffset( Stream* stream, SizeT sizeToWrite, MPI_Comm communicator ) {
	MPI_Offset          offset           = 0;
	unsigned long long  localSizeToWrite = 0;
	unsigned long long  sizePartialSum   = 0;
	
	if ( stream->_file == NULL ) {
		return False;
	}

	if ( stream->_file->type != MPIFile_Type ) {
		return False;
	}

	/* Sum up the individual sizeToWrites for this processor and all lower ones */
	localSizeToWrite = sizeToWrite;
	if ( MPI_Scan( &localSizeToWrite, &sizePartialSum, 1, MPI_UNSIGNED_LONG_LONG,
	               MPI_SUM, communicator ) != MPI_SUCCESS ) {
		return False;
	}
	/* Now, just subtract the sizeToWrite of current processor to get our start point */
	offset = (MPI_Offset)( sizePartialSum - localSizeToWrite );
	
	if ( MPI_File_seek( *(MPI_File*)stream->_file->fileHandle, offset, MPI_SEEK_SET ) != MPI_SUCCESS ) {
		return False;
	}
	
	return True;
}
예제 #5
0
int
main(int argc, char **argv)
{
   /* MPI stuff. */
   MPI_File fh;
   int my_rank, mpi_size;
   int data_in;
   MPI_Status status;

   /* Initialize MPI. */
   MPI_Init(&argc,&argv);
   MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
   MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
   /*MPI_Get_processor_name(mpi_name, &mpi_namelen);*/
   /*printf("mpi_name: %s size: %d rank: %d\n", mpi_name, 
     mpi_size, my_rank);*/

   if (my_rank == 0)
   {
      printf("\n*** Testing basic MPI file I/O.\n");
      printf("*** testing file create with parallel I/O with MPI...");
   }

   if (MPI_File_open(MPI_COMM_WORLD, FILE, MPI_MODE_RDWR | MPI_MODE_CREATE,
                     MPI_INFO_NULL, &fh) != MPI_SUCCESS) ERR;
   if (MPI_File_seek(fh, my_rank * sizeof(int), MPI_SEEK_SET) != MPI_SUCCESS) ERR;
   if (MPI_File_write(fh, &my_rank, 1, MPI_INT, &status) != MPI_SUCCESS) ERR;
   if (MPI_File_close(&fh) != MPI_SUCCESS) ERR;

   /* Reopen and check the file. */
   if (MPI_File_open(MPI_COMM_WORLD, FILE, MPI_MODE_RDONLY,
                     MPI_INFO_NULL, &fh) != MPI_SUCCESS) ERR;
   if (MPI_File_seek(fh, my_rank * sizeof(int), MPI_SEEK_SET) != MPI_SUCCESS) ERR;
   if (MPI_File_read(fh, &data_in, 1, MPI_INT, &status) != MPI_SUCCESS) ERR;
   if (data_in != my_rank) ERR;
   if (MPI_File_close(&fh) != MPI_SUCCESS) ERR;

   /* Shut down MPI. */
   MPI_Finalize();

   if (my_rank == 0)
   {
      SUMMARIZE_ERR;
      FINAL_RESULTS;
   }
   return 0;
}
/*
 * Writes `size` bytes to `filename` through a private (MPI_COMM_SELF) file
 * handle in chunks of at most MAX_BUFFER_SIZE bytes, syncs, closes and
 * deletes the file, then reports the elapsed time and bandwidth on stderr.
 *
 * The buffer contents are never initialized, so the bytes written are
 * arbitrary. If the buffer cannot be allocated the function reports the
 * failure and returns without touching the file.
 *
 * myid     - rank printed in the report
 * numprocs - multiplier applied to the byte count in the report
 * size     - number of bytes to write; must be non-zero
 * filename - path of the scratch file
 */
void cache_flush_ind(int myid,
		     int numprocs,
		     int size,
		     char *filename)
{
    char *buf;
    MPI_File fh;
    double time;
    int64_t comp = 0;

    assert(size != 0);

    buf = (char *) malloc(MAX_BUFFER_SIZE * sizeof(char));
    if (buf == NULL)
    {
	fprintf(stderr, "cache_flush_ind: malloc buf of size %d failed\n",
		    MAX_BUFFER_SIZE);
	/* Writing from a NULL buffer would crash; give up instead. */
	return;
    }
    
    MPI_File_open(MPI_COMM_SELF, filename,
		  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, 
		      "native", MPI_INFO_NULL);
    MPI_File_seek(fh, 0, MPI_SEEK_SET);

    time = MPI_Wtime();

    while (comp != size)
    {
	/* Write a full buffer, or whatever is left on the final pass. */
	int64_t remaining = size - comp;
	int chunk = (remaining > MAX_BUFFER_SIZE) ? (int) MAX_BUFFER_SIZE
						  : (int) remaining;

	MPI_File_write(fh, buf, chunk, MPI_BYTE, MPI_STATUS_IGNORE);
	comp += chunk;
    }
    
    free(buf);

    MPI_File_sync(fh);
    time = MPI_Wtime() - time;
    MPI_File_close(&fh);
    MPI_File_delete(filename, MPI_INFO_NULL);
    fprintf(stderr, 
	    "proc %d:cache_flush_ind: File %s written/deleted of "
	    "size %.1f MBytes\n"
	    "Time: %f secs Bandwidth: %f MBytes / sec\n\n",
	    myid,
	    filename, comp*numprocs/1024.0/1024.0,
	    time, comp*numprocs/1024.0/1024.0 / time);
}
예제 #7
0
/*
 * parallel_rewind()
 *
 * Moves the MPI file pointer of C->mfp back to offset 0.
 * With MPI enabled the MPI_File_seek return code is passed through;
 * without MPI the function does nothing and returns 1.
 */
int parallel_rewind(coordinateInfo *C) {
#ifdef MPI
  return MPI_File_seek( *(C->mfp), 0L, MPI_SEEK_SET);
#else
  return 1;
#endif
}
예제 #8
0
void ompi_file_seek_f(MPI_Fint *fh, MPI_Offset *offset,
		     MPI_Fint *whence, MPI_Fint *ierr)
{
    /* Fortran binding for MPI_File_seek: convert the handle and the
       `whence` integer to their C forms, perform the seek, and hand the
       result back through ierr when the caller supplied one. */
    MPI_File c_fh;
    int status;

    c_fh = MPI_File_f2c(*fh);
    status = MPI_File_seek(c_fh, (MPI_Offset) *offset,
                           OMPI_FINT_2_INT(*whence));

    if (ierr == NULL) {
        return;
    }
    *ierr = OMPI_INT_2_FINT(status);
}
예제 #9
0
파일: logfiles.c 프로젝트: dud3/ELEVATOR
// whoAreWe .. 0 for elevator, 1 for person
// ourID .. which number are we
// floor .. where are we
void dumpLog(int whoAreWe, int ourID, char* name, char* msg, int floor) {

  // what time is it now?
  time_t lfTime;
  time(&lfTime);
  char lfStrTime[1000];
  struct tm * p = localtime(&lfTime);
  strftime(lfStrTime, 1000, "%c", p);

  char lfBuffer[lfChunkLen+1];

  for (int i = 0; i < lfChunkLen; i++) {
    lfBuffer[i] = ' ';
  }

  char *newBuffer;
  newBuffer = strcpy(lfBuffer, "[");
  newBuffer = strcat(newBuffer, lfStrTime);
  // newBuffer = strcat(newBuffer, ctime(&lfTime));
  newBuffer = strcat(newBuffer, "] ");

  char const lfDigit[] = "0123456789";
  char b[] = "0";
  char c[] = "0";
  char* strID = b;
  strID[0] = lfDigit[ourID];
  char* floorID = c;
  *floorID = lfDigit[floor];

  if (whoAreWe == 0) {
    newBuffer = strcat(strcat(newBuffer, "Elevator "), strID);
  } else {
    newBuffer = strcat(strcat(newBuffer, "Person "), strID);
    newBuffer = strcat(strcat(strcat(newBuffer, " ("), name), ")");
  }

  strcat(strcat(strcat(newBuffer, msg), floorID), ".");
  
  int i = 0;
  while (lfBuffer[i] != '\0') {
    i += 1;
  }
  lfBuffer[i] = ' ';
  
  lfBuffer[lfChunkLen-1] = '\n';
  lfBuffer[lfChunkLen] = '\0';
  
  MPI_File_seek(lfFile, (lfPos * lfSize + lfRank) * lfChunkLen, MPI_SEEK_SET);

  MPI_File_write(lfFile, lfBuffer, lfChunkLen, MPI_CHAR, &lfStatus);

  lfPos += 1;

}
예제 #10
0
/* Ordered-I/O check: each rank writes its rank with
 * MPI_File_write_ordered, then every rank reads the whole file with
 * MPI_File_read_all and expects buf[i] == i, and finally reads its own
 * value back with MPI_File_read_ordered. Mismatches are counted in errs
 * and reported through MTest_Finalize. */
int main( int argc, char *argv[] )
{
    MPI_Comm comm;
    MPI_File fh;
    MPI_Status status;
    int *buf;
    int size, rank;
    int errs = 0;
    int rc;
    int i;

    MTest_Init( &argc, &argv );

    comm = MPI_COMM_WORLD;
    MPI_File_open( comm, (char*)"test.ord", 
		   MPI_MODE_RDWR | MPI_MODE_CREATE |
		   MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh );

    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );

    /* One slot per rank; only slot 0 is used for the ordered write. */
    buf = (int *)malloc( size * sizeof(int) );
    buf[0] = rank;
    rc = MPI_File_write_ordered( fh, buf, 1, MPI_INT, &status );
    if (rc != 0) {
	MTestPrintErrorMsg( "File_write_ordered", rc );
	errs++;
    }
    /* make sure all writes finish before we seek/read */
    MPI_Barrier(comm);
    
    /* Set the individual pointer to 0, since we want to use a read_all */
    MPI_File_seek( fh, 0, MPI_SEEK_SET ); 
    MPI_File_read_all( fh, buf, size, MPI_INT, &status );

    /* Slot i must hold the value written by rank i. */
    i = 0;
    while (i < size) {
	if (buf[i] != i) {
	    errs++;
	    fprintf( stderr, "%d: buf[%d] = %d\n", rank, i, buf[i] );
	}
	i++;
    }

    /* Rewind the shared pointer and poison the buffer before the
       ordered read. */
    MPI_File_seek_shared( fh, 0, MPI_SEEK_SET );
    i = 0;
    while (i < size) {
	buf[i] = -1;
	i++;
    }
    MPI_File_read_ordered( fh, buf, 1, MPI_INT, &status );
    if (buf[0] != rank) {
	errs++;
	fprintf( stderr, "%d: buf[0] = %d\n", rank, buf[0] );
    }

    free( buf );
    MPI_File_close( &fh );

    MTest_Finalize( errs );
    MPI_Finalize();
    return 0;
}
예제 #11
0
파일: common.c 프로젝트: georgekw/tma4280
void saveVectorMPI(char* name, const Vector x)
{
  MPI_File f;
  MPI_File_open(*x->comm, name, MPI_MODE_WRONLY | MPI_MODE_CREATE,
                MPI_INFO_NULL, &f);
  MPI_File_seek(f, 17*x->displ[x->comm_rank], MPI_SEEK_SET);
  for (int i=0;i<x->len;++i) {
    char num[21];
    sprintf(num,"%016f ",x->data[i]);
    MPI_File_write(f, num, 17, MPI_CHAR, MPI_STATUS_IGNORE);
  }
  MPI_File_close(&f);
}
예제 #12
0
/* parallel_fseek_end()
 *
 * Moves the MPI file pointer of C->mfp to the end of the file.
 * Returns 0 on success. On an MPI error the error is reported through
 * printMPIerr and 1 is returned. Without MPI the function always
 * returns 1.
 */
int parallel_fseek_end(coordinateInfo *C) {
#ifdef MPI
  int status = MPI_File_seek( *(C->mfp),0L,MPI_SEEK_END);

  if (status == MPI_SUCCESS)
    return 0;

  printMPIerr(status,"trajFile_fseek_end:");
  return 1;
#else
  return 1;
#endif
}
예제 #13
0
/*
 * parallel_fseek()
 *
 * Moves the MPI file pointer of C->mfp to the start of `frame`, i.e. to
 * offset C->titleSize + frame * C->frameSize from the beginning.
 * Returns 0 on success. On an MPI error the error is reported through
 * printMPIerr and 1 is returned. Without MPI the function always
 * returns 1.
 */
int parallel_fseek(coordinateInfo *C, int frame) {
#ifdef MPI
  int status = MPI_File_seek( *(C->mfp), C->titleSize+(frame*C->frameSize), MPI_SEEK_SET);

  if (status == MPI_SUCCESS)
    return 0;

  printMPIerr(status,"trajFile_fseek");
  return 1;
#else
  return 1;
#endif
}
예제 #14
0
// Reads this process's section of a combined parallel file into `buffer`.
//
// File layout as read here: an int holding the number of processes, then
// one int "next offset" per process, then the per-process payloads. The
// first process of `pc` reads the header and scatters to each process its
// start offset and its end offset; every process then seeks to its start
// and reads (end - start) bytes.
//
// Throws (via UG_THROW / UG_COND_THROW) if the file cannot be opened, if
// the stored process count differs from pcl::NumProcs(), or if the offset
// table yields a negative section size.
void ReadCombinedParallelFile(ug::BinaryBuffer &buffer, std::string strFilename, pcl::ProcessCommunicator pc)
{
	MPI_Status status;
	MPI_Comm m_mpiComm = pc.get_mpi_communicator();
	MPI_File fh;

	// Mutable, NUL-terminated copy of the path with no fixed size limit
	// (a fixed 1024-byte buffer overflowed on long paths).
	std::vector<char> filename(strFilename.begin(), strFilename.end());
	filename.push_back('\0');
	// NOTE(review): MPI_MODE_CREATE makes a missing file appear as an empty
	// one instead of failing the open — confirm this is intended for a reader.
	if(MPI_File_open(m_mpiComm, &filename[0], MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh))
		UG_THROW("could not open "<<strFilename);

	std::vector<int> allNextOffsets;
	allNextOffsets.resize(pc.size()+1);

	// The payload of the first process starts right after the header.
	allNextOffsets[0] = (pc.size()+1)*sizeof(int);
	bool bFirst = pc.get_proc_id(0) == pcl::ProcRank();
	if(bFirst)
	{
		int numProcs;
		MPI_File_read(fh, &numProcs, sizeof(numProcs), MPI_BYTE, &status);
		UG_COND_THROW(numProcs != pcl::NumProcs(), "checkPoint numProcs = " << numProcs << ", but running on " << pcl::NumProcs());

		for(size_t i=1; i<allNextOffsets.size(); i++)
		{
			MPI_File_read(fh, &allNextOffsets[i], sizeof(allNextOffsets[i]), MPI_BYTE, &status);
//			UG_LOG("allNextOffsets[" << i << "] = " << allNextOffsets[i] << "\n");
		}
	}
	// Entry i is the start of process i's section, entry i+1 its end.
	int myNextOffset, myNextOffset2;
	MPI_Scatter(&allNextOffsets[0], 1, MPI_INT, &myNextOffset, 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);
	MPI_Scatter(&allNextOffsets[1], 1, MPI_INT, &myNextOffset2, 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);

	int mySize = myNextOffset2-myNextOffset;

//	UG_LOG_ALL_PROCS("MySize = " << mySize << "\n" << "myNextOffset = " << myNextOffset << " - " << myNextOffset2 << "\n");

	if(mySize < 0)
	{
		MPI_File_close(&fh);
		UG_THROW("invalid offset table in "<<strFilename);
	}

	MPI_File_seek(fh, myNextOffset, MPI_SEEK_SET);

	// The vector releases the scratch memory even if buffer.write throws.
	std::vector<char> data(mySize);
	if(mySize > 0)
		MPI_File_read(fh, &data[0], mySize, MPI_BYTE, &status);
	buffer.clear();
	buffer.reserve(mySize);
	if(mySize > 0)
		buffer.write(&data[0], mySize);

	MPI_File_close(&fh);
	//	UG_LOG("File read.\n");
}
예제 #15
0
// Writes the contents of `buffer` from every process of `pc` into one
// combined file.
//
// File layout as written here: an int holding pcl::NumProcs(), then one
// int per process giving the file offset just past that process's payload,
// then the payloads in process order. The first process of `pc` writes the
// header; every process then seeks to its own start offset and writes
// buffer.write_pos() bytes.
//
// Throws (via UG_THROW) if the file cannot be opened.
void WriteCombinedParallelFile(ug::BinaryBuffer &buffer, std::string strFilename, pcl::ProcessCommunicator pc)
{
	MPI_Status status;
	MPI_Comm m_mpiComm = pc.get_mpi_communicator();
	MPI_File fh;
	bool bFirst = pc.get_proc_id(0) == pcl::ProcRank();

	// Mutable, NUL-terminated copy of the path with no fixed size limit
	// (a fixed 1024-byte buffer overflowed on long paths).
	std::vector<char> filename(strFilename.begin(), strFilename.end());
	filename.push_back('\0');
	if(MPI_File_open(m_mpiComm, &filename[0], MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh))
		UG_THROW("could not open "<<strFilename);

	// The inclusive scan yields the end of this process's payload relative
	// to the start of the payload area.
	int mySize = buffer.write_pos();
	int myNextOffset = 0;
	MPI_Scan(&mySize, &myNextOffset, 1, MPI_INT, MPI_SUM, m_mpiComm);


	std::vector<int> allNextOffsets;
	allNextOffsets.resize(pc.size(), 0);
	//else allNextOffsets.resize(1);

	// Shift by the header size: numProcs plus one offset per process.
	myNextOffset += (pc.size()+1)*sizeof(int);
	MPI_Gather(&myNextOffset, 1, MPI_INT, &allNextOffsets[0], 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);

	if(bFirst)
	{
		int numProcs = pcl::NumProcs();
		MPI_File_write(fh, &numProcs, sizeof(numProcs), MPI_BYTE, &status);
		for(size_t i=0; i<allNextOffsets.size(); i++)
		{
//			UG_LOG("allNextOffsets[" << i << "] = " << allNextOffsets[i] << "\n");
			MPI_File_write(fh, &allNextOffsets[i], sizeof(allNextOffsets[i]), MPI_BYTE, &status);
		}
	}

	// Start of this process's payload = its end offset minus its size.
	int myOffset = myNextOffset - buffer.write_pos();
	MPI_File_seek(fh, myOffset, MPI_SEEK_SET);

//	UG_LOG_ALL_PROCS("MySize = " << mySize << "\n" << " myOffset = " << myOffset << "\n");
//	UG_LOG_ALL_PROCS("buffer.write_pos() = " << buffer.write_pos() << "\n" << "(pc.size()+1)*sizeof(size_t) = " << (pc.size()+1)*sizeof(size_t) << "\n");

	MPI_File_write(fh, buffer.buffer(), buffer.write_pos(), MPI_BYTE, &status);

	MPI_File_close(&fh);
}
예제 #16
0
void seissol::checkpoint::mpio::Wavefield::write(const void* header, size_t headerSize)
{
	SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION);

	logInfo(rank()) << "Checkpoint backend: Writing.";

	// Write the header
	writeHeader(header, headerSize);

	// Save data
	SCOREP_USER_REGION_DEFINE(r_write_wavefield);
	SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON);
	checkMPIErr(setDataView(file()));

	unsigned int totalIter = totalIterations();
	unsigned int iter = iterations();
	unsigned int count = dofsPerIteration();
	if (m_useLargeBuffer) {
		totalIter = (totalIter + sizeof(real) - 1) / sizeof(real);
		iter = (iter + sizeof(real) - 1) / sizeof(real);
		count *= sizeof(real);
	}
	unsigned long offset = 0;
	for (unsigned int i = 0; i < totalIter; i++) {
		if (i == iter-1)
			// Last iteration
			count = numDofs() - (iter-1) * count;

		checkMPIErr(MPI_File_write_all(file(), const_cast<real*>(&dofs()[offset]), count, MPI_DOUBLE, MPI_STATUS_IGNORE));

		if (i < iter-1)
			offset += count;
		// otherwise we just continue writing the last chunk over and over
		else if (i != totalIter-1)
			checkMPIErr(MPI_File_seek(file(), -count * sizeof(real), MPI_SEEK_CUR));
	}

	SCOREP_USER_REGION_END(r_write_wavefield);

	// Finalize the checkpoint
	finalizeCheckpoint();

	logInfo(rank()) << "Checkpoint backend: Writing. Done.";
}
예제 #17
0
파일: logfiles.c 프로젝트: dud3/ELEVATOR
void wordCountLog(char* key, int value) {

  // what time is it now?
  time_t lfTime;
  time(&lfTime);
  char lfStrTime[1000];
  struct tm * p = localtime(&lfTime);
  strftime(lfStrTime, 1000, "%c", p);

  char lfBuffer[lfChunkLen+1];

  for (int i = 0; i < lfChunkLen; i++) {
    lfBuffer[i] = ' ';
  }

  char *newBuffer;
  newBuffer = strcpy(lfBuffer, "[");
  newBuffer = strcat(newBuffer, lfStrTime);
  // newBuffer = strcat(newBuffer, ctime(&lfTime));
  newBuffer = strcat(newBuffer, "] ");

  char const lfDigit[] = "0123456789";
  char b[] = "0";
  char* valID = b;
  valID[0] = lfDigit[value];

  newBuffer = strcat(strcat(strcat(newBuffer, key), ": "), valID);
  
  int i = 0;
  while (lfBuffer[i] != '\0') {
    i += 1;
  }
  lfBuffer[i] = ' ';
  
  lfBuffer[lfChunkLen-1] = '\n';
  lfBuffer[lfChunkLen] = '\0';
  
  MPI_File_seek(lfFile, (lfPos * lfSize + lfRank) * lfChunkLen, MPI_SEEK_SET);

  MPI_File_write(lfFile, lfBuffer, lfChunkLen, MPI_CHAR, &lfStatus);

  lfPos += 1;

}
예제 #18
0
파일: aiori-MPIIO.c 프로젝트: gcongiu/E10
/*
 * Seeks `fd` to the position that corresponds to the absolute byte
 * `offset`. When a file view is in use the absolute offset is first
 * converted into view units (multiples of the transfer size); otherwise
 * it is used unchanged. Returns the original `offset` argument.
 */
static IOR_offset_t
SeekOffset_MPIIO(MPI_File fd, IOR_offset_t offset, IOR_param_t *param)
{
    IOR_offset_t seekTarget = offset;
    int          taskSlot;      /* position of this task within a block row */
    int          sharers;       /* number of tasks sharing the file */

    if (param->filePerProc) {
        taskSlot = 0;
        sharers  = 1;
    } else {
        taskSlot = (rank + rankOffset) % param->numTasks;
        sharers  = param->numTasks;
    }

    if (param->useFileView) {
        /* recall that offsets in a file view are
           counted in units of transfer size */
        if (!param->filePerProc) {
            /*
             * this formula finds a file view offset for a task
             * from an absolute offset
             */
            seekTarget = ((param->blockSize / param->transferSize)
                          * (seekTarget / (param->blockSize * sharers)))
                         + (((seekTarget % (param->blockSize * sharers))
                          - (taskSlot * param->blockSize))
                           / param->transferSize);
        } else {
            seekTarget = seekTarget / param->transferSize;
        }
    }

    MPI_CHECK(MPI_File_seek(fd, seekTarget, MPI_SEEK_SET),
              "cannot seek offset");
    return(offset);
} /* SeekOffset_MPIIO() */
예제 #19
0
// Not meant to be executed: in PARALLEL builds the first statement raises
// ImproperUseException via EXCEPTION1. The MPI-IO calls that follow use
// uninitialized arguments; judging by the function name they exist only
// so that these symbols are referenced at link time — confirm with the
// build setup. Without PARALLEL the function is empty.
void
PullInMPI_IOSymbols()
{
#ifdef PARALLEL

    //Don't call this!
    EXCEPTION1(ImproperUseException, "Do not call PullInMPI_IOSymbols");

    // File handle, open arguments and size.
    MPI_File fh;
    MPI_Info info;
    char *nm;
    MPI_Offset sz;

    // Seek and read arguments.
    int whence;
    void *buf;
    int count;
    MPI_Datatype datatype;
    MPI_Status status;

    MPI_File_open(VISIT_MPI_COMM, nm, 0, info, &fh);
    MPI_File_get_size(fh, &sz);
    MPI_File_seek(fh, sz, whence);
    MPI_File_read(fh, buf, count, datatype, &status);
#endif
}
예제 #20
0
/*
 * Large-file test, single process only.
 *
 * Usage: large -fname filename
 *
 * Writes NTIMES blocks of SIZE long long values (value = global index) to
 * the file, prints the resulting file size, then reads everything back and
 * checks each value. Any MPI failure or allocation failure aborts the run.
 */
int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Status status;
    MPI_Offset size;
    long long *buf, i;
    char *filename;
    int j, mynod, nprocs, len, flag, err;

    MPI_Init(&argc,&argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs != 1) {
	fprintf(stderr, "Run this program on one process only\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* Locate "-fname"; the argument after it is the file name. */
    i = 1;
    while ((i < argc) && strcmp("-fname", *argv)) {
	i++;
	argv++;
    }
    if (i >= argc) {
	fprintf(stderr, "\n*#  Usage: large -fname filename\n\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
    argv++;
    len = strlen(*argv);
    filename = (char *) malloc(len+1);
    if (!filename) {
	fprintf(stderr, "not enough memory to allocate file name\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }
    strcpy(filename, *argv);
    fprintf(stderr, "This program creates an 4 Gbyte file. Don't run it if you don't have that much disk space!\n");

    buf = (long long *) malloc(SIZE * sizeof(long long));
    if (!buf) {
	fprintf(stderr, "not enough memory to allocate buffer\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }

    err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                        MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) {
	fprintf(stderr, "MPI_File_open returned error\n");
	MPI_Abort(MPI_COMM_WORLD, 1);
    }

    for (i=0; i<NTIMES; i++) {
	for (j=0; j<SIZE; j++)
	    buf[j] = i*SIZE + j;
	
	err = MPI_File_write(fh, buf, SIZE, MPI_DOUBLE, &status);
        /* MPI_DOUBLE because not all MPI implementations define
           MPI_LONG_LONG_INT, even though the C compiler supports long long. */
        if (err != MPI_SUCCESS) {
	    fprintf(stderr, "MPI_File_write returned error\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
    }

    MPI_File_get_size(fh, &size);
    /* MPI_Offset is implementation-defined; cast to match %lld. */
    fprintf(stderr, "file size = %lld bytes\n", (long long) size);

    MPI_File_seek(fh, 0, MPI_SEEK_SET);

    /* Poison the buffer so stale data cannot pass the check below. */
    for (j=0; j<SIZE; j++) buf[j] = -1;

    flag = 0;
    for (i=0; i<NTIMES; i++) {
	err = MPI_File_read(fh, buf, SIZE, MPI_DOUBLE, &status);
        /* MPI_DOUBLE because not all MPI implementations define
           MPI_LONG_LONG_INT, even though the C compiler supports long long. */
        if (err != MPI_SUCCESS) {
	    fprintf(stderr, "MPI_File_read returned error\n");
	    MPI_Abort(MPI_COMM_WORLD, 1);
	}
	for (j=0; j<SIZE; j++) 
	    if (buf[j] != i*SIZE + j) {
		fprintf(stderr, "error: buf %d is %lld, should be %lld \n", j, buf[j], 
                                 (long long) (i*SIZE + j));
		flag = 1;
	    }
    }

    if (!flag) fprintf(stderr, "Data read back is correct\n");
    MPI_File_close(&fh);

    free(buf);
    free(filename);
    MPI_Finalize(); 
    return 0;
}
예제 #21
0
파일: floyd2d.c 프로젝트: dungtn/mpi-floyd
int main(int argc, char *argv[]) {

	int i, n, nlocal;
	int numprocs, dims[2], periods[2], keep_dims[2];
	int myrank, my2drank, mycoords[2];
	MPI_File f; char* filename = "input/16";
	MPI_Comm comm_2d, comm_row, comm_col;
	MPI_Status status;

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

	dims[ROW] = dims[COL] = sqrt(numprocs);

	periods[ROW] = periods[COL] = 1;
	MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

	MPI_Comm_rank(comm_2d, &my2drank);
	MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

	keep_dims[ROW] = 0;
	keep_dims[COL] = 1;
	MPI_Cart_sub(comm_2d, keep_dims, &comm_row);

	keep_dims[ROW] = 1;
	keep_dims[COL] = 0;
	MPI_Cart_sub(comm_2d, keep_dims, &comm_col);

	if(MPI_File_open(comm_2d, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
		fprintf(stderr, "Cannot open file %s\n", filename);
		MPI_Abort(comm_2d, FILE_NOT_FOUND);
		MPI_Finalize();
		return 1;
	}
	MPI_File_seek(f, 0, MPI_SEEK_SET);
	MPI_File_read(f, &n, 1, MPI_INT, &status); nlocal = n/dims[ROW];

	int *a = (int *)malloc(nlocal * nlocal * sizeof(int));
	for(i = 0; i < nlocal; i++) {
		MPI_File_seek(f, ((mycoords[0] * nlocal  + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET);
		MPI_File_read(f, &a[i * nlocal], nlocal, MPI_INT, &status);
	}
	MPI_File_close(&f);

 	int j;
	if(my2drank == 3) {
		for(i = 0; i < nlocal; i++) {
			for(j = 0; j < nlocal; j++) {
				printf("%d ", a[i * nlocal +j]);
			}
			printf("\n");
		}
	}

	double start = MPI_Wtime();
	floyd_all_pairs_sp_2d(n, nlocal, a, comm_2d, comm_row, comm_col);
	double stop = MPI_Wtime();
	printf("[%d] Completed in %1.3f seconds\n", my2drank, stop-start);

	MPI_Comm_free(&comm_col);
	MPI_Comm_free(&comm_row);
	if(my2drank == 3) {
		for(i = 0; i < nlocal; i++) {
			for(j = 0; j < nlocal; j++) {
				printf("%d ", a[i * nlocal +j]);
			}
			printf("\n");
		}
	}
	if(MPI_File_open(comm_2d, "output/16", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
			printf("Cannot open file %s\n", "out");
			MPI_Abort(comm_2d, FILE_NOT_FOUND);
			MPI_Finalize();
			return 1;
	}
	if(my2drank == 0) {
		MPI_File_seek(f, 0, MPI_SEEK_SET);
		MPI_File_write(f, &n, 1, MPI_INT, &status);
	}
	for(i = 0; i < nlocal; i++) {
		MPI_File_seek(f, ((mycoords[0] * nlocal  + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET);
		MPI_File_write(f, &a[i * nlocal], nlocal, MPI_INT, &status);
	}

	MPI_File_close(&f);
	free(a);

	MPI_Comm_free(&comm_2d);
	MPI_Finalize();
	return 0;
}
예제 #22
0
파일: plas.cpp 프로젝트: jwend/p_las2las
int main(int argc, char *argv[])
{
  int i;
  int is_mpi = 1;
  int debug = 0;
  bool verbose = false;
  bool force = false;
  // fixed header changes 
  int set_version_major = -1;
  int set_version_minor = -1;
  int set_point_data_format = -1;
  int set_point_data_record_length = -1;
  int set_gps_time_endcoding = -1;
  // variable header changes
  bool remove_extra_header = false;
  bool remove_all_variable_length_records = false;
  int remove_variable_length_record = -1;
  int remove_variable_length_record_from = -1;
  int remove_variable_length_record_to = -1;
  bool remove_tiling_vlr = false;
  bool remove_original_vlr = false;
  // extract a subsequence
  //unsigned int subsequence_start = 0;
  //unsigned int subsequence_stop = U32_MAX;
  I64 subsequence_start = 0;
  I64 subsequence_stop = I64_MAX;


  // fix files with corrupt points
  bool clip_to_bounding_box = false;
  double start_time = 0;
  time_t wall_start_time;
  time_t wall_end_time;
  LASreadOpener lasreadopener;
  //if(is_mpi)lasreadopener.setIsMpi(TRUE);
  GeoProjectionConverter geoprojectionconverter;
  LASwriteOpener laswriteopener;
  if(is_mpi)laswriteopener.setIsMpi(TRUE);


  int process_count = 1;
  int rank = 0;
  start_time = taketime();
  time(&wall_start_time);

  if (is_mpi){
      MPI_Init(&argc,&argv);
      MPI_Comm_size(MPI_COMM_WORLD,&process_count);
      MPI_Comm_rank(MPI_COMM_WORLD,&rank);
      if(debug) printf ("MPI task %d has started...\n", rank);
  }



  if (argc == 1)
  {

    fprintf(stderr,"las2las.exe is better run in the command line or via the lastool.exe GUI\n");
    char file_name[256];
    fprintf(stderr,"enter input file: "); fgets(file_name, 256, stdin);
    file_name[strlen(file_name)-1] = '\0';
    lasreadopener.set_file_name(file_name);
    fprintf(stderr,"enter output file: "); fgets(file_name, 256, stdin);
    file_name[strlen(file_name)-1] = '\0';
    laswriteopener.set_file_name(file_name);

  }
  else
  {
    for (i = 1; i < argc; i++)
    {
      //if (argv[i][0] == '�') argv[i][0] = '-';
      if (strcmp(argv[i],"-week_to_adjusted") == 0)
      {
        set_gps_time_endcoding = 1;
      }
      else if (strcmp(argv[i],"-adjusted_to_week") == 0)
      {
        set_gps_time_endcoding = 0;
      }
    }
    if (!geoprojectionconverter.parse(argc, argv)) byebye(true);
    if (!lasreadopener.parse(argc, argv)) byebye(true);
    if (!laswriteopener.parse(argc, argv)) byebye(true);
  }

  for (i = 1; i < argc; i++)
  {
    if (argv[i][0] == '\0')
    {
      continue;
    }
    else if (strcmp(argv[i],"-h") == 0 || strcmp(argv[i],"-help") == 0)
    {
      fprintf(stderr, "LAStools (by [email protected]) version %d\n", LAS_TOOLS_VERSION);
      usage();
    }
    else if (strcmp(argv[i],"-v") == 0 || strcmp(argv[i],"-verbose") == 0)
    {
      verbose = true;
    }
    else if (strcmp(argv[i],"-version") == 0)
    {
      fprintf(stderr, "LAStools (by [email protected]) version %d\n", LAS_TOOLS_VERSION);
      byebye();
    }
    else if (strcmp(argv[i],"-gui") == 0)
    {

      fprintf(stderr, "WARNING: not compiled with GUI support. ignoring '-gui' ...\n");

    }
    else if (strcmp(argv[i],"-cores") == 0)
    {

      fprintf(stderr, "WARNING: not compiled with multi-core batching. ignoring '-cores' ...\n");
      i++;

    }
    else if (strcmp(argv[i],"-force") == 0)
    {
      force = true;
    }
    else if (strcmp(argv[i],"-subseq") == 0)
    {
      if ((i+2) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 2 arguments: start stop\n", argv[i]);
        byebye(true);
      }
      subsequence_start = (unsigned int)atoi(argv[i+1]); subsequence_stop = (unsigned int)atoi(argv[i+2]);
      i+=2;
    }
    else if (strcmp(argv[i],"-start_at_point") == 0)
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: start\n", argv[i]);
        byebye(true);
      }
      subsequence_start = (unsigned int)atoi(argv[i+1]);
      i+=1;
    }
    else if (strcmp(argv[i],"-stop_at_point") == 0)
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: stop\n", argv[i]);
        byebye(true);
      }
      subsequence_stop = (unsigned int)atoi(argv[i+1]);
      i+=1;
    }
    else if (strcmp(argv[i],"-set_version") == 0)
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: major.minor\n", argv[i]);
        byebye(true);
      }
      if (sscanf(argv[i+1],"%d.%d",&set_version_major,&set_version_minor) != 2)
      {
        fprintf(stderr, "ERROR: cannot understand argument '%s' for '%s'\n", argv[i+1], argv[i]);
        usage(true);
      }
      i+=1;
    }
    else if (strcmp(argv[i],"-set_version_major") == 0)
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: major\n", argv[i]);
        byebye(true);
      }
      set_version_major = atoi(argv[i+1]);
      i+=1;
    }
    else if (strcmp(argv[i],"-set_version_minor") == 0)
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: minor\n", argv[i]);
        byebye(true);
      }
      set_version_minor = atoi(argv[i+1]);
      i+=1;
    }
    else if (strcmp(argv[i],"-remove_extra") == 0)
    {
      remove_extra_header = true;
    }
    else if (strcmp(argv[i],"-remove_all_vlrs") == 0)
    {
      remove_all_variable_length_records = true;
    }
    else if (strcmp(argv[i],"-remove_vlr") == 0)
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: number\n", argv[i]);
        byebye(true);
      }
      remove_variable_length_record = atoi(argv[i+1]);
      remove_variable_length_record_from = -1;
      remove_variable_length_record_to = -1;
      i++;
    }
    else if (strcmp(argv[i],"-remove_vlrs_from_to") == 0)
    {
      if ((i+2) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 2 arguments: start end\n", argv[i]);
        byebye(true);
      }
      remove_variable_length_record = -1;
      remove_variable_length_record_from = atoi(argv[i+1]);
      remove_variable_length_record_to = atoi(argv[i+2]);
      i+=2;
    }
    else if (strcmp(argv[i],"-remove_tiling_vlr") == 0)
    {
      remove_tiling_vlr = true;
      i++;
    }
    else if (strcmp(argv[i],"-remove_original_vlr") == 0)
    {
      remove_original_vlr = true;
      i++;
    }
    else if (strcmp(argv[i],"-set_point_type") == 0 || strcmp(argv[i],"-set_point_data_format") == 0 || strcmp(argv[i],"-point_type") == 0) 
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: type\n", argv[i]);
        byebye(true);
      }
      set_point_data_format = atoi(argv[i+1]);
      i++;
    }
    else if (strcmp(argv[i],"-set_point_data_record_length") == 0 || strcmp(argv[i],"-set_point_size") == 0 || strcmp(argv[i],"-point_size") == 0) 
    {
      if ((i+1) >= argc)
      {
        fprintf(stderr,"ERROR: '%s' needs 1 argument: size\n", argv[i]);
        byebye(true);
      }
      set_point_data_record_length = atoi(argv[i+1]);
      i++;
    }
    else if (strcmp(argv[i],"-clip_to_bounding_box") == 0 || strcmp(argv[i],"-clip_to_bb") == 0) 
    {
      clip_to_bounding_box = true;
    }
    else if ((argv[i][0] != '-') && (lasreadopener.get_file_name_number() == 0))
    {
      lasreadopener.add_file_name(argv[i]);
      argv[i][0] = '\0';
    }
    else
    {
      fprintf(stderr, "ERROR: cannot understand argument '%s'\n", argv[i]);
      usage(true);
    }
  }



  // check input

  if (!lasreadopener.active())
  {
    fprintf(stderr,"ERROR: no input specified\n");
    usage(true, argc==1);
  }
  
  BOOL extra_pass = laswriteopener.is_piped();

  // for piped output we need an extra pass

  if (extra_pass)
  {
    if (lasreadopener.is_piped())
    {
      fprintf(stderr, "ERROR: input and output cannot both be piped\n");
      usage(true);
    }
  }

  // make sure we do not corrupt the input file

  if (lasreadopener.get_file_name() && laswriteopener.get_file_name() && (strcmp(lasreadopener.get_file_name(), laswriteopener.get_file_name()) == 0))
  {
    fprintf(stderr, "ERROR: input and output file name are identical\n");
    usage(true);
  }
    
  // possibly loop over multiple input files

  while (lasreadopener.active())
  {
   // if (verbose) start_time = taketime();

    // open lasreader

    LASreader* lasreader = lasreadopener.open();

    if (lasreader == 0)
    {
      fprintf(stderr, "ERROR: could not open lasreader\n");
      usage(true, argc==1);
    }

    // store the inventory for the header

    LASinventory lasinventory;

    // the point we write sometimes needs to be copied

    LASpoint* point = 0;

    // prepare the header for output

    if (set_gps_time_endcoding != -1)
    {
      if (set_gps_time_endcoding == 0)
      {
        if ((lasreader->header.global_encoding & 1) == 0)
        {
          fprintf(stderr, "WARNING: global encoding indicates file already in GPS week time\n");
          if (force)
          {
            fprintf(stderr, "         forced conversion.\n");
          }
          else
          {
            fprintf(stderr, "         use '-force' to force conversion.\n");
            byebye(true);
          }
        }
        else
        {
          lasreader->header.global_encoding &= ~1;
        }
      }
      else if (set_gps_time_endcoding == 1)
      {
        if ((lasreader->header.global_encoding & 1) == 1)
        {
          fprintf(stderr, "WARNING: global encoding indicates file already in Adjusted Standard GPS time\n");
          if (force)
          {
            fprintf(stderr, "         forced conversion.\n");
          }
          else
          {
            fprintf(stderr, "         use '-force' to force conversion.\n");
            byebye(true);
          }
        }
        else
        {
          lasreader->header.global_encoding |= 1;
        }
      }
    }

    if (set_version_major != -1)
    {
      if (set_version_major != 1)
      {
        fprintf(stderr, "ERROR: unknown version_major %d\n", set_version_major);
        byebye(true);
      }
      lasreader->header.version_major = (U8)set_version_major;
    }

    if (set_version_minor >= 0)
    {
      if (set_version_minor > 4)
      {
        fprintf(stderr, "ERROR: unknown version_minor %d\n", set_version_minor);
        byebye(true);
      }
      if (set_version_minor < 3)
      {
        if (lasreader->header.version_minor == 3)
        {
          lasreader->header.header_size -= 8;
          lasreader->header.offset_to_point_data -= 8;
        }
        else if (lasreader->header.version_minor >= 4)
        {
          lasreader->header.header_size -= (8 + 140);
          lasreader->header.offset_to_point_data -= (8 + 140);
        }
      }
      else if (set_version_minor == 3)
      {
        if (lasreader->header.version_minor < 3)
        {
          lasreader->header.header_size += 8;
          lasreader->header.offset_to_point_data += 8;
          lasreader->header.start_of_waveform_data_packet_record = 0;
        }
        else if (lasreader->header.version_minor >= 4)
        {
          lasreader->header.header_size -= 140;
          lasreader->header.offset_to_point_data -= 140;
        }
      }
      else if (set_version_minor == 4) 
      {
        if (lasreader->header.version_minor < 3)
        {
          lasreader->header.header_size += (8 + 140);
          lasreader->header.offset_to_point_data += (8 + 140);
          lasreader->header.start_of_waveform_data_packet_record = 0;
        }
        else if (lasreader->header.version_minor == 3)
        {
          lasreader->header.header_size += 140;
          lasreader->header.offset_to_point_data += 140;
        }
      }

      if ((set_version_minor <= 3) && (lasreader->header.version_minor >= 4))
      {
        if (lasreader->header.point_data_format > 5)
        {
          switch (lasreader->header.point_data_format)
          {
          case 6:
            fprintf(stderr, "WARNING: downgrading point_data_format from %d to 1\n", lasreader->header.point_data_format);
            lasreader->header.point_data_format = 1;
            fprintf(stderr, "         and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 2);
            lasreader->header.point_data_record_length -= 2;
            break;
          case 7:
            fprintf(stderr, "WARNING: downgrading point_data_format from %d to 3\n", lasreader->header.point_data_format);
            lasreader->header.point_data_format = 3;
            fprintf(stderr, "         and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 2);
            lasreader->header.point_data_record_length -= 2;
            break;
          case 8:
            fprintf(stderr, "WARNING: downgrading point_data_format from %d to 3\n", lasreader->header.point_data_format);
            lasreader->header.point_data_format = 3;
            fprintf(stderr, "         and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 4);
            lasreader->header.point_data_record_length -= 4;
            break;
          case 9:
            fprintf(stderr, "WARNING: downgrading point_data_format from %d to 4\n", lasreader->header.point_data_format);
            lasreader->header.point_data_format = 4;
            fprintf(stderr, "         and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 2);
            lasreader->header.point_data_record_length -= 2;
            break;
          case 10:
            fprintf(stderr, "WARNING: downgrading point_data_format from %d to 5\n", lasreader->header.point_data_format);
            lasreader->header.point_data_format = 5;
            fprintf(stderr, "         and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 4);
            lasreader->header.point_data_record_length -= 4;
            break;
          default:
            fprintf(stderr, "ERROR: unknown point_data_format %d\n", lasreader->header.point_data_format);
            byebye(true);
          }
        }
        point = new LASpoint;
        point->init(&lasreader->header, lasreader->header.point_data_format, lasreader->header.point_data_record_length);
      }

      lasreader->header.version_minor = (U8)set_version_minor;
    }

    // are we supposed to change the point data format

    if (set_point_data_format != -1)
    {
      if (set_point_data_format < 0 || set_point_data_format > 10)
      {
        fprintf(stderr, "ERROR: unknown point_data_format %d\n", set_point_data_format);
        byebye(true);
      }
      // depending on the conversion we may need to copy the point
      if (convert_point_type_from_to[lasreader->header.point_data_format][set_point_data_format])
      {
        if (point == 0) point = new LASpoint;
      }
      lasreader->header.point_data_format = (U8)set_point_data_format;
      lasreader->header.clean_laszip();
      switch (lasreader->header.point_data_format)
      {
      case 0:
        lasreader->header.point_data_record_length = 20;
        break;
      case 1:
        lasreader->header.point_data_record_length = 28;
        break;
      case 2:
        lasreader->header.point_data_record_length = 26;
        break;
      case 3:
        lasreader->header.point_data_record_length = 34;
        break;
      case 4:
        lasreader->header.point_data_record_length = 57;
        break;
      case 5:
        lasreader->header.point_data_record_length = 63;
        break;
      case 6:
        lasreader->header.point_data_record_length = 30;
        break;
      case 7:
        lasreader->header.point_data_record_length = 36;
        break;
      case 8:
        lasreader->header.point_data_record_length = 38;
        break;
      case 9:
        lasreader->header.point_data_record_length = 59;
        break;
      case 10:
        lasreader->header.point_data_record_length = 67;
        break;
      }
    }

    // are we supposed to change the point data record length

    if (set_point_data_record_length != -1)
    {
      I32 num_extra_bytes = 0;
      switch (lasreader->header.point_data_format)
      {
      case 0:
        num_extra_bytes = set_point_data_record_length - 20;
        break;
      case 1:
        num_extra_bytes = set_point_data_record_length - 28;
        break;
      case 2:
        num_extra_bytes = set_point_data_record_length - 26;
        break;
      case 3:
        num_extra_bytes = set_point_data_record_length - 34;
        break;
      case 4:
        num_extra_bytes = set_point_data_record_length - 57;
        break;
      case 5:
        num_extra_bytes = set_point_data_record_length - 63;
        break;
      case 6:
        num_extra_bytes = set_point_data_record_length - 30;
        break;
      case 7:
        num_extra_bytes = set_point_data_record_length - 36;
        break;
      case 8:
        num_extra_bytes = set_point_data_record_length - 38;
        break;
      case 9:
        num_extra_bytes = set_point_data_record_length - 59;
        break;
      case 10:
        num_extra_bytes = set_point_data_record_length - 67;
        break;
      }
      if (num_extra_bytes < 0)
      {
        fprintf(stderr, "ERROR: point_data_format %d needs record length of at least %d\n", lasreader->header.point_data_format, set_point_data_record_length - num_extra_bytes);
        byebye(true);
      }
      if (lasreader->header.point_data_record_length < set_point_data_record_length)
      {
        if (!point) point = new LASpoint;
      }
      lasreader->header.point_data_record_length = (U16)set_point_data_record_length;
      lasreader->header.clean_laszip();
    }

    // if the point needs to be copied set up the data fields

    if (point)
    {
      point->init(&lasreader->header, lasreader->header.point_data_format, lasreader->header.point_data_record_length);
    }

    // maybe we should remove some stuff

    if (remove_extra_header)
    {
      lasreader->header.clean_user_data_in_header();
      lasreader->header.clean_user_data_after_header();
    }

    if (remove_all_variable_length_records)
    {
      lasreader->header.clean_vlrs();
    }
    else
    {
      if (remove_variable_length_record != -1)
      {
        lasreader->header.remove_vlr(remove_variable_length_record);
      }
    
      if (remove_variable_length_record_from != -1)
      {
        for (i = remove_variable_length_record_to; i >= remove_variable_length_record_from; i--)
        {
          lasreader->header.remove_vlr(i);
        }
      }
    }

    if (remove_tiling_vlr)
    {
      lasreader->header.clean_lastiling();
    }

    if (remove_original_vlr)
    {
      lasreader->header.clean_lasoriginal();
    }

    // maybe we should add / change the projection information
    LASquantizer* reproject_quantizer = 0;
    LASquantizer* saved_quantizer = 0;
    if (geoprojectionconverter.has_projection(true) || geoprojectionconverter.has_projection(false))
    {
      if (!geoprojectionconverter.has_projection(true) && lasreader->header.vlr_geo_keys)
      {
        geoprojectionconverter.set_projection_from_geo_keys(lasreader->header.vlr_geo_keys[0].number_of_keys, (GeoProjectionGeoKeys*)lasreader->header.vlr_geo_key_entries, lasreader->header.vlr_geo_ascii_params, lasreader->header.vlr_geo_double_params);
      }

      if (geoprojectionconverter.has_projection(true) && geoprojectionconverter.has_projection(false))
      {
        reproject_quantizer = new LASquantizer();
        double point[3];
        point[0] = (lasreader->header.min_x+lasreader->header.max_x)/2;
        point[1] = (lasreader->header.min_y+lasreader->header.max_y)/2;
        point[2] = (lasreader->header.min_z+lasreader->header.max_z)/2;
        geoprojectionconverter.to_target(point);
        reproject_quantizer->x_scale_factor = geoprojectionconverter.get_target_precision();
        reproject_quantizer->y_scale_factor = geoprojectionconverter.get_target_precision();
        reproject_quantizer->z_scale_factor = lasreader->header.z_scale_factor;
        reproject_quantizer->x_offset = ((I64)((point[0]/reproject_quantizer->x_scale_factor)/10000000))*10000000*reproject_quantizer->x_scale_factor;
        reproject_quantizer->y_offset = ((I64)((point[1]/reproject_quantizer->y_scale_factor)/10000000))*10000000*reproject_quantizer->y_scale_factor;
        reproject_quantizer->z_offset = ((I64)((point[2]/reproject_quantizer->z_scale_factor)/10000000))*10000000*reproject_quantizer->z_scale_factor;
      }

      int number_of_keys;
      GeoProjectionGeoKeys* geo_keys = 0;
      int num_geo_double_params;
      double* geo_double_params = 0;

      if (geoprojectionconverter.get_geo_keys_from_projection(number_of_keys, &geo_keys, num_geo_double_params, &geo_double_params, !geoprojectionconverter.has_projection(false)))
      {
        lasreader->header.set_geo_keys(number_of_keys, (LASvlr_key_entry*)geo_keys);
        free(geo_keys);
        if (geo_double_params)
        {
          lasreader->header.set_geo_double_params(num_geo_double_params, geo_double_params);
          free(geo_double_params);
        }
        else
        {
          lasreader->header.del_geo_double_params();
        }
        lasreader->header.del_geo_ascii_params();
      }
    }

    // do we need an extra pass

    BOOL extra_pass = laswriteopener.is_piped();

    // for piped output we need an extra pass

    if (extra_pass)
    {
      if (lasreadopener.is_piped())
      {
        fprintf(stderr, "ERROR: input and output cannot both be piped\n");
        usage(true);
      }


      if (verbose) fprintf(stderr, "extra pass for piped output: reading %lld points ...\n", lasreader->npoints);


      // maybe seek to start position

      if (subsequence_start) lasreader->seek(subsequence_start);

      while (lasreader->read_point())

      {
        if (lasreader->p_count > subsequence_stop) break;

        if (clip_to_bounding_box)
        {
          if (!lasreader->point.inside_box(lasreader->header.min_x, lasreader->header.min_y, lasreader->header.min_z, lasreader->header.max_x, lasreader->header.max_y, lasreader->header.max_z))
          {
            continue;
          }
        }

        if (reproject_quantizer)
        {
          lasreader->point.compute_coordinates();
          geoprojectionconverter.to_target(lasreader->point.coordinates);
          lasreader->point.compute_XYZ(reproject_quantizer);
        }
        lasinventory.add(&lasreader->point);
      }
      lasreader->close();

      lasreader->header.number_of_point_records = lasinventory.number_of_point_records;
      for (i = 0; i < 5; i++) lasreader->header.number_of_points_by_return[i] = lasinventory.number_of_points_by_return[i+1];
      if (reproject_quantizer) lasreader->header = *reproject_quantizer;
      lasreader->header.max_x = lasreader->header.get_x(lasinventory.max_X);
      lasreader->header.min_x = lasreader->header.get_x(lasinventory.min_X);
      lasreader->header.max_y = lasreader->header.get_y(lasinventory.max_Y);
      lasreader->header.min_y = lasreader->header.get_y(lasinventory.min_Y);
      lasreader->header.max_z = lasreader->header.get_z(lasinventory.max_Z);
      lasreader->header.min_z = lasreader->header.get_z(lasinventory.min_Z);

     // if (verbose) { fprintf(stderr,"extra pass took %g sec.\n", taketime()-start_time); start_time = taketime(); }

      if (verbose) fprintf(stderr, "piped output: reading %lld and writing %d points ...\n", lasreader->npoints, lasinventory.number_of_point_records);

    }
    else
    {
      if (reproject_quantizer)
      {
        saved_quantizer = new LASquantizer();
        *saved_quantizer = lasreader->header;
        lasreader->header = *reproject_quantizer;
      }

      //if (verbose) fprintf(stderr, "reading %lld and writing all surviving points ...\n", lasreader->npoints);

    }

    // check output

    if (!laswriteopener.active())
    {
      // create name from input name
      laswriteopener.make_file_name(lasreadopener.get_file_name());
    }

    // prepare the header for the surviving points

    strncpy(lasreader->header.system_identifier, "LAStools (c) by rapidlasso GmbH", 32);
    lasreader->header.system_identifier[31] = '\0';
    char temp[64];
    sprintf(temp, "las2las (version %d)", LAS_TOOLS_VERSION);
    strncpy(lasreader->header.generating_software, temp, 32);
    lasreader->header.generating_software[31] = '\0';


    LASwriter* laswriter = 0;
    // open laswriter
    if(is_mpi){
	// remove any existing out file, before opening with MPI_File_open
	if(rank==0){
	    remove(laswriteopener.get_file_name());
	}
	MPI_Barrier(MPI_COMM_WORLD);
    }


    laswriter = laswriteopener.open(&lasreader->header);
    if (laswriter == 0)
    {
         fprintf(stderr, "ERROR: could not open laswriter\n");
         byebye(true, argc==1);
    }
    // **************************************************************************************************
    if(is_mpi == 1){ // jdw, we do this because only rank 0 now writes the header in laswriter_las.cpp
      MPI_File fh = laswriter->get_MPI_File();
      MPI_Offset offset;
      //MPI_File_get_position(fh, &offset);
      //printf ("offset %lld, rank %i fh %lld\n", offset, rank, fh);
      if(rank==0){
           MPI_File_get_position(fh, &offset);
      }
      MPI_Bcast(&offset, 1, MPI_OFFSET, 0, MPI_COMM_WORLD);
      MPI_Barrier(MPI_COMM_WORLD);
      MPI_File_seek(fh, offset, MPI_SEEK_SET);

    }
    // ****************************************************************************************************



    // for piped output we need to re-open the input file

    if (extra_pass)
    {
      if (!lasreadopener.reopen(lasreader))
      {
        fprintf(stderr, "ERROR: could not re-open lasreader\n");
        byebye(true);
      }
    }
    else
    {
      if (reproject_quantizer)
      {
        lasreader->header = *saved_quantizer;
        delete saved_quantizer;
      }
    }

    // maybe seek to start position

    if (subsequence_start) lasreader->seek(subsequence_start);

    // loop over points

    if (point)
    {

      while (lasreader->read_point())

      {
        if (lasreader->p_count > subsequence_stop) break;

        if (clip_to_bounding_box)
        {
          if (!lasreader->point.inside_box(lasreader->header.min_x, lasreader->header.min_y, lasreader->header.min_z, lasreader->header.max_x, lasreader->header.max_y, lasreader->header.max_z))
          {
            continue;
          }
        }

        if (reproject_quantizer)
        {
          lasreader->point.compute_coordinates();
          geoprojectionconverter.to_target(lasreader->point.coordinates);
          lasreader->point.compute_XYZ(reproject_quantizer);
        }
        *point = lasreader->point;
        laswriter->write_point(point);
        // without extra pass we need inventory of surviving points
        if (!extra_pass) laswriter->update_inventory(point);
      }
      delete point;
      point = 0;
    }
    else // ***************************** MPI ********************************************************
    {
      // ***** Determine the start and stop points for this process *****
      I64 left_over_count = lasreader->npoints % process_count;
      I64 process_points = lasreader->npoints / process_count;
      subsequence_start = rank*process_points;
      subsequence_stop =  subsequence_start + process_points;
      if(rank == process_count-1) subsequence_stop += left_over_count;

      // ***** Set the input stream file offset for this process *****
      // subsequence_start parameter gets cast to U32 in the implementation of seek and overflows for large files
      // manually set the file offset instead for now
      //((LASreaderLAS*)lasreader)->stream->seek(subsequence_start);
      I64 header_end_read_position = lasreader->get_Stream()->tell();


      //printf("header end %lld subseqence_start * 28 %lld rank %i\n", header_end_read_position, subsequence_start*28, rank);
      lasreader->p_count = subsequence_start;
      lasreader->get_Stream()->seek(header_end_read_position + subsequence_start*28);
      //printf("seek pos first loop %lld rank %i\n", lasreader->get_Stream()->tell(), rank);


      if (verbose) fprintf(stderr, "reading %lli points, rank %i\n", subsequence_stop - subsequence_start, rank);

      // *****Read the file for the first time *****
      // this first read and filter of the file is to gather a count of points that pass the filter so that
      // write offsets can be set.
      I64 filtered_count = 0;
      //while (lasreader->read_point()){

      lasreader->MPI_END_POINT = subsequence_stop;
      while (lasreader->read_point())
      {
          filtered_count++;
      }


      // ***** Gather and set the write offset for this process *****
      I64* filtered_counts = (I64*)malloc(process_count * sizeof(I64));
      if(is_mpi)MPI_Barrier(MPI_COMM_WORLD);
      filtered_counts[rank] = filtered_count;
      if(is_mpi)MPI_Allgather(&filtered_count, 1, MPI_LONG_LONG, filtered_counts, 1, MPI_LONG_LONG, MPI_COMM_WORLD);
      if(is_mpi)MPI_Barrier(MPI_COMM_WORLD);

      if(debug) printf("filtered count %lli rank %i\n", filtered_counts[rank], rank);

      if(is_mpi)MPI_Barrier(MPI_COMM_WORLD);

      I64 write_point_offset = 0;
      for (int k=0; k < rank; k++){
	  write_point_offset += filtered_counts[k];
      }
      if(is_mpi){

        MPI_File fh = laswriter->get_MPI_File();
        MPI_Offset cur = 0;

        // jdw, todo, remove the hardcoding by adding methods to read point size from reader
        MPI_File_seek(fh, write_point_offset*28, MPI_SEEK_CUR);
        if(debug){
          MPI_File_get_position(fh, &cur);
          printf ("rank %i, write offset %lld\n", rank, write_point_offset*28);
        }
      }
      if(is_mpi)MPI_Barrier(MPI_COMM_WORLD);


      // ***** Read and filter the input file again, this time write the filtered point since output file offset in now known amd set *****
      //lasreader->seek(subsequence_start); // subsequence_start parameter gets cast to U32 in the implementation and overflows for large files
      // manually set the file offset instead for now
      //printf("header end %lld subseqence_start * 28 %lld rank %i\n", header_end_read_position, subsequence_start*28, rank);
      lasreader->p_count = subsequence_start;
      lasreader->get_Stream()->seek(header_end_read_position + subsequence_start*28);
      //printf("seek pos second loop %lld rank %i\n", lasreader->get_Stream()->tell(), rank);

      lasreader->MPI_END_POINT = subsequence_stop;
      while (lasreader->read_point())
      {
          //if (lasreader->p_count > subsequence_stop) break;

          //if (clip_to_bounding_box)
          //{
          //  if (!lasreader->point.inside_box(lasreader->header.min_x, lasreader->header.min_y, lasreader->header.min_z, lasreader->header.max_x, lasreader->header.max_y, lasreader->header.max_z))
          //  {
          //    continue;
          //  }
         // }

          if (reproject_quantizer)
          {
            lasreader->point.compute_coordinates();
            geoprojectionconverter.to_target(lasreader->point.coordinates);
            lasreader->point.compute_XYZ(reproject_quantizer);
          }

          laswriter->write_point(&lasreader->point);
          // without extra pass we need inventory of surviving points
    	  if (!extra_pass){
            laswriter->update_inventory(&lasreader->point);
    	  }
      }
      //***** this is part of an mpi write optimization *****
      laswriter->get_Stream()->flushBytes();
    }

    // without the extra pass we need to fix the header now
    // ***** do the inventory reconciliation *****
    // ***** Reduce inventory information in rank 0 *****
    if (is_mpi){
        U32 number_of_point_records = 0;
        U32 number_of_points_by_return[8];
        for(int i = 0; i<8; i++)number_of_points_by_return[i] = 0;
        I32 max_X = 0;
        I32 min_X = 0;
        I32 max_Y = 0;
        I32 min_Y = 0;
        I32 max_Z = 0;
        I32 min_Z = 0;

        MPI_Reduce(&laswriter->inventory.number_of_point_records, &number_of_point_records, 1, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(laswriter->inventory.number_of_points_by_return, number_of_points_by_return, 8, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD);
        MPI_Reduce(&laswriter->inventory.max_X, &max_X, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
        MPI_Reduce(&laswriter->inventory.min_X, &min_X, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
        MPI_Reduce(&laswriter->inventory.max_Y, &max_Y, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
        MPI_Reduce(&laswriter->inventory.min_Y, &min_Y, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);
        MPI_Reduce(&laswriter->inventory.max_Z, &max_Z, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);
        MPI_Reduce(&laswriter->inventory.min_Z, &min_Z, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD);

        if (rank ==0){
            laswriter->inventory.number_of_point_records = number_of_point_records;
            for(int i=0; i<8; i++)laswriter->inventory.number_of_points_by_return[i] = number_of_points_by_return[i];
            laswriter->inventory.max_X = max_X;
            laswriter->inventory.min_X = min_X;
            laswriter->inventory.max_Y = max_Y;
            laswriter->inventory.min_Y = min_Y;
            laswriter->inventory.max_Z = max_Z;
            laswriter->inventory.min_Z = min_Z;
        }
    }

    if(rank == 0){
      if (!extra_pass)
      {
        if (reproject_quantizer) lasreader->header = *reproject_quantizer;
        laswriter->update_header(&lasreader->header, TRUE);
      }
    }
    if(is_mpi)MPI_Barrier(MPI_COMM_WORLD);
    if (verbose) { fprintf(stderr,"%lli surviving points written by rank: %i\n", laswriter->p_count, rank); }

    laswriter->close(FALSE);
    if(is_mpi)MPI_Barrier(MPI_COMM_WORLD);

    delete laswriter;
    lasreader->close();
    delete lasreader;
    if (reproject_quantizer) delete reproject_quantizer;

  }
  if(is_mpi)MPI_Finalize();

  time(&wall_end_time);

  if (verbose) { fprintf(stderr,"total time %.f sec, cpu time: %g sec. rank: %i\n", difftime(wall_end_time, wall_start_time), taketime()-start_time, rank); }
  return 0;
}
예제 #23
0
파일: main.c 프로젝트: dud3/ELEVATOR
int main(int argc, char **argv) {

  int size, rank, rc, root = 0, nameLength = 20;

  MPI_Status status;
  MPI_File configFile = malloc(sizeof configFile);
  MPI_Info info;
  char *configFileName = "./configFile.txt";

  createLogFile();

  MPI_Init(&argc, &argv);
  
  initLogFile();

  MPI_Info_create(&info);

  MPI_Comm_size(MPI_COMM_WORLD, &size);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  printf("%d/%d started.\n", rank+1, size);

  char buf[nameLength + 1];

  rc = MPI_File_open(MPI_COMM_WORLD, configFileName,
                     MPI_MODE_RDONLY, info, &configFile);

  printf("%d/%d achieved the file_open result: %d.\n", rank+1, size, rc);

  // set the individual pointer to our position in the config file
  // master is the master of elevators
  rc = MPI_File_seek(configFile, rank * nameLength, MPI_SEEK_SET);
 
  rc = MPI_File_read(configFile, buf, nameLength, MPI_CHAR, &status);
  buf[nameLength] = '\0';
  int len = nameLength - 1;
  while ((len >= 0) && (buf[len] == ' ')) {
    buf[len] = '\0';
    len--;
  }

  rc = MPI_File_close(&configFile);

  char *ourname = buf;

  char str[50 + nameLength];
  printf(strcat (strcat (strcpy (str, "%d/%d has the name '"), ourname), "'.\n"), rank+1, size);

  //read in the file to be counted, line by line.
  FILE * fp;
  fp = fopen(argv[1], "r");

  if (fp == NULL) {
      printf("%d/%d did not find a document to count! Switching to assignment part 1.\n", rank+1, size);
    assgn = 1;
  } else {
    printf("%d/%d found a document to count. Switching to assignment part 2.\n", rank+1, size);
    assgn = 2;

      /*
      
      // general idea:

    char * line = NULL;
    size_t lineLen = 0;
    ssize_t read;
  
    dlist* list = NULL;
    dlist_create(10, &list);
    printf("size of list is %d\n", list->size);

    for (int c = 0; c < 15; c++) {
        char buf[6]; // long enough for test + num, e.g. test1
        sprintf(buf, "test%d", c);
      dlist_append(&list, buf);
    }

    char* thestring;

    printf("capacity is %d\n", list->capacity);
    for (int c = 0; c < 15; c++) {
      dlist_get(&list, c, &thestring);
      printf("string is: %s\n", thestring);
    }



    while ((read = getline(&line, &lineLen, fp)) != -1) {
      printf("Retrieved line of length %zu :\n", read);
      printf("%s", line);
    }
  
    if (line)
      free(line);

      */

    char * line = NULL;
    size_t lineLen = 0;
    ssize_t read;
  
    dlist_create(10, &list);

      // go through the lines in the file...

    int i = 0;

    while ((read = getline(&line, &lineLen, fp)) != -1) {
        i++;
        
        // ... and map them to the individual worker threads
        if (i % (size-1) == rank-1) {
        dlist_append(&list, line);
        }
    }
  
    if (line)
      free(line);
  }
  
  // we check whether or not we are the root process.
  if (rank == root) {
    master(rank);
  }
  else {
    worker(rank, ourname);
  }
 
  closeLogFile();
  
  printf("%d/%d ended.\n", rank+1, size);

  MPI_Finalize();
}
예제 #24
0
파일: gio.c 프로젝트: kento/gio
void do_collective_read()
{
  MPI_Info info;
  MPI_Datatype contig;
  MPI_Comm sub_read_comm;
  MPI_File fh;
  char coll_path[PATH_LEN];
  int sub_comm_size, sub_rank, sub_comm_color;
  int disp;
  int rc;
  int *buf;

  ptimes[0].start = MPI_Wtime();
  ptimes[1].start = MPI_Wtime();
  sub_comm_color = get_sub_collective_io_comm(&sub_read_comm);  

  /* Construct a datatype for distributing the input data across all
   * processes. */
  MPI_Type_contiguous(data_size / sizeof(int), MPI_INT, &contig);
  MPI_Type_commit(&contig);
  
  /* Set the stripe_count and stripe_size, that is, the striping_factor                                                                                                                                    
   * and striping_unit. Both keys and values for MPI_Info_set must be                                                                                                                                      
   * in the form of ascii strings. */
  MPI_Info_create(&info);
  //  MPI_Info_set(info, "striping_factor", striping_factor);
  //  MPI_Info_set(info, "striping_unit", striping_unit);
  MPI_Info_set(info, "romio_cb_read", "enable");                                                                                                                                                       
  //  MPI_Info_set(info, "romio_cb_read", "disable");

  /* Get path to the target file of the communicator */
  MPI_Comm_size(sub_read_comm, &sub_comm_size);
  get_coll_io_path(coll_path, sub_comm_color);

  /* Delete the output file if it exists so that striping can be set                                                                                                                                          * on the output file. */
  //  rc = MPI_File_delete(coll_path, info);

  /* Create read data*/
  MPI_Comm_rank(sub_read_comm, &sub_rank);
  buf = create_io_data(-1);
  ptimes[1].end = MPI_Wtime();

  MPI_Barrier(MPI_COMM_WORLD);

  /* Open the file */
  ptimes[2].start = MPI_Wtime();
  rc = MPI_File_open(sub_read_comm, coll_path, 
		     MPI_MODE_RDONLY, 
		     info, &fh);
  if (rc != MPI_SUCCESS) {
    gio_err("MPI_File_open failed: %s  (%s:%s:%d)", coll_path, __FILE__, __func__, __LINE__);
  }
  ptimes[2].end   = MPI_Wtime();

  /* Set the file view for the output file. In this example, we will                                                                                                                                          * use the same contiguous datatype as we used for reading the data                                                                                                                                          * into local memory. A better example would be to read out just                                                                                                                                            * part of the data, say 4 contiguous elements followed by a gap of                                                                                                                                          * 4 elements, and repeated. */
  ptimes[3].start = MPI_Wtime();
#ifdef GIO_LARGE_FILE
  int i;
  for (i = 0; i < sub_rank; i++) {
    MPI_File_seek(fh, data_size, MPI_SEEK_CUR);
  }
#else  
  disp = sub_rank * data_size;
  MPI_File_set_view(fh, disp, contig, contig, "native", info);
#endif
  if (rc != MPI_SUCCESS) {
    gio_err("MPI_File_set_view failed  (%s:%s:%d)", __FILE__, __func__, __LINE__);
  }
  ptimes[3].end = MPI_Wtime();

  /* MPI Collective Read */
  ptimes[4].start = MPI_Wtime();
  rc = MPI_File_read_all(fh, buf, 1, contig, MPI_STATUS_IGNORE);
  if (rc != MPI_SUCCESS) {
    gio_err("MPI_File_set_view failed  (%s:%s:%d)", __FILE__, __func__, __LINE__);
  }
  ptimes[4].end = MPI_Wtime();

  validate_io_data(buf, sub_rank);

  /*Free data*/
  free_io_data(buf);

  /* Close Files */
  ptimes[5].start = MPI_Wtime();
  MPI_File_close(&fh);
  ptimes[5].end = MPI_Wtime();
  ptimes[0].end = MPI_Wtime();

  print_results();

  return;
}
예제 #25
0
//#include "graph.hpp"
/*
 * Reads every input file returned by getallfilenames() in parallel.
 *
 * For each file, every rank reads one CHUNKSIZE-byte slice. The text that
 * follows the last blank line ("\n\n") of the slice is forwarded to the next
 * rank (ring order, the last rank forwards to rank 0) and the text received
 * from the previous rank is prepended to the local slice. The resulting text
 * is compressed into `compressedvector` and handed to process_string().
 *
 * Takes no parameters and returns nothing; results are accumulated in the
 * file-level objects `compressedvector`, `localmap` and `frequencymap`.
 */
void process_files()
{
    // Free space kept in front of each chunk for the text received from the
    // previous rank; also the capacity of the receive buffer.
    const int CARRYOVER_MAX = 300000;

    std::vector<string> files=getallfilenames("/work/scratch/vv52zasu/inputfiles/");
    int myrank = MPI::COMM_WORLD.Get_rank();
    int size = MPI::COMM_WORLD.Get_size();
    int filecount=0;

    for(std::vector<string>::iterator it = files.begin(); it != files.end(); ++it)
    {
        if(myrank ==0) std::cout<<"Processing file:"<<(*it).c_str()<<endl;

        MPI::File thefile = MPI::File::Open(MPI::COMM_WORLD, (*it).c_str(), MPI::MODE_RDONLY, MPI::INFO_NULL);
        MPI::Offset filesize = thefile.Get_size();

        int CHUNKSIZE = (filesize/size)+1;
        CHUNKSIZE = std::max(CHUNKSIZE, 10000);

        // Layout: [CARRYOVER_MAX bytes head room][CHUNKSIZE bytes of file data][NUL]
        char *bufchar_header = new char[CARRYOVER_MAX + CHUNKSIZE + 1];
        char *bufchar = bufchar_header + CARRYOVER_MAX;
        MPI_Status status1;

        // Compute the offset in MPI::Offset so that rank * chunk cannot overflow int.
        MPI_File_seek(thefile, (MPI::Offset)myrank * CHUNKSIZE, MPI_SEEK_SET);
        MPI_File_read( thefile, bufchar, CHUNKSIZE, MPI_CHAR, &status1);
        int count=0;
        MPI_Get_count( &status1, MPI_CHAR, &count );
        if(count < 0) count = 0;
        // strchr() below needs a terminated string; the read does not add one.
        bufchar[count] = '\0';

        // Nothing else is read from this file, so release the handle now.
        thefile.Close();

        MPI::COMM_WORLD.Barrier();

        // Find the start of the text following the last blank line. If the
        // chunk has no blank line, keep the whole chunk and forward nothing.
        char *lastsentence = bufchar + count;
        char *pch=strchr(bufchar,'\n');
        while (pch!=NULL)
        {
            if(*(pch+1)=='\n'){ lastsentence = pch+2;}
            pch=strchr(pch+1,'\n');
        }

        int sendcharcount = count -( lastsentence - bufchar );
        if(sendcharcount < 0 || sendcharcount > CARRYOVER_MAX) sendcharcount =0;

        char *recvptr = new char[CARRYOVER_MAX];

        // Ring neighbours; the modulo form also yields valid ranks when size == 1.
        int dest = (myrank + 1) % size;
        int src  = (myrank + size - 1) % size;

        // The receive count must not exceed the capacity of recvptr.
        MPI_Sendrecv(lastsentence, sendcharcount, MPI_CHAR, dest, 123, recvptr, CARRYOVER_MAX, MPI_CHAR, src, 123, MPI_COMM_WORLD, &status1);
        int recvcount=0;
        MPI_Get_count( &status1, MPI_CHAR, &recvcount );

        // Prepend the received text by moving the start of the chunk backwards
        // into the head room.
        bufchar = bufchar -recvcount;
        if(recvcount >= 300000) cout << "Process: " << myrank << " DUDE wtf1 man viswanath"<<endl;
        memcpy(bufchar, recvptr, recvcount);
        int finalcount = lastsentence - bufchar;
        if( finalcount > CHUNKSIZE+300000) {
            cout << "Process: " << myrank <<  " DUDE wtf man viswanath. "<< "Final Count: "<<finalcount<<endl;
        }

        long unsigned int destsize = compressBound(finalcount);
        unsigned char *compressedstr = new unsigned char[destsize];
        int result = compress(compressedstr, &destsize, (unsigned char*)bufchar, finalcount);
        // zlib reports success as 0 (Z_OK); make a failure visible instead of dropping it.
        if(result != 0) cout << "Process: " << myrank << " compress failed with code " << result << endl;

        compressedvector.push_back(std::make_tuple(compressedstr, destsize, finalcount));
        string finalstr(bufchar, finalcount );
        delete[] recvptr;
        process_string(finalstr, localmap, frequencymap);
        int msize = (int)mapsize(localmap)+(int)((frequencymap.size()* 20)/(1024*1024));
        int max = 0;   // only meaningful on rank 0 after the reduction
        MPI_Reduce(&msize, &max, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD);

        delete[] (bufchar_header);
        if(myrank ==0){
            cout<<"MaxMapsize: "<<max<<endl;    
            std::cout<<"Processing file Ended: "<<(*it).c_str()<<endl;
        }
        filecount++;
    }
}
예제 #26
0
/*
 * JNI entry point backing the Java method mpi.File.seek.
 *
 * Reinterprets the Java-side handle `fh` as an MPI_File and `offset` as an
 * MPI_Offset, forwards them together with `whence` to MPI_File_seek, and
 * passes the MPI return code to ompi_java_exceptionCheck (presumably this
 * raises a Java exception on failure - confirm against its definition).
 */
JNIEXPORT void JNICALL Java_mpi_File_seek(
        JNIEnv *env, jobject jthis, jlong fh, jlong offset, jint whence)
{
    MPI_File handle = (MPI_File)fh;
    MPI_Offset target = (MPI_Offset)offset;
    int status = MPI_File_seek(handle, target, whence);

    ompi_java_exceptionCheck(env, status);
}
예제 #27
0
파일: 1.c 프로젝트: jonnyguio/progparela
int main(int argc, char **argv) {

  double *xy;
  double mySUMx, mySUMy, mySUMxy, mySUMxx, SUMx, SUMy, SUMxy,
         SUMxx, SUMres, res, slope, y_intercept, y_estimate,
         begin, end;
  int i, j, n, myid, numprocs, naverage, nremain, mypoints,
    sizeFile, ret;
  /*int new_sleep (int seconds);*/
  MPI_Status istatus;
  MPI_Datatype MPI_POINT;
  MPI_File infile;
  MPI_Offset ishift;

  MPI_Init(&argc, &argv);
  MPI_Comm_rank (MPI_COMM_WORLD, &myid);
  MPI_Comm_size (MPI_COMM_WORLD, &numprocs);

  MPI_Type_contiguous(2, MPI_DOUBLE, &MPI_POINT);
  MPI_Type_commit(&MPI_POINT);

  ret = MPI_File_open(MPI_COMM_WORLD, "1.in", MPI_MODE_RDONLY, MPI_INFO_NULL, &infile);
  if (ret == 0)
    printf("Arquivo aberto com sucesso no processo %d \n", myid);
  else {
    printf("Arquivo aberto com erro no processo %d \n", myid);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  /* ----------------------------------------------------------
   * Step 1: Process 0 reads data and sends the value of n
   * ---------------------------------------------------------- */


  MPI_File_seek(infile, 0, MPI_SEEK_SET);
  ret = MPI_File_read(infile, &n, 1, MPI_INT, &istatus);

  if (ret == 0)
    printf("Arquivo lido com sucesso no processo %d \n", myid);
  else {
    printf("Arquivo lido com erro no processo %d \n", myid);
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  naverage = n / numprocs;
  nremain = n % numprocs;

//  printf("%d - %d - %d - %d - %d\n", myid, n, naverage, nremain, 2 * (naverage + nremain));

  xy = (double *) malloc (2 * (naverage + nremain) * sizeof(double));

  /*if (myid == 0) {
    printf ("Number of processes used: %d\n", numprocs);
    printf ("-------------------------------------\n");
    printf ("The x coordinates on worker processes:\n");

    fscanf (infile, "%d", &n);
    x = (double *) malloc (n*sizeof(double));
    y = (double *) malloc (n*sizeof(double));
    for (i=0; i<n; i++)
      fscanf (infile, "%lf %lf", &x[i], &y[i]);
    for (i=1; i<numprocs; i++)
      MPI_Send (&n, 1, MPI_INT, i, 10, MPI_COMM_WORLD);
  }
  else {
    MPI_Recv (&n, 1, MPI_INT, 0, 10, MPI_COMM_WORLD, &istatus);
    x = (double *) malloc (n*sizeof(double));
    y = (double *) malloc (n*sizeof(double));
  }*/
  /* ---------------------------------------------------------- */

  /* ----------------------------------------------------------
   * Step 2: Process 0 sends subsets of x and y
   * ---------------------------------------------------------- */

  if (myid == 0)
    GET_TIME(begin);
  ishift = myid * naverage;
  mypoints = (myid < numprocs - 1) ? naverage : naverage + nremain;

  //MPI_File_set_view(infile, ishift, MPI_POINT, MPI_DOUBLE, "native", MPI_INFO_NULL);
  MPI_File_seek(infile, ishift * sizeof(double) * 2, MPI_SEEK_CUR);
  MPI_File_read(infile, &xy[0], mypoints, MPI_POINT, &istatus);

  /*for (i = 0; i < mypoints * 2; i += 2) {
      printf("(%d) %d: ", myid, i);
      printf("%.0lf ", xy[i]);
      printf("%.0lf\n", xy[i + 1]);
  }
  printf("\n");*/

  /*if (myid == 0) {
    for (i=1; i<numprocs; i++) {
      ishift = i * naverage;
      mypoints = (i < numprocs - 1) ? naverage : naverage + nremain;
      MPI_Send (&ishift, 1, MPI_INT, i, 1, MPI_COMM_WORLD);
      MPI_Send (&mypoints, 1, MPI_INT, i, 2, MPI_COMM_WORLD);
      MPI_Send (&x[ishift], mypoints, MPI_DOUBLE, i, 3, MPI_COMM_WORLD);
      MPI_Send (&y[ishift], mypoints, MPI_DOUBLE, i, 4, MPI_COMM_WORLD);
    }
  }
  else {
    MPI_Recv (&ishift, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &istatus);
    MPI_Recv (&mypoints, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, &istatus);
    MPI_Recv (&x[ishift], mypoints, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD,
	      &istatus);
    MPI_Recv (&y[ishift], mypoints, MPI_DOUBLE, 0, 4, MPI_COMM_WORLD,
	      &istatus);
    printf ("id %d: ", myid);
    for (i=0; i<n; i++) printf("%4.2lf ", x[i]);
    printf ("\n");
  }*/

  /* ----------------------------------------------------------
   * Step 3: Each process calculates its partial sum
   * ---------------------------------------------------------- */
  mySUMx = 0; mySUMy = 0; mySUMxy = 0; mySUMxx = 0;
  if (myid == 0) {
    ishift = 0;
    mypoints = naverage;
  }
  for (j = 0; j < mypoints * 2; j += 2) {
    mySUMx += xy[j];
    mySUMy += xy[j + 1];
    mySUMxy += xy[j] * xy[j + 1];
    mySUMxx += xy[j] * xy[j];
  }

//  printf("%d:\t%lf - %lf - %lf - %lf\n", myid, mySUMx, mySUMy, mySUMxy, mySUMxx);

  /* ----------------------------------------------------------
   * Step 4: Process 0 receives partial sums from the others
   * ---------------------------------------------------------- */

   MPI_Reduce(&mySUMx, &SUMx, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   MPI_Reduce(&mySUMy, &SUMy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   MPI_Reduce(&mySUMxy, &SUMxy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
   MPI_Reduce(&mySUMxx, &SUMxx, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);

   if (myid == 0)
     GET_TIME(end);

  /*if (myid != 0) {
    MPI_Send (&mySUMx, 1, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD);
    MPI_Send (&mySUMy, 1, MPI_DOUBLE, 0, 6, MPI_COMM_WORLD);
    MPI_Send (&mySUMxy, 1, MPI_DOUBLE, 0, 7, MPI_COMM_WORLD);
    MPI_Send (&mySUMxx, 1, MPI_DOUBLE, 0, 8, MPI_COMM_WORLD);
	    }
  else {
    SUMx = mySUMx; SUMy = mySUMy;
    SUMxy = mySUMxy; SUMxx = mySUMxx;
    for (i=1; i<numprocs; i++) {
      MPI_Recv (&mySUMx, 1, MPI_DOUBLE, i, 5, MPI_COMM_WORLD, &istatus);
      MPI_Recv (&mySUMy, 1, MPI_DOUBLE, i, 6, MPI_COMM_WORLD, &istatus);
      MPI_Recv (&mySUMxy, 1, MPI_DOUBLE, i, 7, MPI_COMM_WORLD, &istatus);
      MPI_Recv (&mySUMxx, 1, MPI_DOUBLE, i, 8, MPI_COMM_WORLD, &istatus);
      SUMx = SUMx + mySUMx;
      SUMy = SUMy + mySUMy;
      SUMxy = SUMxy + mySUMxy;
      SUMxx = SUMxx + mySUMxx;
    }
  }*/

  /* ----------------------------------------------------------
   * Step 5: Process 0 does the final steps
   * ---------------------------------------------------------- */
  if (myid == 0) {
    slope = (SUMx * SUMy - n * SUMxy ) / (SUMx * SUMx - n * SUMxx);
    y_intercept = (SUMy - slope * SUMx) / n;

    printf ("\n");
    printf ("The linear equation that best fits the given data:\n");
    printf ("       y = %6.2lfx + %6.2lf\n", slope, y_intercept);
    printf ("--------------------------------------------------\n");
    printf ("   Original (x, y)     Estimated y     Residual\n");
    printf ("--------------------------------------------------\n");
    SUMres = 0;
  }
  MPI_Bcast(&slope, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  MPI_Bcast(&y_intercept, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  for (j = 0; j < numprocs; j++) {
    MPI_Barrier(MPI_COMM_WORLD);
    if (j == myid) {
      SUMres = 0;
      for (i = 0; i < mypoints * 2; i += 2) {
        y_estimate = slope * xy[i] + y_intercept;
        res = xy[i + 1] - y_estimate;
        SUMres = SUMres + res * res;
        printf("   (%6.2lf %6.2lf)      %6.2lf       %6.2lf\n", xy[i], xy[i + 1], y_estimate, res);
      }
    }
  }
  MPI_Barrier(MPI_COMM_WORLD);
  if (myid == 0) {
    printf("--------------------------------------------------\n");
    printf("Residual sum = %6.2lf\n", SUMres);
    printf("Time: %lf\n", end - begin);
  }

  MPI_File_close(&infile);

  MPI_Finalize();
}
예제 #28
0
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int initFlag;
    MPI_Initialized(&initFlag);
    if (!initFlag) {
        printf("MPI init failed\n");
        return 8;
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    int l,mm=5;
    int nx,ny,nz,lt,nedge;
    float frequency;
    float velmax;
    float dt;
    int ncx_shot1,ncy_shot1,ncz_shot;
    int ishot,ncy_shot,ncx_shot;
    float unit;
    int nxshot,nyshot,dxshot,dyshot;
    char infile[80],outfile[80],logfile[80],tmp[80], nodelog[84];
    FILE  *fin, *fout, *flog, *fnode;
    MPI_File mpi_flog, mpi_fout;
    MPI_Status mpi_status;
    struct timeval start,end;
    float all_time;

    float *u, *v, *w, *up, *up1, *up2,
            *vp, *vp1, *vp2, *wp, *wp1, *wp2,
            *us, *us1, *us2, *vs, *vs1, *vs2,
            *ws, *ws1, *ws2, *vpp, *density, *vss;
    float c[5][7];
    float *wave;
    float nshot,t0,tt,c0;
    float dtx,dtz,dtxz,dr1,dr2,dtx4,dtz4,dtxz4;
    char message[100];

    if(argc<4)
    {
        printf("please add 3 parameter: inpurfile, outfile, logfile\n");
        exit(1);
    }

    message[99] = 0;    // Avoid string buffer overrun

    strcpy(infile,argv[1]);
    strcpy(outfile,argv[2]);
    strcpy(logfile,argv[3]);
    strcpy(nodelog,logfile);
    strcat(nodelog, ".node");

    strcpy(tmp,"date ");
    strncat(tmp, ">> ",3);
    strncat(tmp, logfile, strlen(logfile));
    if (proc_rank == 0) {
        flog = fopen(logfile,"w");
        fprintf(flog,"------------start time------------\n");
        fclose(flog);
        system(tmp);
        gettimeofday(&start,NULL);
    }
    fin = fopen(infile,"r");
    if(fin == NULL)
    {
        printf("file %s is  not exist\n",infile);
        exit(2);
    }
    fscanf(fin,"nx=%d\n",&nx);
    fscanf(fin,"ny=%d\n",&ny);
    fscanf(fin,"nz=%d\n",&nz);
    fscanf(fin,"lt=%d\n",&lt);
    fscanf(fin,"nedge=%d\n",&nedge);
    fscanf(fin,"ncx_shot1=%d\n",&ncx_shot1);
    fscanf(fin,"ncy_shot1=%d\n",&ncy_shot1);
    fscanf(fin,"ncz_shot=%d\n",&ncz_shot);
    fscanf(fin,"nxshot=%d\n",&nxshot);
    fscanf(fin,"nyshot=%d\n",&nyshot);
    fscanf(fin,"frequency=%f\n",&frequency);
    fscanf(fin,"velmax=%f\n",&velmax);
    fscanf(fin,"dt=%f\n",&dt);
    fscanf(fin,"unit=%f\n",&unit);
    fscanf(fin,"dxshot=%d\n",&dxshot);
    fscanf(fin,"dyshot=%d\n",&dyshot);
    fclose(fin);
    if (proc_rank == 0) {   // Master
        printf("\n--------workload parameter--------\n");
        printf("nx=%d\n",nx);
        printf("ny=%d\n",ny);
        printf("nz=%d\n",nz);
        printf("lt=%d\n",lt);
        printf("nedge=%d\n",nedge);
        printf("ncx_shot1=%d\n",ncx_shot1);
        printf("ncy_shot1=%d\n",ncy_shot1);
        printf("ncz_shot=%d\n",ncz_shot);
        printf("nxshot=%d\n",nxshot);
        printf("nyshot=%d\n",nyshot);
        printf("frequency=%f\n",frequency);
        printf("velmax=%f\n",velmax);
        printf("dt=%f\n",dt);
        printf("unit=%f\n",unit);
        printf("dxshot=%d\n",dxshot);
        printf("dyshot=%d\n\n",dyshot);

        flog = fopen(logfile,"a");
        fprintf(flog,"\n--------workload parameter--------\n");
        fprintf(flog,"nx=%d\n",nx);
        fprintf(flog,"ny=%d\n",ny);
        fprintf(flog,"nz=%d\n",nz);
        fprintf(flog,"lt=%d\n",lt);
        fprintf(flog,"nedge=%d\n",nedge);
        fprintf(flog,"ncx_shot1=%d\n",ncx_shot1);
        fprintf(flog,"ncy_shot1=%d\n",ncy_shot1);
        fprintf(flog,"ncz_shot=%d\n",ncz_shot);
        fprintf(flog,"nxshot=%d\n",nxshot);
        fprintf(flog,"nyshot=%d\n",nyshot);
        fprintf(flog,"frequency=%f\n",frequency);
        fprintf(flog,"velmax=%f\n",velmax);
        fprintf(flog,"dt=%f\n",dt);
        fprintf(flog,"unit=%f\n",unit);
        fprintf(flog,"dxshot=%d\n",dxshot);
        fprintf(flog,"dyshot=%d\n\n",dyshot);
        fclose(flog);
        fnode = fopen(nodelog, "a");
        fprintf(fnode,"World size: %d\n", world_size);
        fclose(fnode);
    }

#ifdef _WITH_PHI
    // [Afa] It is recommended that for Intel Xeon Phi data is 64-byte aligned.
    // Upon successful completion, posix_memalign() shall return zero
    if (posix_memalign((void **)&u  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&v  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&w  , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&up2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vp2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&wp2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&us2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&vs2, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws , 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws1, 64, sizeof(float)*nz*ny*nx)) return 2;
    if (posix_memalign((void **)&ws2, 64, sizeof(float)*nz*ny*nx)) return 2;
#else
    u       = (float*)malloc(sizeof(float)*nz*ny*nx);
    v       = (float*)malloc(sizeof(float)*nz*ny*nx);
    w       = (float*)malloc(sizeof(float)*nz*ny*nx);
    up      = (float*)malloc(sizeof(float)*nz*ny*nx);
    up1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    up2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp      = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vp2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp      = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wp2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    us      = (float*)malloc(sizeof(float)*nz*ny*nx);
    us1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    us2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs      = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    vs2     = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws      = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws1     = (float*)malloc(sizeof(float)*nz*ny*nx);
    ws2     = (float*)malloc(sizeof(float)*nz*ny*nx);
#endif
    // [Afa] Those are not offloaded to phi yet
    vpp     = (float*)malloc(sizeof(float)*nz*ny*nx);
    density = (float*)malloc(sizeof(float)*nz*ny*nx);
    vss     = (float*)malloc(sizeof(float)*nz*ny*nx);
    wave = (float*)malloc(sizeof(float)*lt);

    nshot=nxshot*nyshot;
    t0=1.0/frequency;

    // [Afa] Branch optmization
    // TODO: Will compiler optimize the `condition'?
    //       i.e Can I write `for(i=0;i< (nz < 210 ? nz : 210);i++)'?
    int condition = nz < 210 ? nz : 210;
    for(int i=0; i < condition;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++) {
                vpp[i*ny*nx+j*nx+k]=2300.;
                vss[i*ny*nx+j*nx+k]=1232.;
                density[i*ny*nx+j*nx+k]=1.;
            }
        }
    }

    condition = nz < 260 ? nz : 260;
    for(int i=210; i < condition;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++) {
                vpp[i*ny*nx+j*nx+k]=2800.;
                vss[i*ny*nx+j*nx+k]=1509.;
                density[i*ny*nx+j*nx+k]=2.;
            }
        }
    }

    for(int i=260;i<nz;i++) {
        for(int j=0;j<ny;j++) {
            for(int k=0;k<nx;k++)
            {
                vpp[i*ny*nx+j*nx+k]=3500.;
                vss[i*ny*nx+j*nx+k]=1909.;
                density[i*ny*nx+j*nx+k]=2.5;
            }
        }
    }

    for(l=0;l<lt;l++)
    {
        tt=l*dt;
        tt=tt-t0;
        float sp=PIE*frequency*tt;
        float fx=100000.*exp(-sp*sp)*(1.-2.*sp*sp);
        wave[l]=fx;
    }

    // TODO: [Afa] Data produced by code below are static. See table below
    if(mm==5)
    {
        c0=-2.927222164;
        c[0][0]=1.66666665;
        c[1][0]=-0.23809525;
        c[2][0]=0.03968254;
        c[3][0]=-0.004960318;
        c[4][0]=0.0003174603;
    }

    c[0][1]=0.83333;
    c[1][1]=-0.2381;
    c[2][1]=0.0595;
    c[3][1]=-0.0099;
    c[4][1]=0.0008;

    for(int i=0;i<5;i++)
        for(int j=0;j<5;j++)
            c[j][2+i]=c[i][1]*c[j][1];
    /*
     * mm == 5, c =
     * 1.666667    0.833330    0.694439    -0.198416   0.049583    -0.008250   0.000667
     * -0.238095   -0.238100   -0.198416   0.056692    -0.014167   0.002357    -0.000190
     * 0.039683    0.059500    0.049583    -0.014167   0.003540    -0.000589   0.000048
     * -0.004960   -0.009900   -0.008250   0.002357    -0.000589   0.000098    -0.000008
     * 0.000317    0.000800    0.000667    -0.000190   0.000048    -0.000008   0.000001
    */

    /*
     * mm != 5, c =
     * 0.000000    0.833330    0.694439    -0.198416   0.049583    -0.008250   0.000667
     * 0.000000    -0.238100   -0.198416   0.056692    -0.014167   0.002357    -0.000190
     * 0.000000    0.059500    0.049583    -0.014167   0.003540    -0.000589   0.000048
     * 0.000000    -0.009900   -0.008250   0.002357    -0.000589   0.000098    -0.000008
     * 0.000000    0.000800    0.000667    -0.000190   0.000048    -0.000008   0.000001
     */

    dtx=dt/unit;
    dtz=dt/unit;
    dtxz=dtx*dtz;

    dr1=dtx*dtx/2.;
    dr2=dtz*dtz/2.;

    dtx4=dtx*dtx*dtx*dtx;
    dtz4=dtz*dtz*dtz*dtz;
    dtxz4=dtx*dtx*dtz*dtz;

    if (proc_rank == 0) {
        fout = fopen(outfile, "wb");
        fclose(fout);
    }   // [Afa] Truncate file. We need a prettier way

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_File_open(MPI_COMM_WORLD, outfile, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_fout);
    MPI_File_open(MPI_COMM_WORLD, nodelog, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_flog);
    // [Afa] *About Nodes Number* nshot (i.e nxshot * nyshot) should be multiple of node numbers,
    //       or there will be hungry processes
    int loop_per_proc = ((int)nshot % world_size == 0) ? (nshot / world_size) : (nshot / world_size + 1);
    printf("\x1B[31mDEBUG:\x1b[39;49m World size %d, Loop per Proc %d, nshot %f, I am No. %d\n",
           world_size, loop_per_proc, nshot, proc_rank);

    //    for(ishot=1;ishot<=nshot;ishot++)   // [Afa] nshot is 20 in para1.in, but 200 in para2.in
    for (int loop_index = 0; loop_index < loop_per_proc; ++loop_index)
    {
        ishot = loop_index + proc_rank * loop_per_proc + 1; // [Afa] See commented code 2 lines above to understand this line
        if (ishot <= nshot) { // [Afa] ishot <= nshot
            printf("shot %d, process %d\n",ishot, proc_rank);
            snprintf(message, 29, "shot %6d, process %6d\n", ishot, proc_rank);     // [Afa] Those numbers:
            MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET);                // 28: string without '\0'
            MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status);           // 29: with '\0'
        } else {
            printf("shot HUNGRY, process %d\n", proc_rank);
            snprintf(message, 29, "shot HUNGRY, process %6d\n", proc_rank);
            MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET);
            MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status);
            continue;
        }
        ncy_shot=ncy_shot1+(ishot/nxshot)*dyshot;
        ncx_shot=ncx_shot1+(ishot%nxshot)*dxshot;

        // [Afa] Matrix is zeroed in every loop
        // i.e. The relation between those matrices in each loop is pretty loose
        // Matrices not zeroed are: vpp, density, vss and wave, and they're not changed (read-only)
        // We only need to partially collect matrix `up'

        // TODO: [Afa] Get a better way to pass those pointers, and mark them as `restrict'
        // And WHY are they using cpp as extension? C++11 doesn't support `restrict'
        zero_matrices(u, w, ws2, up2, vp1, wp1, us, ws, wp, us2, us1, wp2,
                      v, up1, nz, nx, up, ny, ws1, vs, vp2, vs1, vs2, vp);

        for(l=1;l<=lt;l++)
        {
            float xmax=l*dt*velmax;
            int nleft=ncx_shot-xmax/unit-10;
            int nright=ncx_shot+xmax/unit+10;
            int nfront=ncy_shot-xmax/unit-10;
            int nback=ncy_shot+xmax/unit+10;
            int ntop=ncz_shot-xmax/unit-10;
            int nbottom=ncz_shot+xmax/unit+10;
            if(nleft<5) nleft=5;
            if(nright>nx-5) nright=nx-5;
            if(nfront<5) nfront=5;
            if(nback>ny-5) nback=ny-5;
            if(ntop<5) ntop=5;
            if(nbottom>nz-5) nbottom=nz-5;
            ntop = ntop-1;
            nfront = nfront-1;
            nleft = nleft-1;

            // Although up, vp, wp, us, vs, ws are modified below, we're sure there's no race condition.
            // Each loop accesses a UNIQUE element in the array, and the value is not used, no need to worry about the dirty cache
#pragma omp parallel for shared(u) shared(v) shared(w) shared(up1) shared(up2) shared(vp1) shared(vp2) shared(wp1) \
    shared(wp2) shared(us) shared(us1) shared(us2) shared(vs) shared(vs1) shared(vs2) shared(ws) shared(ws1) shared(ws2) \
    shared(vss) shared(vpp) shared(dr1) shared(dr2) shared(dtz) shared(dtx) shared(ncx_shot) shared(ncy_shot) shared(ncz_shot) \
    shared(wave)
            for(int k=ntop;k<nbottom;k++) {
                for(int j=nfront;j<nback;j++) {
                    for(int i=nleft;i<nright;i++)
                    {
                        float vvp2,drd1,drd2,vvs2;
                        float px,sx;
                        if(i==ncx_shot-1&&j==ncy_shot-1&&k==ncz_shot-1)
                        {
                            px=1.;
                            sx=0.;
                        }
                        else
                        {
                            px=0.;
                            sx=0.;
                        }
                        vvp2=vpp[k*ny*nx+j*nx+i]*vpp[k*ny*nx+j*nx+i];
                        drd1=dr1*vvp2;
                        drd2=dr2*vvp2;

                        vvs2=vss[k*ny*nx+j*nx+i]*vss[k*ny*nx+j*nx+i];
                        drd1=dr1*vvs2;
                        drd2=dr2*vvs2;

                        float tempux2=0.0f;
                        float tempuy2=0.0f;
                        float tempuz2=0.0f;
                        float tempvx2=0.0f;
                        float tempvy2=0.0f;
                        float tempvz2=0.0f;
                        float tempwx2=0.0f;
                        float tempwy2=0.0f;
                        float tempwz2=0.0f;
                        float tempuxz=0.0f;
                        float tempuxy=0.0f;
                        float tempvyz=0.0f;
                        float tempvxy=0.0f;
                        float tempwxz=0.0f;
                        float tempwyz=0.0f;

                        // This will make the compiler do the vectorization
                        for(int kk=1;kk<=mm;kk++) {
                            tempux2 += c[kk-1][0]*(u[k*ny*nx+j*nx+(i+kk)]+u[k*ny*nx+j*nx+(i-kk)]);
                            tempuy2 += c[kk-1][0]*(u[k*ny*nx+(j+kk)*nx+i]+u[k*ny*nx+(j-kk)*nx+i]);
                            tempuz2 += c[kk-1][0]*(u[(k+kk)*ny*nx+j*nx+i]+u[(k-kk)*ny*nx+j*nx+i]);
                        }
                        for(int kk=1;kk<=mm;kk++) {
                            tempvx2 += c[kk-1][0]*(v[k*ny*nx+j*nx+(i+kk)]+v[k*ny*nx+j*nx+(i-kk)]);
                            tempvy2 += c[kk-1][0]*(v[k*ny*nx+(j+kk)*nx+i]+v[k*ny*nx+(j-kk)*nx+i]);
                            tempvz2 += c[kk-1][0]*(v[(k+kk)*ny*nx+j*nx+i]+v[(k-kk)*ny*nx+j*nx+i]);
                        }
                        for(int kk=1;kk<=mm;kk++) {
                            tempwx2 += c[kk-1][0]*(w[k*ny*nx+j*nx+(i+kk)]+w[k*ny*nx+j*nx+(i-kk)]);
                            tempwy2 += c[kk-1][0]*(w[k*ny*nx+(j+kk)*nx+i]+w[k*ny*nx+(j-kk)*nx+i]);
                            tempwz2 += c[kk-1][0]*(w[(k+kk)*ny*nx+j*nx+i]+w[(k-kk)*ny*nx+j*nx+i]);
                        }

                         //for(kk=1;kk<=mm;kk++) end

                        tempux2=(tempux2+c0*u[k*ny*nx+j*nx+i])*vvp2*dtx*dtx;
                        // u[k][j][i]
                        tempuy2=(tempuy2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        // u[k][j][i]
                        tempuz2=(tempuz2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtz*dtz;
                        // u[k][j][i]

                        tempvx2=(tempvx2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempvy2=(tempvy2+c0*v[k*ny*nx+j*nx+i])*vvp2*dtx*dtx;
                        tempvz2=(tempvz2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtz*dtz;

                        tempwx2=(tempwx2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempwy2=(tempwy2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx;
                        tempwz2=(tempwz2+c0*w[k*ny*nx+j*nx+i])*vvp2*dtz*dtz;

                        // This loop is auto-vectorized
                        for(int kk=1;kk<=mm;kk++)
                        {
                            for(int kkk=1;kkk<=mm;kkk++)
                            {
                                tempuxz=tempuxz+c[kkk-1][1+kk]*(u[(k+kkk)*ny*nx+j*nx+(i+kk)]
                                        -u[(k-kkk)*ny*nx+j*nx+(i+kk)]
                                        +u[(k-kkk)*ny*nx+j*nx+(i-kk)]
                                        -u[(k+kkk)*ny*nx+j*nx+(i-kk)]);
                                // u[k+kkk][j][i+kk], u[k-kkk][j][i+kk], u[k-kkk][j][i-kk], u[k+kkk][j][i-kk]
                                tempuxy=tempuxy+c[kkk-1][1+kk]*(u[k*ny*nx+(j+kkk)*nx+(i+kk)]
                                        -u[k*ny*nx+(j-kkk)*nx+(i+kk)]
                                        +u[k*ny*nx+(j-kkk)*nx+(i-kk)]
                                        -u[k*ny*nx+(j+kkk)*nx+(i-kk)]);

                                tempvyz=tempvyz+c[kkk-1][1+kk]*(v[(k+kkk)*ny*nx+(j+kk)*nx+i]
                                        -v[(k-kkk)*ny*nx+(j+kk)*nx+i]
                                        +v[(k-kkk)*ny*nx+(j-kk)*nx+i]
                                        -v[(k+kkk)*ny*nx+(j-kk)*nx+i]);
                                tempvxy=tempvxy+c[kkk-1][1+kk]*(v[k*ny*nx+(j+kkk)*nx+(i+kk)]
                                        -v[k*ny*nx+(j-kkk)*nx+(i+kk)]
                                        +v[k*ny*nx+(j-kkk)*nx+(i-kk)]
                                        -v[k*ny*nx+(j+kkk)*nx+(i-kk)]);

                                tempwyz=tempwyz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+(j+kk)*nx+i]
                                        -w[(k-kkk)*ny*nx+(j+kk)*nx+i]
                                        +w[(k-kkk)*ny*nx+(j-kk)*nx+i]
                                        -w[(k+kkk)*ny*nx+(j-kk)*nx+i]);
                                tempwxz=tempwxz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+j*nx+(i+kk)]
                                        -w[(k-kkk)*ny*nx+j*nx+(i+kk)]
                                        +w[(k-kkk)*ny*nx+j*nx+(i-kk)]
                                        -w[(k+kkk)*ny*nx+j*nx+(i-kk)]);
                            } // for(kkk=1;kkk<=mm;kkk++) end
                        } //for(kk=1;kk<=mm;kk++) end

                        // LValues below are only changed here
                        up[k*ny*nx+j*nx+i]=2.*up1[k*ny*nx+j*nx+i]-up2[k*ny*nx+j*nx+i]
                                +tempux2+tempwxz*vvp2*dtz*dtx
                                +tempvxy*vvp2*dtz*dtx;
                        // up1[k][j][j], up2[k][j][i], up[k][j][i]
                        vp[k*ny*nx+j*nx+i]=2.*vp1[k*ny*nx+j*nx+i]-vp2[k*ny*nx+j*nx+i]
                                +tempvy2+tempuxy*vvp2*dtz*dtx
                                +tempwyz*vvp2*dtz*dtx;
                        wp[k*ny*nx+j*nx+i]=2.*wp1[k*ny*nx+j*nx+i]-wp2[k*ny*nx+j*nx+i]
                                +tempwz2+tempuxz*vvp2*dtz*dtx
                                +tempvyz*vvp2*dtz*dtx
                                +px*wave[l-1];
                        us[k*ny*nx+j*nx+i]=2.*us1[k*ny*nx+j*nx+i]-us2[k*ny*nx+j*nx+i]+tempuy2+tempuz2
                                -tempvxy*vvs2*dtz*dtx-tempwxz*vvs2*dtz*dtx;
                        vs[k*ny*nx+j*nx+i]=2.*vs1[k*ny*nx+j*nx+i]-vs2[k*ny*nx+j*nx+i]+tempvx2+tempvz2
                                -tempuxy*vvs2*dtz*dtx-tempwyz*vvs2*dtz*dtx;
                        ws[k*ny*nx+j*nx+i]=2.*ws1[k*ny*nx+j*nx+i]-ws2[k*ny*nx+j*nx+i]+tempwx2+tempwy2
                                -tempuxz*vvs2*dtz*dtx-tempvyz*vvs2*dtz*dtx;
                    }//for(i=nleft;i<nright;i++) end
                }
            }

            // Again, those are UNIQUE access. Safe to share
#pragma omp parallel for shared(up) shared(us) shared(vp) shared(vs) shared(wp) shared(ws) shared(u) shared(v) shared(w) \
    shared(up2) shared(up1) shared(us2) shared(us1) shared(vp2) shared(vp1) shared(wp2) shared(wp1) shared(ws2) shared(ws1)
            for(int k=ntop;k<nbottom;k++)
                for(int j=nfront;j<nback;j++)
                    for(int i=nleft;i<nright;i++)
                    {
                        u[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]+us[k*ny*nx+j*nx+i];
                        v[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]+vs[k*ny*nx+j*nx+i];
                        w[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]+ws[k*ny*nx+j*nx+i];

                        up2[k*ny*nx+j*nx+i]=up1[k*ny*nx+j*nx+i];
                        up1[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i];
                        us2[k*ny*nx+j*nx+i]=us1[k*ny*nx+j*nx+i];
                        us1[k*ny*nx+j*nx+i]=us[k*ny*nx+j*nx+i];
                        vp2[k*ny*nx+j*nx+i]=vp1[k*ny*nx+j*nx+i];
                        vp1[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i];
                        vs2[k*ny*nx+j*nx+i]=vs1[k*ny*nx+j*nx+i];
                        vs1[k*ny*nx+j*nx+i]=vs[k*ny*nx+j*nx+i];
                        wp2[k*ny*nx+j*nx+i]=wp1[k*ny*nx+j*nx+i];
                        wp1[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i];
                        ws2[k*ny*nx+j*nx+i]=ws1[k*ny*nx+j*nx+i];
                        ws1[k*ny*nx+j*nx+i]=ws[k*ny*nx+j*nx+i];
                    }//for(i=nleft;i<nright;i++) end
        }//for(l=1;l<=lt;l++) end
        // [Afa] Do we need to keep the order of data?
        // [Afa Update] Yes, we do need to KEEP THE ORDER of data
        //        fwrite(up+169*ny*nx,sizeof(float),ny*nx,fout);    // This is the original fwrite

        MPI_File_seek(mpi_fout, (ishot - 1) * ny * nx * sizeof(float), MPI_SEEK_SET);
        MPI_File_write(mpi_fout, up + 169 * ny * nx, ny * nx, MPI_FLOAT, &mpi_status);

    }//for(ishot=1;ishot<=nshot;ishot++) end

    MPI_File_close(&mpi_fout);
    MPI_File_close(&mpi_flog);

    free(u);
    free(v);
    free(w);
    free(up);
    free(up1);
    free(up2);
    free(vp);
    free(vp1);
    free(vp2);
    free(wp);
    free(wp1);
    free(wp2);
    free(us);
    free(us1);
    free(us2);
    free(vs);
    free(vs1);
    free(vs2);
    free(ws);
    free(ws1);
    free(ws2);
    free(vpp);
    free(density);
    free(vss);
    free(wave);

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();

    if (proc_rank == 0) {
        gettimeofday(&end,NULL);
        all_time = (end.tv_sec-start.tv_sec)+(float)(end.tv_usec-start.tv_usec)/1000000.0;
        printf("run time:\t%f s\n",all_time);
        flog = fopen(logfile,"a");
        fprintf(flog,"\nrun time:\t%f s\n\n",all_time);
        fclose(flog);
        flog = fopen(logfile,"a");
        fprintf(flog,"------------end time------------\n");
        fclose(flog);
        system(tmp);
    }


    // Why return 1?
    return 0;
}
예제 #29
0
/*
Major reconstruction of memory management for -off_cache flag
*/
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/*

                      Initializes communications buffers (call set_buf)
                      Initializes iterations scheduling

                      Steps, in order:
                        1. choose the number of samples for this message size;
                        2. derive send / receive lengths from the benchmark name;
                        3. reject "v" collectives whose lengths exceed INT_MAX;
                        4. set up off-cache offsets and the allocation sizes;
                        5. fail the sample if the memory limit would be exceeded;
                        6. allocate / fill the buffers and call IMB_init_transfer;
                        7. run the benchmark once (or, for the dynamic / auto
                           policies, run it to calibrate n_sample first).

                      On failure Bmark->sample_failure is set and the function
                      returns early; otherwise it is left at 0.

Input variables:

-Bmark                (type struct Bench*)
                      (For explanation of struct Bench type:
                      describes all aspects of modes of a benchmark;
                      see [1] for more information)

                      Current benchmark

-BMODE                (type MODES)
                      aggregate / non aggregate

-iter                 (type int)
                      number of current iteration of message size loop

-size                 (type int)
                      Message size


In/out variables:

-c_info               (type struct comm_info*)
                      Collection of all base data for MPI;
                      see [1] for more information

                      Communications buffers are allocated and assigned values

-ITERATIONS           (type struct iter_schedule*)
                      Adaptive number of iterations, out of cache scheduling are
                      setup if requested

*/
/* >> IMB 3.1  */
{
    /* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;
    const int root_based = has_root(Bmark->name);


    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
#if (defined EXT || defined MPIIO || RMA)
    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
#endif

    /* Cap the sample count so that n_sample * size stays within overall_vol */
    ITERATIONS->n_sample = (size > 0)
                           ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
                           : x_sample;

    Bmark->sample_failure = 0;

    init_size = max(size, asize);

    if (c_info->rank < 0) {
        /* negative rank: nothing to set up for this process */
        return;
    } else {

        if (ITERATIONS->iter_policy == imode_off) {
            ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
        } else if ((ITERATIONS->iter_policy == imode_multiple_np) || (ITERATIONS->iter_policy == imode_auto && root_based)) {
            /* n_sample for benchmarks with uneven distribution of works
               must be greater or equal and multiple to num_procs.
               The formula below is a negative leg of hyperbola.
               It's moved and scaled relative to max message size
               and initial n_sample subject to multiple to num_procs.
            */
            double d_n_sample = ITERATIONS->msgspersample;
            int max_msg_size = 1<<c_info->max_msg_log;
            int tmp = (int)(d_n_sample*max_msg_size/(c_info->num_procs*init_size+max_msg_size)+0.5);
            ITERATIONS->n_sample = x_sample = max(tmp-tmp%c_info->num_procs, c_info->num_procs);
        } /* else as is */
    }

    /* ---- send / receive lengths, selected by benchmark name ---- */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Ialltoall")  || !strcmp(Bmark->name, "Ialltoall_pure")
        || !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
#else
        0
#endif // NBC // MPI1
    )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)c_info->num_procs * (size_t)init_size;
    }
    else if (
#ifdef MPI1
        !strcmp(Bmark->name, "Allgather")   || !strcmp(Bmark->name, "Allgatherv")
        || !strcmp(Bmark->name, "Gather")      || !strcmp(Bmark->name, "Gatherv")
#elif defined NBC
        !strcmp(Bmark->name, "Iallgather")  || !strcmp(Bmark->name, "Iallgather_pure")
        || !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure")
        || !strcmp(Bmark->name, "Igather")     || !strcmp(Bmark->name, "Igather_pure")
        || !strcmp(Bmark->name, "Igatherv")    || !strcmp(Bmark->name, "Igatherv_pure")
#else // MPI1 // NBC
        0
#endif // MPI1 // NBC
    )
    {
        s_len = (size_t) init_size;
        r_len = (size_t) c_info->num_procs * (size_t)init_size;
    }
    else if( !strcmp(Bmark->name,"Exchange") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = (size_t) init_size;
    }
    else if(
#ifdef MPI1
        !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Iscatter")  || !strcmp(Bmark->name,"Iscatter_pure")
        || !strcmp(Bmark->name,"Iscatterv") || !strcmp(Bmark->name,"Iscatterv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
    )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)init_size;
    } else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") ) {
        s_len = r_len = 0;
    }
    else if ( ! strcmp(Bmark->name,"Exchange_put") || ! strcmp(Bmark->name,"Exchange_get") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = 2 * (size_t)init_size;
    }
    else if (! strcmp(Bmark->name,"Compare_and_swap") )
    {
        /* Compare_and_swap operations require 3 buffers, so allocate space for compare
         * buffers in our r_buffer */
        s_len = (size_t)init_size;
        r_len = 3 * (size_t)init_size;
    }
    else
    {
        s_len = r_len = (size_t) init_size;
    }

    /*===============================================*/
    /* the displ is declared as int by MPI1 standard
       If c_info->num_procs*init_size  exceed INT_MAX value there is no way to run this sample
     */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoallv")  ||
        !strcmp(Bmark->name,"Allgatherv") ||
        !strcmp(Bmark->name,"Scatterv")   ||
        !strcmp(Bmark->name,"Gatherv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Ialltoallv")  || !strcmp(Bmark->name,"Ialltoallv_pure")  ||
        !strcmp(Bmark->name,"Iallgatherv") || !strcmp(Bmark->name,"Iallgatherv_pure") ||
        !strcmp(Bmark->name,"Iscatterv")   || !strcmp(Bmark->name,"Iscatterv_pure")   ||
        !strcmp(Bmark->name,"Igatherv")    || !strcmp(Bmark->name,"Igatherv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
    )
    {
        if( s_len > INT_MAX || r_len > INT_MAX) {
            Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if (BMODE->type == Sync) {
        ITERATIONS->use_off_cache=0;
        ITERATIONS->n_sample=x_sample;
    } else {
#ifdef MPIIO
        ITERATIONS->use_off_cache=0;
#else
        ITERATIONS->use_off_cache = ITERATIONS->off_cache;
#endif
        if (ITERATIONS->off_cache) {
            if ( ITERATIONS->cache_size > 0) {
                /* offsets: buffer length rounded up to a cache line, plus one extra line */
                size_t cls = (size_t) ITERATIONS->cache_line_size;
                size_t ofs = ( (s_len + cls - 1) / cls + 1 ) * cls;
                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
                ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
            } else {
                ITERATIONS->s_offs=ITERATIONS->r_offs=0;
                ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
            }
        }
    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if( ITERATIONS->use_off_cache ) {
        s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
        r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
    } else {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;

#ifdef DEBUG
    {
        size_t mx, mu;

        mx = (size_t) MEM_UNIT*c_info->max_mem;
        mu = (size_t) MEM_UNIT*c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
        DBG_I2("max  / used memory ",mx,mu);
        DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ",s_len,r_len);
        DBGF_I2("max  / used memory ",mx,mu);
        DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
    }
#endif

    if( c_info->used_mem > c_info->max_mem ) {
        Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
        return;
    }

    if (s_alloc > 0  && r_alloc > 0) {
        if (ITERATIONS->use_off_cache) {
            IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
            IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);

            /* replicate the first s_len / r_len bytes at every off-cache offset */
            for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                i_s = irep % ITERATIONS->s_cache_iter;
                memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs), c_info->s_buffer, s_len);
            }

            for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
                i_r = irep % ITERATIONS->r_cache_iter;
                memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs), c_info->r_buffer, r_len);
            }
        } else {
            IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
        }
    }

    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));

    /* Determine #iterations if dynamic adaptation requested */
    if ((ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based)) {
        double time[MAX_TIME_ID];
        int acc_rep_test, t_sample;
        int selected_n_sample = ITERATIONS->n_sample;

        /* memset counts bytes: clear the whole array, not just MAX_TIME_ID bytes */
        memset(time, 0, sizeof time);
        if (iter == 0 || BMODE->type == Sync) {
            ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
            if (c_info->n_lens > 0) {
                /* clear n_lens elements (the table is indexed by message-size iteration below) */
                memset(ITERATIONS->numiters, 0,
                       (size_t)c_info->n_lens * sizeof *ITERATIONS->numiters);
            }
        }

        /* first, run 1 iteration only */
        ITERATIONS->n_sample=1;
#ifdef MPI1
        c_info->select_source = Bmark->select_source;
#endif
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);

        time[1] = time[0];

#ifdef MPIIO
        /* rewind the individual (and, if used, shared) file pointer after the probe run */
        if( Bmark->access != no) {
            ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
            MPI_ERRHAND(ierr);

            if( Bmark->fpointer == shared) {
                ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);
            }
        }
#endif /*MPIIO*/

        /* time[0] <- slowest process' time */
        MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

        {   /* determine rough #repetitions for a run time of 1 sec */
            int rep_test = 1;
            if (time[0] < (1.0 / MSGSPERSAMPLE)) {
                rep_test = MSGSPERSAMPLE;
            } else if ((time[0] < 1.0)) {
                rep_test = (int)(1.0 / time[0] + 0.5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);
        }

        ITERATIONS->n_sample = min(selected_n_sample, acc_rep_test);

        if (ITERATIONS->n_sample > 1) {
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
            time[1] = time[0];
#ifdef MPIIO
            if( Bmark->access != no) {
                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);

                if ( Bmark->fpointer == shared) {
                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
        }

        {
            /* samples that fit into ITERATIONS->secs; fall back to the
               preselected count when the time is ~0 or the value is too large */
            float val = (float) (1+ITERATIONS->secs/time[0]);
            t_sample = (time[0] > 1.e-8 && (val <= (float) 0x7fffffff))
                       ? (int)val
                       : selected_n_sample;
        }

        if (c_info->n_lens>0 && BMODE->type != Sync) {
            // check monotonicity with msg sizes
            int i;
            for (i = 0; i < iter; i++) {
                t_sample = ( c_info->msglen[i] < size )
                           ? min(t_sample,ITERATIONS->numiters[i])
                           : max(t_sample,ITERATIONS->numiters[i]);
            }
            ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min(selected_n_sample, t_sample);
        } else {
            ITERATIONS->n_sample = min(selected_n_sample,
                                       min(ITERATIONS->n_sample_prev, t_sample));
        }

        /* rank 0's decision is authoritative for the whole communicator */
        MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
        {
            /* time is an array; the measured (max-reduced) value lives in time[0] */
            int usec=(int)(time[0]*1000000);

            DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
            DBGF_I1("=> # samples, aligned with previous ",t_sample);
            DBGF_I1("final #samples ",ITERATIONS->n_sample);
        }
#endif
    } else { /*if( (ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based) )*/
        double time[MAX_TIME_ID];
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
    }

    ITERATIONS->n_sample_prev=ITERATIONS->n_sample;

    /* >> IMB 3.1  */

}
/**
 * Read this rank's part of a 3-component vector field from the file
 * "lattice.<fname>_<index>" using MPI-IO, where <index> is
 * timeToIndex(mesh->time.current).
 *
 * The three components are stored one after another, each as
 * mesh->mesh.nPoints floats; they are converted to `scalar` and stored in
 * (*field)[point][component].
 *
 * @param mesh   Lattice mesh; supplies the point count, the current time and
 *               the per-patch node counts used to compute this rank's offset.
 * @param field  Out: receives a matrix allocated with
 *               matrixDoubleAlloc(nPoints, 3, 0). Ownership passes to the caller.
 * @param fname  Field name inserted into the file name.
 * @return 1 if the file was opened and all three components were read in
 *         full, 0 otherwise.
 *
 * Every rank executes the three MPI_Barrier calls regardless of local
 * failures, so a failing rank does not leave the others blocked.
 */
uint readVectorFromEnsight( latticeMesh* mesh, scalar*** field, char* fname ) {

    uint status = 1;


    // Build the file name; a name that does not fit is treated as a failure
    // instead of overflowing the buffer.
    // NOTE(review): assumes every rank builds the same name, so all ranks skip
    // the collective open together -- confirm against callers.

    char name[200];

    int len = snprintf(name, sizeof name, "lattice.%s_%d", fname, timeToIndex(mesh->time.current));

    int nameFits = ( len >= 0  &&  (size_t)len < sizeof name );


    // Open file

    MPI_File file;

    int opened = 0;

    if( nameFits ) {

        opened = ( MPI_File_open( MPI_COMM_WORLD, name, MPI_MODE_RDONLY, MPI_INFO_NULL, &file ) == MPI_SUCCESS );

    }

    if( !opened ) {

        status = 0;

    }



    // Allocate space

    *field = matrixDoubleAlloc(mesh->mesh.nPoints, 3, 0);

    float *auxField = malloc( mesh->mesh.nPoints * sizeof *auxField );

    // A NULL result is only an error when there is something to store
    if( mesh->mesh.nPoints > 0  &&  ( auxField == NULL  ||  *field == NULL ) ) {

        status = 0;

    }



    // Set Offset: fixed leading header, then for every lower-ranked patch its
    // three float components plus a per-patch header.
    // NOTE(review): 240 / 160 bytes presumably are the 80-character EnSight
    // description lines -- confirm against the writer.

    MPI_Offset offset = 240*sizeof(char) + sizeof(int);

    uint i,j;

    for(i = 0 ; i < mesh->parallel.pid ; i++ ) {

        offset += 3*mesh->parallel.nodesPerPatch[i] * sizeof(float);

        offset += 160*sizeof(char) + sizeof(int);

    }

    if( status  &&  MPI_File_seek(file, offset, MPI_SEEK_SET) != MPI_SUCCESS ) {

        status = 0;

    }



    // Read Array, one component at a time

    for( j = 0 ; j < 3 ; j++) {

        if( status ) {

            MPI_Status st;

            int count = 0;

            // Use MPI_Get_count rather than peeking at implementation-private
            // MPI_Status members; validate every component, not just the last.
            if(    MPI_File_read(file, auxField, mesh->mesh.nPoints, MPI_FLOAT, &st) != MPI_SUCCESS
                || MPI_Get_count(&st, MPI_FLOAT, &count) != MPI_SUCCESS
                || count != (int)mesh->mesh.nPoints ) {

                status = 0;

            }

            else {

                for( i = 0 ; i < mesh->mesh.nPoints ; i++ ) {

                    (*field)[i][j] = (scalar)auxField[i];

                }

            }

        }

        // Executed unconditionally so that all ranks stay in step
        MPI_Barrier(MPI_COMM_WORLD);

    }


    if( opened ) {

        MPI_File_close(&file);

    }

    free(auxField);


    return status;


}