stream_offset seek(stream_offset off, std::ios_base::seekdir way)
{
    // Moves the individual file pointer of `mpifile` by `off` and returns
    // the position reported by MPI_File_get_position afterwards.
    // The origin of the move is chosen by `way`:
    //   - ios_base::beg : the start of the sequence
    //   - ios_base::cur : the current position
    //   - ios_base::end : the end of the sequence
    // Any other value aborts the program.
    int whence;
    if (way == std::ios::beg) {
        whence = MPI_SEEK_SET;
    } else if (way == std::ios::end) {
        whence = MPI_SEEK_END;
    } else if (way == std::ios::cur) {
        whence = MPI_SEEK_CUR;
    } else {
        abort(); // Should never happen
    }

    MPI_File_seek(mpifile, (MPI_Offset)(off), whence);

    MPI_Offset pos;
    MPI_File_get_position(mpifile, &pos);

    ++seeks; // one more seek issued through this object
    return pos;
}
/*
 * Fortran entry point for MPI_File_seek.
 *
 * fh     - Fortran file handle, converted with MPI_File_f2c
 * offset - offset handed to MPI_File_seek unchanged
 * whence - seek origin handed to MPI_File_seek unchanged
 * ierr   - receives the return code of MPI_File_seek
 */
FORTRAN_API void FORT_CALL mpi_file_seek_(MPI_Fint *fh, MPI_Offset *offset,
                                          int *whence, int *ierr)
{
    MPI_File c_handle = MPI_File_f2c(*fh);

    *ierr = MPI_File_seek(c_handle, *offset, *whence);
}
/*
 * Completes a read that was started earlier on `reader` (is_busy set).
 *
 * Finishes the split collective read, advances reader->pos by the number of
 * bytes requested, restores a byte view at reader->off and positions the
 * file pointer at the new reader->pos.  The byte count reported by MPI is
 * then compared with the expected amount: bytes_wanted divided by the size
 * of reader->cartesian for striped reads, bytes_wanted otherwise.
 *
 * Returns LEMON_SUCCESS when nothing was pending or the expected amount was
 * read, LEMON_ERR_READ otherwise.  On LEMON_ERR_READ the pending-read fields
 * (bytes_wanted, buffer, is_busy, is_striped) are left untouched.
 */
int lemonFinishReading(LemonReader *reader)
{
  MPI_Status status;
  int nprocs;
  int bytes_read;
  char datarep[] = "native";

  /* No read in flight: nothing to finish. */
  if (!reader->is_busy)
    return LEMON_SUCCESS;

  MPI_Comm_size(reader->cartesian, &nprocs);

  MPI_File_read_at_all_end(*reader->fp, reader->buffer, &status);
  reader->pos += reader->bytes_wanted;

  MPI_File_set_view(*reader->fp, reader->off, MPI_BYTE, MPI_BYTE, datarep, MPI_INFO_NULL);
  MPI_File_seek(*reader->fp, reader->pos, MPI_SEEK_SET);

  MPI_Get_count(&status, MPI_BYTE, &bytes_read);

  /* Doing a data read should never get us to EOF, only header scanning */
  if (bytes_read == (reader->is_striped ? reader->bytes_wanted / nprocs : reader->bytes_wanted))
  {
    reader->bytes_wanted = 0;
    reader->buffer = NULL;
    reader->is_busy = 0;
    reader->is_striped = 0;
    return LEMON_SUCCESS;
  }

  fprintf(stderr, "[LEMON] Node %d reports in lemonFinishReading:\n"
                  " Could not read the required amount of data.\n", reader->my_rank);
  return LEMON_ERR_READ;
}
/*
 * Positions this process's individual file pointer so that the processes of
 * `communicator` can write their data back to back, in rank order.
 *
 * stream       - stream whose _file must be non-NULL and of MPIFile_Type
 * sizeToWrite  - number of bytes this process is going to write
 * communicator - communicator over which the prefix sum is taken
 *
 * Returns True when the file pointer was moved, False if the stream has no
 * MPI file attached or an MPI call reported an error.
 *
 * The sizes are summed in unsigned long long: the previous unsigned int
 * arithmetic wrapped around as soon as the combined output exceeded 4 GiB.
 */
Bool MPIStream_SetOffset( Stream* stream, SizeT sizeToWrite, MPI_Comm communicator ) {
	unsigned long long	localSizeToWrite;
	unsigned long long	sizePartialSum = 0;
	MPI_Offset		offset;

	if ( stream->_file == NULL ) {
		return False;
	}

	if ( stream->_file->type != MPIFile_Type ) {
		return False;
	}

	/* Inclusive prefix sum: the sizes of all lower ranks plus our own */
	localSizeToWrite = sizeToWrite;
	if ( MPI_Scan( &localSizeToWrite, &sizePartialSum, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, communicator ) != MPI_SUCCESS ) {
		return False;
	}

	/* Subtracting our own size leaves the start point of our region */
	offset = (MPI_Offset)( sizePartialSum - localSizeToWrite );

	if ( MPI_File_seek( *(MPI_File*)stream->_file->fileHandle, offset, MPI_SEEK_SET ) != MPI_SUCCESS ) {
		return False;
	}

	return True;
}
int main(int argc, char **argv) { /* MPI stuff. */ MPI_File fh; int my_rank, mpi_size; int data_in; MPI_Status status; /* Initialize MPI. */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /*MPI_Get_processor_name(mpi_name, &mpi_namelen);*/ /*printf("mpi_name: %s size: %d rank: %d\n", mpi_name, mpi_size, my_rank);*/ if (my_rank == 0) { printf("\n*** Testing basic MPI file I/O.\n"); printf("*** testing file create with parallel I/O with MPI..."); } if (MPI_File_open(MPI_COMM_WORLD, FILE, MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fh) != MPI_SUCCESS) ERR; if (MPI_File_seek(fh, my_rank * sizeof(int), MPI_SEEK_SET) != MPI_SUCCESS) ERR; if (MPI_File_write(fh, &my_rank, 1, MPI_INT, &status) != MPI_SUCCESS) ERR; if (MPI_File_close(&fh) != MPI_SUCCESS) ERR; /* Reopen and check the file. */ if (MPI_File_open(MPI_COMM_WORLD, FILE, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh) != MPI_SUCCESS) ERR; if (MPI_File_seek(fh, my_rank * sizeof(int), MPI_SEEK_SET) != MPI_SUCCESS) ERR; if (MPI_File_read(fh, &data_in, 1, MPI_INT, &status) != MPI_SUCCESS) ERR; if (data_in != my_rank) ERR; if (MPI_File_close(&fh) != MPI_SUCCESS) ERR; /* Shut down MPI. */ MPI_Finalize(); if (my_rank == 0) { SUMMARIZE_ERR; FINAL_RESULTS; } return 0; }
void cache_flush_ind(int myid, int numprocs, int size, char *filename) { char *buf; MPI_File fh; double time; int64_t comp = 0; assert(size != 0); if ((buf = (char *) malloc(MAX_BUFFER_SIZE * sizeof(char))) == NULL) { fprintf(stderr, "cache_flush_all: malloc buf of size %d failed\n", MAX_BUFFER_SIZE); } MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); MPI_File_seek(fh, 0, MPI_SEEK_SET); time = MPI_Wtime(); while (comp != size) { if (size - comp > MAX_BUFFER_SIZE) { comp += MAX_BUFFER_SIZE; MPI_File_write(fh, buf, MAX_BUFFER_SIZE, MPI_BYTE, MPI_STATUS_IGNORE); } else { int tmp_bytes = size - comp; comp += size - comp; MPI_File_write(fh, buf, tmp_bytes, MPI_BYTE, MPI_STATUS_IGNORE); } } free(buf); MPI_File_sync(fh); time = MPI_Wtime() - time; MPI_File_close(&fh); MPI_File_delete(filename, MPI_INFO_NULL); fprintf(stderr, "proc %d:cache_flush_ind: File %s written/deleted of " "size %.1f MBytes\n" "Time: %f secs Bandwidth: %f MBytes / sec\n\n", myid, filename, comp*numprocs/1024.0/1024.0, time, comp*numprocs/1024.0/1024.0 / time); }
/*
 * parallel_rewind()
 * Moves the file pointer of C->mfp back to offset 0.
 * Built with MPI: returns the value of MPI_File_seek unchanged.
 * Built without MPI: always returns 1.
 */
int parallel_rewind(coordinateInfo *C) {
#ifdef MPI
  return MPI_File_seek( *(C->mfp), 0L, MPI_SEEK_SET);
#else
  return 1;
#endif
}
/*
 * Fortran-callable MPI_File_seek.
 *
 * The Fortran handle is converted with MPI_File_f2c and `whence` with
 * OMPI_FINT_2_INT before the C routine is called; the C return code is
 * converted with OMPI_INT_2_FINT and stored through `ierr` unless `ierr`
 * is NULL.
 */
void ompi_file_seek_f(MPI_Fint *fh, MPI_Offset *offset,
                      MPI_Fint *whence, MPI_Fint *ierr)
{
    int rc;
    MPI_File file = MPI_File_f2c(*fh);

    rc = MPI_File_seek(file, (MPI_Offset) *offset,
                       OMPI_FINT_2_INT(*whence));

    if (ierr != NULL) {
        *ierr = OMPI_INT_2_FINT(rc);
    }
}
// whoAreWe .. 0 for elevator, 1 for person // ourID .. which number are we // floor .. where are we void dumpLog(int whoAreWe, int ourID, char* name, char* msg, int floor) { // what time is it now? time_t lfTime; time(&lfTime); char lfStrTime[1000]; struct tm * p = localtime(&lfTime); strftime(lfStrTime, 1000, "%c", p); char lfBuffer[lfChunkLen+1]; for (int i = 0; i < lfChunkLen; i++) { lfBuffer[i] = ' '; } char *newBuffer; newBuffer = strcpy(lfBuffer, "["); newBuffer = strcat(newBuffer, lfStrTime); // newBuffer = strcat(newBuffer, ctime(&lfTime)); newBuffer = strcat(newBuffer, "] "); char const lfDigit[] = "0123456789"; char b[] = "0"; char c[] = "0"; char* strID = b; strID[0] = lfDigit[ourID]; char* floorID = c; *floorID = lfDigit[floor]; if (whoAreWe == 0) { newBuffer = strcat(strcat(newBuffer, "Elevator "), strID); } else { newBuffer = strcat(strcat(newBuffer, "Person "), strID); newBuffer = strcat(strcat(strcat(newBuffer, " ("), name), ")"); } strcat(strcat(strcat(newBuffer, msg), floorID), "."); int i = 0; while (lfBuffer[i] != '\0') { i += 1; } lfBuffer[i] = ' '; lfBuffer[lfChunkLen-1] = '\n'; lfBuffer[lfChunkLen] = '\0'; MPI_File_seek(lfFile, (lfPos * lfSize + lfRank) * lfChunkLen, MPI_SEEK_SET); MPI_File_write(lfFile, lfBuffer, lfChunkLen, MPI_CHAR, &lfStatus); lfPos += 1; }
int main( int argc, char *argv[] ) { int errs = 0; int size, rank, i, *buf, rc; MPI_File fh; MPI_Comm comm; MPI_Status status; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; MPI_File_open( comm, (char*)"test.ord", MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh ); MPI_Comm_size( comm, &size ); MPI_Comm_rank( comm, &rank ); buf = (int *)malloc( size * sizeof(int) ); buf[0] = rank; rc = MPI_File_write_ordered( fh, buf, 1, MPI_INT, &status ); if (rc) { MTestPrintErrorMsg( "File_write_ordered", rc ); errs++; } /* make sure all writes finish before we seek/read */ MPI_Barrier(comm); /* Set the individual pointer to 0, since we want to use a read_all */ MPI_File_seek( fh, 0, MPI_SEEK_SET ); MPI_File_read_all( fh, buf, size, MPI_INT, &status ); for (i=0; i<size; i++) { if (buf[i] != i) { errs++; fprintf( stderr, "%d: buf[%d] = %d\n", rank, i, buf[i] ); } } MPI_File_seek_shared( fh, 0, MPI_SEEK_SET ); for (i=0; i<size; i++) buf[i] = -1; MPI_File_read_ordered( fh, buf, 1, MPI_INT, &status ); if (buf[0] != rank) { errs++; fprintf( stderr, "%d: buf[0] = %d\n", rank, buf[0] ); } free( buf ); MPI_File_close( &fh ); MTest_Finalize( errs ); MPI_Finalize(); return 0; }
void saveVectorMPI(char* name, const Vector x) { MPI_File f; MPI_File_open(*x->comm, name, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f); MPI_File_seek(f, 17*x->displ[x->comm_rank], MPI_SEEK_SET); for (int i=0;i<x->len;++i) { char num[21]; sprintf(num,"%016f ",x->data[i]); MPI_File_write(f, num, 17, MPI_CHAR, MPI_STATUS_IGNORE); } MPI_File_close(&f); }
/*
 * parallel_fseek_end()
 * Moves the file pointer of C->mfp to the end of the file.
 * Built with MPI: returns 0 on success; on failure reports the error
 * through printMPIerr and returns 1.
 * Built without MPI: always returns 1.
 */
int parallel_fseek_end(coordinateInfo *C) {
#ifdef MPI
  int err = MPI_File_seek( *(C->mfp), 0L, MPI_SEEK_END);

  if (err == MPI_SUCCESS)
    return 0;

  printMPIerr(err,"trajFile_fseek_end:");
  return 1;
#else
  return 1;
#endif
}
/*
 * parallel_fseek()
 * Moves the file pointer of C->mfp to the start of frame `frame`, i.e. to
 * offset C->titleSize + frame * C->frameSize from the beginning of the file.
 *
 * The offset is computed in MPI_Offset; evaluating frame * frameSize in the
 * operands' own type could overflow once the target lies beyond 2 GiB.
 *
 * Built with MPI: returns 0 on success; on failure reports the error
 * through printMPIerr and returns 1.
 * Built without MPI: always returns 1.
 */
int parallel_fseek(coordinateInfo *C, int frame) {
#ifdef MPI
  MPI_Offset offset;
  int err;

  offset = (MPI_Offset) C->titleSize
         + (MPI_Offset) frame * (MPI_Offset) C->frameSize;

  err = MPI_File_seek( *(C->mfp), offset, MPI_SEEK_SET);
  if (err != MPI_SUCCESS) {
    printMPIerr(err,"trajFile_fseek");
    return 1;
  }
  return 0;
#else
  return 1;
#endif
}
// Reads this process's section of a combined parallel file into `buffer`.
//
// File layout as consumed here: one int holding the process count, then one
// int per process holding the end offset of that process's section; the
// first section starts right behind this table of (pc.size()+1) ints.
// The first process of `pc` reads the table and scatters each process its
// begin and end offset; every process then reads its own byte range.
//
// buffer      - cleared, then filled with the bytes of this process's section
// strFilename - file to read
// pc          - communicator whose processes share the file
//
// Throws (UG_THROW / UG_COND_THROW) if the file cannot be opened, if the
// stored process count differs from pcl::NumProcs(), or if the offset table
// yields a negative section size.
//
// Changes compared to the previous version:
//  - the name is passed to MPI directly; copying it into a char[1024] with
//    strcpy overflowed for long paths
//  - the file is opened MPI_MODE_RDONLY, so a missing file is reported as an
//    open error and is no longer created empty by a read
//  - the scratch memory is a std::vector, so it is released on every path
void ReadCombinedParallelFile(ug::BinaryBuffer &buffer, std::string strFilename, pcl::ProcessCommunicator pc)
{
	MPI_Status status;
	MPI_Comm m_mpiComm = pc.get_mpi_communicator();
	MPI_File fh;

	// const_cast: older MPI bindings declare the file name as char*
	if(MPI_File_open(m_mpiComm, const_cast<char*>(strFilename.c_str()),
			MPI_MODE_RDONLY, MPI_INFO_NULL, &fh))
		UG_THROW("could not open "<<strFilename);

	std::vector<int> allNextOffsets;
	allNextOffsets.resize(pc.size()+1);

	// the first section begins directly behind the header table
	allNextOffsets[0] = (pc.size()+1)*sizeof(int);

	const bool bFirst = pc.get_proc_id(0) == pcl::ProcRank();
	if(bFirst)
	{
		int numProcs = 0;
		MPI_File_read(fh, &numProcs, sizeof(numProcs), MPI_BYTE, &status);
		UG_COND_THROW(numProcs != pcl::NumProcs(), "checkPoint numProcs = " << numProcs << ", but running on " << pcl::NumProcs());

		for(size_t i=1; i<allNextOffsets.size(); i++)
			MPI_File_read(fh, &allNextOffsets[i], sizeof(allNextOffsets[i]), MPI_BYTE, &status);
	}

	// entry r is the begin, entry r+1 the end of the section of process r
	int myNextOffset = 0, myNextOffset2 = 0;
	MPI_Scatter(&allNextOffsets[0], 1, MPI_INT, &myNextOffset, 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);
	MPI_Scatter(&allNextOffsets[1], 1, MPI_INT, &myNextOffset2, 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);

	const int mySize = myNextOffset2-myNextOffset;
	if(mySize < 0)
	{
		MPI_File_close(&fh);
		UG_THROW("corrupt offset table in " << strFilename << ": section size " << mySize);
	}

	MPI_File_seek(fh, myNextOffset, MPI_SEEK_SET);

	// at least one element so that &data[0] is valid for an empty section
	std::vector<char> data(mySize > 0 ? mySize : 1);
	MPI_File_read(fh, &data[0], mySize, MPI_BYTE, &status);

	buffer.clear();
	buffer.reserve(mySize);
	buffer.write(&data[0], mySize);

	MPI_File_close(&fh);
}
// Writes the contents of `buffer` of every process of `pc` into one file.
//
// File layout produced: one int with pcl::NumProcs(), then one int per
// process with the end offset of that process's section, then the sections
// themselves in rank order.  End offsets come from an inclusive MPI_Scan
// over the section sizes, shifted by the size of the header table.
//
// buffer      - data of this process; buffer.write_pos() bytes are written
// strFilename - file to create / overwrite
// pc          - communicator whose processes share the file
//
// Throws (UG_THROW) if the file cannot be opened.
//
// The file name is handed to MPI directly; the previous strcpy into a
// char[1024] overflowed for long paths.
void WriteCombinedParallelFile(ug::BinaryBuffer &buffer, std::string strFilename, pcl::ProcessCommunicator pc)
{
	MPI_Status status;
	MPI_Comm m_mpiComm = pc.get_mpi_communicator();
	MPI_File fh;
	const bool bFirst = pc.get_proc_id(0) == pcl::ProcRank();

	// const_cast: older MPI bindings declare the file name as char*
	if(MPI_File_open(m_mpiComm, const_cast<char*>(strFilename.c_str()),
			MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh))
		UG_THROW("could not open "<<strFilename);

	// inclusive prefix sum: end of our section relative to the first section
	int mySize = buffer.write_pos();
	int myNextOffset = 0;
	MPI_Scan(&mySize, &myNextOffset, 1, MPI_INT, MPI_SUM, m_mpiComm);

	std::vector<int> allNextOffsets;
	allNextOffsets.resize(pc.size(), 0);

	// shift by the header table: process count + one end offset per process
	myNextOffset += (pc.size()+1)*sizeof(int);
	MPI_Gather(&myNextOffset, 1, MPI_INT, &allNextOffsets[0], 1, MPI_INT, pc.get_proc_id(0), m_mpiComm);

	if(bFirst)
	{
		int numProcs = pcl::NumProcs();
		MPI_File_write(fh, &numProcs, sizeof(numProcs), MPI_BYTE, &status);
		for(size_t i=0; i<allNextOffsets.size(); i++)
			MPI_File_write(fh, &allNextOffsets[i], sizeof(allNextOffsets[i]), MPI_BYTE, &status);
	}

	// our section starts mySize bytes before its end offset
	int myOffset = myNextOffset - mySize;
	MPI_File_seek(fh, myOffset, MPI_SEEK_SET);
	MPI_File_write(fh, buffer.buffer(), mySize, MPI_BYTE, &status);

	MPI_File_close(&fh);
}
void seissol::checkpoint::mpio::Wavefield::write(const void* header, size_t headerSize) { SCOREP_USER_REGION("CheckPoint_write", SCOREP_USER_REGION_TYPE_FUNCTION); logInfo(rank()) << "Checkpoint backend: Writing."; // Write the header writeHeader(header, headerSize); // Save data SCOREP_USER_REGION_DEFINE(r_write_wavefield); SCOREP_USER_REGION_BEGIN(r_write_wavefield, "checkpoint_write_wavefield", SCOREP_USER_REGION_TYPE_COMMON); checkMPIErr(setDataView(file())); unsigned int totalIter = totalIterations(); unsigned int iter = iterations(); unsigned int count = dofsPerIteration(); if (m_useLargeBuffer) { totalIter = (totalIter + sizeof(real) - 1) / sizeof(real); iter = (iter + sizeof(real) - 1) / sizeof(real); count *= sizeof(real); } unsigned long offset = 0; for (unsigned int i = 0; i < totalIter; i++) { if (i == iter-1) // Last iteration count = numDofs() - (iter-1) * count; checkMPIErr(MPI_File_write_all(file(), const_cast<real*>(&dofs()[offset]), count, MPI_DOUBLE, MPI_STATUS_IGNORE)); if (i < iter-1) offset += count; // otherwise we just continue writing the last chunk over and over else if (i != totalIter-1) checkMPIErr(MPI_File_seek(file(), -count * sizeof(real), MPI_SEEK_CUR)); } SCOREP_USER_REGION_END(r_write_wavefield); // Finalize the checkpoint finalizeCheckpoint(); logInfo(rank()) << "Checkpoint backend: Writing. Done."; }
void wordCountLog(char* key, int value) { // what time is it now? time_t lfTime; time(&lfTime); char lfStrTime[1000]; struct tm * p = localtime(&lfTime); strftime(lfStrTime, 1000, "%c", p); char lfBuffer[lfChunkLen+1]; for (int i = 0; i < lfChunkLen; i++) { lfBuffer[i] = ' '; } char *newBuffer; newBuffer = strcpy(lfBuffer, "["); newBuffer = strcat(newBuffer, lfStrTime); // newBuffer = strcat(newBuffer, ctime(&lfTime)); newBuffer = strcat(newBuffer, "] "); char const lfDigit[] = "0123456789"; char b[] = "0"; char* valID = b; valID[0] = lfDigit[value]; newBuffer = strcat(strcat(strcat(newBuffer, key), ": "), valID); int i = 0; while (lfBuffer[i] != '\0') { i += 1; } lfBuffer[i] = ' '; lfBuffer[lfChunkLen-1] = '\n'; lfBuffer[lfChunkLen] = '\0'; MPI_File_seek(lfFile, (lfPos * lfSize + lfRank) * lfChunkLen, MPI_SEEK_SET); MPI_File_write(lfFile, lfBuffer, lfChunkLen, MPI_CHAR, &lfStatus); lfPos += 1; }
/*
 * Positions the individual file pointer of `fd` for an access at absolute
 * offset `offset` and returns `offset` unchanged.
 *
 * Without a file view the pointer is set to `offset` itself.  With a file
 * view the offset is first converted to the view's units (transfers):
 *  - file per process: offset / transferSize
 *  - shared file: full blocks of this task that precede the offset, plus
 *    the transfer index inside the current block of this task
 */
static IOR_offset_t SeekOffset_MPIIO(MPI_File fd, IOR_offset_t offset,
                                     IOR_param_t * param)
{
    IOR_offset_t seekTo = offset;
    int taskSlot;   /* position of this task among the tasks sharing the file */
    int sharers;    /* number of tasks sharing the file */

    if (!param->filePerProc) {
        taskSlot = (rank + rankOffset) % param->numTasks;
        sharers = param->numTasks;
    } else {
        taskSlot = 0;
        sharers = 1;
    }

    /* recall that offsets in a file view are counted in units of transfer size */
    if (param->useFileView && param->filePerProc) {
        seekTo = seekTo / param->transferSize;
    } else if (param->useFileView) {
        /* this formula finds a file view offset for a task from an absolute offset */
        seekTo = ((param->blockSize / param->transferSize)
                  * (seekTo / (param->blockSize * sharers)))
               + (((seekTo % (param->blockSize * sharers))
                   - (taskSlot * param->blockSize))
                  / param->transferSize);
    }

    MPI_CHECK(MPI_File_seek(fd, seekTo, MPI_SEEK_SET), "cannot seek offset");
    return (offset);
} /* SeekOffset_MPIIO() */
// Not meant to be called.  In PARALLEL builds the body starts by raising
// ImproperUseException through EXCEPTION1 and is followed by calls to
// MPI_File_open, MPI_File_get_size, MPI_File_seek and MPI_File_read on
// uninitialized locals.
// NOTE(review): judging by the function name these calls presumably exist
// only so that the MPI-IO symbols are referenced at link time - confirm
// before changing or removing any of them.
// In non-PARALLEL builds the function is empty.
void PullInMPI_IOSymbols()
{
#ifdef PARALLEL
    //Don't call this!
    EXCEPTION1(ImproperUseException, "Do not call PullInMPI_IOSymbols");

    // Deliberately left uninitialized; the values are only placeholders
    // for the argument lists below.
    MPI_Info info;
    MPI_File fh;
    MPI_Offset sz;
    char *nm;
    int whence;
    void *buf;
    int count;
    MPI_Datatype datatype;
    MPI_Status status;

    MPI_File_open(VISIT_MPI_COMM, nm, 0, info, &fh);
    MPI_File_get_size(fh, &sz);
    MPI_File_seek(fh, sz, whence);
    MPI_File_read(fh, buf, count, datatype, &status);
#endif
}
int main(int argc, char **argv) { MPI_File fh; MPI_Status status; MPI_Offset size; long long *buf, i; char *filename; int j, mynod, nprocs, len, flag, err; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); if (nprocs != 1) { fprintf(stderr, "Run this program on one process only\n"); MPI_Abort(MPI_COMM_WORLD, 1); } i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { fprintf(stderr, "\n*# Usage: large -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len+1); strcpy(filename, *argv); fprintf(stderr, "This program creates an 4 Gbyte file. Don't run it if you don't have that much disk space!\n"); buf = (long long *) malloc(SIZE * sizeof(long long)); if (!buf) { fprintf(stderr, "not enough memory to allocate buffer\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); for (i=0; i<NTIMES; i++) { for (j=0; j<SIZE; j++) buf[j] = i*SIZE + j; err = MPI_File_write(fh, buf, SIZE, MPI_DOUBLE, &status); /* MPI_DOUBLE because not all MPI implementations define MPI_LONG_LONG_INT, even though the C compiler supports long long. */ if (err != MPI_SUCCESS) { fprintf(stderr, "MPI_File_write returned error\n"); MPI_Abort(MPI_COMM_WORLD, 1); } } MPI_File_get_size(fh, &size); fprintf(stderr, "file size = %lld bytes\n", size); MPI_File_seek(fh, 0, MPI_SEEK_SET); for (j=0; j<SIZE; j++) buf[j] = -1; flag = 0; for (i=0; i<NTIMES; i++) { err = MPI_File_read(fh, buf, SIZE, MPI_DOUBLE, &status); /* MPI_DOUBLE because not all MPI implementations define MPI_LONG_LONG_INT, even though the C compiler supports long long. 
*/ if (err != MPI_SUCCESS) { fprintf(stderr, "MPI_File_write returned error\n"); MPI_Abort(MPI_COMM_WORLD, 1); } for (j=0; j<SIZE; j++) if (buf[j] != i*SIZE + j) { fprintf(stderr, "error: buf %d is %lld, should be %lld \n", j, buf[j], i*SIZE + j); flag = 1; } } if (!flag) fprintf(stderr, "Data read back is correct\n"); MPI_File_close(&fh); free(buf); free(filename); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int i, n, nlocal; int numprocs, dims[2], periods[2], keep_dims[2]; int myrank, my2drank, mycoords[2]; MPI_File f; char* filename = "input/16"; MPI_Comm comm_2d, comm_row, comm_col; MPI_Status status; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &numprocs); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); dims[ROW] = dims[COL] = sqrt(numprocs); periods[ROW] = periods[COL] = 1; MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d); MPI_Comm_rank(comm_2d, &my2drank); MPI_Cart_coords(comm_2d, my2drank, 2, mycoords); keep_dims[ROW] = 0; keep_dims[COL] = 1; MPI_Cart_sub(comm_2d, keep_dims, &comm_row); keep_dims[ROW] = 1; keep_dims[COL] = 0; MPI_Cart_sub(comm_2d, keep_dims, &comm_col); if(MPI_File_open(comm_2d, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &f) != MPI_SUCCESS) { fprintf(stderr, "Cannot open file %s\n", filename); MPI_Abort(comm_2d, FILE_NOT_FOUND); MPI_Finalize(); return 1; } MPI_File_seek(f, 0, MPI_SEEK_SET); MPI_File_read(f, &n, 1, MPI_INT, &status); nlocal = n/dims[ROW]; int *a = (int *)malloc(nlocal * nlocal * sizeof(int)); for(i = 0; i < nlocal; i++) { MPI_File_seek(f, ((mycoords[0] * nlocal + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET); MPI_File_read(f, &a[i * nlocal], nlocal, MPI_INT, &status); } MPI_File_close(&f); int j; if(my2drank == 3) { for(i = 0; i < nlocal; i++) { for(j = 0; j < nlocal; j++) { printf("%d ", a[i * nlocal +j]); } printf("\n"); } } double start = MPI_Wtime(); floyd_all_pairs_sp_2d(n, nlocal, a, comm_2d, comm_row, comm_col); double stop = MPI_Wtime(); printf("[%d] Completed in %1.3f seconds\n", my2drank, stop-start); MPI_Comm_free(&comm_col); MPI_Comm_free(&comm_row); if(my2drank == 3) { for(i = 0; i < nlocal; i++) { for(j = 0; j < nlocal; j++) { printf("%d ", a[i * nlocal +j]); } printf("\n"); } } if(MPI_File_open(comm_2d, "output/16", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f) != MPI_SUCCESS) { printf("Cannot open file %s\n", "out"); MPI_Abort(comm_2d, 
FILE_NOT_FOUND); MPI_Finalize(); return 1; } if(my2drank == 0) { MPI_File_seek(f, 0, MPI_SEEK_SET); MPI_File_write(f, &n, 1, MPI_INT, &status); } for(i = 0; i < nlocal; i++) { MPI_File_seek(f, ((mycoords[0] * nlocal + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET); MPI_File_write(f, &a[i * nlocal], nlocal, MPI_INT, &status); } MPI_File_close(&f); free(a); MPI_Comm_free(&comm_2d); MPI_Finalize(); return 0; }
int main(int argc, char *argv[]) { int i; int is_mpi = 1; int debug = 0; bool verbose = false; bool force = false; // fixed header changes int set_version_major = -1; int set_version_minor = -1; int set_point_data_format = -1; int set_point_data_record_length = -1; int set_gps_time_endcoding = -1; // variable header changes bool remove_extra_header = false; bool remove_all_variable_length_records = false; int remove_variable_length_record = -1; int remove_variable_length_record_from = -1; int remove_variable_length_record_to = -1; bool remove_tiling_vlr = false; bool remove_original_vlr = false; // extract a subsequence //unsigned int subsequence_start = 0; //unsigned int subsequence_stop = U32_MAX; I64 subsequence_start = 0; I64 subsequence_stop = I64_MAX; // fix files with corrupt points bool clip_to_bounding_box = false; double start_time = 0; time_t wall_start_time; time_t wall_end_time; LASreadOpener lasreadopener; //if(is_mpi)lasreadopener.setIsMpi(TRUE); GeoProjectionConverter geoprojectionconverter; LASwriteOpener laswriteopener; if(is_mpi)laswriteopener.setIsMpi(TRUE); int process_count = 1; int rank = 0; start_time = taketime(); time(&wall_start_time); if (is_mpi){ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&process_count); MPI_Comm_rank(MPI_COMM_WORLD,&rank); if(debug) printf ("MPI task %d has started...\n", rank); } if (argc == 1) { fprintf(stderr,"las2las.exe is better run in the command line or via the lastool.exe GUI\n"); char file_name[256]; fprintf(stderr,"enter input file: "); fgets(file_name, 256, stdin); file_name[strlen(file_name)-1] = '\0'; lasreadopener.set_file_name(file_name); fprintf(stderr,"enter output file: "); fgets(file_name, 256, stdin); file_name[strlen(file_name)-1] = '\0'; laswriteopener.set_file_name(file_name); } else { for (i = 1; i < argc; i++) { //if (argv[i][0] == '�') argv[i][0] = '-'; if (strcmp(argv[i],"-week_to_adjusted") == 0) { set_gps_time_endcoding = 1; } else if (strcmp(argv[i],"-adjusted_to_week") == 0) { 
set_gps_time_endcoding = 0; } } if (!geoprojectionconverter.parse(argc, argv)) byebye(true); if (!lasreadopener.parse(argc, argv)) byebye(true); if (!laswriteopener.parse(argc, argv)) byebye(true); } for (i = 1; i < argc; i++) { if (argv[i][0] == '\0') { continue; } else if (strcmp(argv[i],"-h") == 0 || strcmp(argv[i],"-help") == 0) { fprintf(stderr, "LAStools (by [email protected]) version %d\n", LAS_TOOLS_VERSION); usage(); } else if (strcmp(argv[i],"-v") == 0 || strcmp(argv[i],"-verbose") == 0) { verbose = true; } else if (strcmp(argv[i],"-version") == 0) { fprintf(stderr, "LAStools (by [email protected]) version %d\n", LAS_TOOLS_VERSION); byebye(); } else if (strcmp(argv[i],"-gui") == 0) { fprintf(stderr, "WARNING: not compiled with GUI support. ignoring '-gui' ...\n"); } else if (strcmp(argv[i],"-cores") == 0) { fprintf(stderr, "WARNING: not compiled with multi-core batching. ignoring '-cores' ...\n"); i++; } else if (strcmp(argv[i],"-force") == 0) { force = true; } else if (strcmp(argv[i],"-subseq") == 0) { if ((i+2) >= argc) { fprintf(stderr,"ERROR: '%s' needs 2 arguments: start stop\n", argv[i]); byebye(true); } subsequence_start = (unsigned int)atoi(argv[i+1]); subsequence_stop = (unsigned int)atoi(argv[i+2]); i+=2; } else if (strcmp(argv[i],"-start_at_point") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: start\n", argv[i]); byebye(true); } subsequence_start = (unsigned int)atoi(argv[i+1]); i+=1; } else if (strcmp(argv[i],"-stop_at_point") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: stop\n", argv[i]); byebye(true); } subsequence_stop = (unsigned int)atoi(argv[i+1]); i+=1; } else if (strcmp(argv[i],"-set_version") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: major.minor\n", argv[i]); byebye(true); } if (sscanf(argv[i+1],"%d.%d",&set_version_major,&set_version_minor) != 2) { fprintf(stderr, "ERROR: cannot understand argument '%s' for '%s'\n", argv[i+1], argv[i]); 
usage(true); } i+=1; } else if (strcmp(argv[i],"-set_version_major") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: major\n", argv[i]); byebye(true); } set_version_major = atoi(argv[i+1]); i+=1; } else if (strcmp(argv[i],"-set_version_minor") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: minor\n", argv[i]); byebye(true); } set_version_minor = atoi(argv[i+1]); i+=1; } else if (strcmp(argv[i],"-remove_extra") == 0) { remove_extra_header = true; } else if (strcmp(argv[i],"-remove_all_vlrs") == 0) { remove_all_variable_length_records = true; } else if (strcmp(argv[i],"-remove_vlr") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: number\n", argv[i]); byebye(true); } remove_variable_length_record = atoi(argv[i+1]); remove_variable_length_record_from = -1; remove_variable_length_record_to = -1; i++; } else if (strcmp(argv[i],"-remove_vlrs_from_to") == 0) { if ((i+2) >= argc) { fprintf(stderr,"ERROR: '%s' needs 2 arguments: start end\n", argv[i]); byebye(true); } remove_variable_length_record = -1; remove_variable_length_record_from = atoi(argv[i+1]); remove_variable_length_record_to = atoi(argv[i+2]); i+=2; } else if (strcmp(argv[i],"-remove_tiling_vlr") == 0) { remove_tiling_vlr = true; i++; } else if (strcmp(argv[i],"-remove_original_vlr") == 0) { remove_original_vlr = true; i++; } else if (strcmp(argv[i],"-set_point_type") == 0 || strcmp(argv[i],"-set_point_data_format") == 0 || strcmp(argv[i],"-point_type") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: type\n", argv[i]); byebye(true); } set_point_data_format = atoi(argv[i+1]); i++; } else if (strcmp(argv[i],"-set_point_data_record_length") == 0 || strcmp(argv[i],"-set_point_size") == 0 || strcmp(argv[i],"-point_size") == 0) { if ((i+1) >= argc) { fprintf(stderr,"ERROR: '%s' needs 1 argument: size\n", argv[i]); byebye(true); } set_point_data_record_length = atoi(argv[i+1]); i++; } else if 
(strcmp(argv[i],"-clip_to_bounding_box") == 0 || strcmp(argv[i],"-clip_to_bb") == 0) { clip_to_bounding_box = true; } else if ((argv[i][0] != '-') && (lasreadopener.get_file_name_number() == 0)) { lasreadopener.add_file_name(argv[i]); argv[i][0] = '\0'; } else { fprintf(stderr, "ERROR: cannot understand argument '%s'\n", argv[i]); usage(true); } } // check input if (!lasreadopener.active()) { fprintf(stderr,"ERROR: no input specified\n"); usage(true, argc==1); } BOOL extra_pass = laswriteopener.is_piped(); // for piped output we need an extra pass if (extra_pass) { if (lasreadopener.is_piped()) { fprintf(stderr, "ERROR: input and output cannot both be piped\n"); usage(true); } } // make sure we do not corrupt the input file if (lasreadopener.get_file_name() && laswriteopener.get_file_name() && (strcmp(lasreadopener.get_file_name(), laswriteopener.get_file_name()) == 0)) { fprintf(stderr, "ERROR: input and output file name are identical\n"); usage(true); } // possibly loop over multiple input files while (lasreadopener.active()) { // if (verbose) start_time = taketime(); // open lasreader LASreader* lasreader = lasreadopener.open(); if (lasreader == 0) { fprintf(stderr, "ERROR: could not open lasreader\n"); usage(true, argc==1); } // store the inventory for the header LASinventory lasinventory; // the point we write sometimes needs to be copied LASpoint* point = 0; // prepare the header for output if (set_gps_time_endcoding != -1) { if (set_gps_time_endcoding == 0) { if ((lasreader->header.global_encoding & 1) == 0) { fprintf(stderr, "WARNING: global encoding indicates file already in GPS week time\n"); if (force) { fprintf(stderr, " forced conversion.\n"); } else { fprintf(stderr, " use '-force' to force conversion.\n"); byebye(true); } } else { lasreader->header.global_encoding &= ~1; } } else if (set_gps_time_endcoding == 1) { if ((lasreader->header.global_encoding & 1) == 1) { fprintf(stderr, "WARNING: global encoding indicates file already in Adjusted Standard 
GPS time\n"); if (force) { fprintf(stderr, " forced conversion.\n"); } else { fprintf(stderr, " use '-force' to force conversion.\n"); byebye(true); } } else { lasreader->header.global_encoding |= 1; } } } if (set_version_major != -1) { if (set_version_major != 1) { fprintf(stderr, "ERROR: unknown version_major %d\n", set_version_major); byebye(true); } lasreader->header.version_major = (U8)set_version_major; } if (set_version_minor >= 0) { if (set_version_minor > 4) { fprintf(stderr, "ERROR: unknown version_minor %d\n", set_version_minor); byebye(true); } if (set_version_minor < 3) { if (lasreader->header.version_minor == 3) { lasreader->header.header_size -= 8; lasreader->header.offset_to_point_data -= 8; } else if (lasreader->header.version_minor >= 4) { lasreader->header.header_size -= (8 + 140); lasreader->header.offset_to_point_data -= (8 + 140); } } else if (set_version_minor == 3) { if (lasreader->header.version_minor < 3) { lasreader->header.header_size += 8; lasreader->header.offset_to_point_data += 8; lasreader->header.start_of_waveform_data_packet_record = 0; } else if (lasreader->header.version_minor >= 4) { lasreader->header.header_size -= 140; lasreader->header.offset_to_point_data -= 140; } } else if (set_version_minor == 4) { if (lasreader->header.version_minor < 3) { lasreader->header.header_size += (8 + 140); lasreader->header.offset_to_point_data += (8 + 140); lasreader->header.start_of_waveform_data_packet_record = 0; } else if (lasreader->header.version_minor == 3) { lasreader->header.header_size += 140; lasreader->header.offset_to_point_data += 140; } } if ((set_version_minor <= 3) && (lasreader->header.version_minor >= 4)) { if (lasreader->header.point_data_format > 5) { switch (lasreader->header.point_data_format) { case 6: fprintf(stderr, "WARNING: downgrading point_data_format from %d to 1\n", lasreader->header.point_data_format); lasreader->header.point_data_format = 1; fprintf(stderr, " and point_data_record_length from %d to %d\n", 
lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 2); lasreader->header.point_data_record_length -= 2; break; case 7: fprintf(stderr, "WARNING: downgrading point_data_format from %d to 3\n", lasreader->header.point_data_format); lasreader->header.point_data_format = 3; fprintf(stderr, " and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 2); lasreader->header.point_data_record_length -= 2; break; case 8: fprintf(stderr, "WARNING: downgrading point_data_format from %d to 3\n", lasreader->header.point_data_format); lasreader->header.point_data_format = 3; fprintf(stderr, " and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 4); lasreader->header.point_data_record_length -= 4; break; case 9: fprintf(stderr, "WARNING: downgrading point_data_format from %d to 4\n", lasreader->header.point_data_format); lasreader->header.point_data_format = 4; fprintf(stderr, " and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 2); lasreader->header.point_data_record_length -= 2; break; case 10: fprintf(stderr, "WARNING: downgrading point_data_format from %d to 5\n", lasreader->header.point_data_format); lasreader->header.point_data_format = 5; fprintf(stderr, " and point_data_record_length from %d to %d\n", lasreader->header.point_data_record_length, lasreader->header.point_data_record_length - 4); lasreader->header.point_data_record_length -= 4; break; default: fprintf(stderr, "ERROR: unknown point_data_format %d\n", lasreader->header.point_data_format); byebye(true); } } point = new LASpoint; point->init(&lasreader->header, lasreader->header.point_data_format, lasreader->header.point_data_record_length); } lasreader->header.version_minor = (U8)set_version_minor; } // are we supposed to change the 
point data format if (set_point_data_format != -1) { if (set_point_data_format < 0 || set_point_data_format > 10) { fprintf(stderr, "ERROR: unknown point_data_format %d\n", set_point_data_format); byebye(true); } // depending on the conversion we may need to copy the point if (convert_point_type_from_to[lasreader->header.point_data_format][set_point_data_format]) { if (point == 0) point = new LASpoint; } lasreader->header.point_data_format = (U8)set_point_data_format; lasreader->header.clean_laszip(); switch (lasreader->header.point_data_format) { case 0: lasreader->header.point_data_record_length = 20; break; case 1: lasreader->header.point_data_record_length = 28; break; case 2: lasreader->header.point_data_record_length = 26; break; case 3: lasreader->header.point_data_record_length = 34; break; case 4: lasreader->header.point_data_record_length = 57; break; case 5: lasreader->header.point_data_record_length = 63; break; case 6: lasreader->header.point_data_record_length = 30; break; case 7: lasreader->header.point_data_record_length = 36; break; case 8: lasreader->header.point_data_record_length = 38; break; case 9: lasreader->header.point_data_record_length = 59; break; case 10: lasreader->header.point_data_record_length = 67; break; } } // are we supposed to change the point data record length if (set_point_data_record_length != -1) { I32 num_extra_bytes = 0; switch (lasreader->header.point_data_format) { case 0: num_extra_bytes = set_point_data_record_length - 20; break; case 1: num_extra_bytes = set_point_data_record_length - 28; break; case 2: num_extra_bytes = set_point_data_record_length - 26; break; case 3: num_extra_bytes = set_point_data_record_length - 34; break; case 4: num_extra_bytes = set_point_data_record_length - 57; break; case 5: num_extra_bytes = set_point_data_record_length - 63; break; case 6: num_extra_bytes = set_point_data_record_length - 30; break; case 7: num_extra_bytes = set_point_data_record_length - 36; break; case 8: 
num_extra_bytes = set_point_data_record_length - 38; break; case 9: num_extra_bytes = set_point_data_record_length - 59; break; case 10: num_extra_bytes = set_point_data_record_length - 67; break; } if (num_extra_bytes < 0) { fprintf(stderr, "ERROR: point_data_format %d needs record length of at least %d\n", lasreader->header.point_data_format, set_point_data_record_length - num_extra_bytes); byebye(true); } if (lasreader->header.point_data_record_length < set_point_data_record_length) { if (!point) point = new LASpoint; } lasreader->header.point_data_record_length = (U16)set_point_data_record_length; lasreader->header.clean_laszip(); } // if the point needs to be copied set up the data fields if (point) { point->init(&lasreader->header, lasreader->header.point_data_format, lasreader->header.point_data_record_length); } // maybe we should remove some stuff if (remove_extra_header) { lasreader->header.clean_user_data_in_header(); lasreader->header.clean_user_data_after_header(); } if (remove_all_variable_length_records) { lasreader->header.clean_vlrs(); } else { if (remove_variable_length_record != -1) { lasreader->header.remove_vlr(remove_variable_length_record); } if (remove_variable_length_record_from != -1) { for (i = remove_variable_length_record_to; i >= remove_variable_length_record_from; i--) { lasreader->header.remove_vlr(i); } } } if (remove_tiling_vlr) { lasreader->header.clean_lastiling(); } if (remove_original_vlr) { lasreader->header.clean_lasoriginal(); } // maybe we should add / change the projection information LASquantizer* reproject_quantizer = 0; LASquantizer* saved_quantizer = 0; if (geoprojectionconverter.has_projection(true) || geoprojectionconverter.has_projection(false)) { if (!geoprojectionconverter.has_projection(true) && lasreader->header.vlr_geo_keys) { geoprojectionconverter.set_projection_from_geo_keys(lasreader->header.vlr_geo_keys[0].number_of_keys, (GeoProjectionGeoKeys*)lasreader->header.vlr_geo_key_entries, 
lasreader->header.vlr_geo_ascii_params, lasreader->header.vlr_geo_double_params); } if (geoprojectionconverter.has_projection(true) && geoprojectionconverter.has_projection(false)) { reproject_quantizer = new LASquantizer(); double point[3]; point[0] = (lasreader->header.min_x+lasreader->header.max_x)/2; point[1] = (lasreader->header.min_y+lasreader->header.max_y)/2; point[2] = (lasreader->header.min_z+lasreader->header.max_z)/2; geoprojectionconverter.to_target(point); reproject_quantizer->x_scale_factor = geoprojectionconverter.get_target_precision(); reproject_quantizer->y_scale_factor = geoprojectionconverter.get_target_precision(); reproject_quantizer->z_scale_factor = lasreader->header.z_scale_factor; reproject_quantizer->x_offset = ((I64)((point[0]/reproject_quantizer->x_scale_factor)/10000000))*10000000*reproject_quantizer->x_scale_factor; reproject_quantizer->y_offset = ((I64)((point[1]/reproject_quantizer->y_scale_factor)/10000000))*10000000*reproject_quantizer->y_scale_factor; reproject_quantizer->z_offset = ((I64)((point[2]/reproject_quantizer->z_scale_factor)/10000000))*10000000*reproject_quantizer->z_scale_factor; } int number_of_keys; GeoProjectionGeoKeys* geo_keys = 0; int num_geo_double_params; double* geo_double_params = 0; if (geoprojectionconverter.get_geo_keys_from_projection(number_of_keys, &geo_keys, num_geo_double_params, &geo_double_params, !geoprojectionconverter.has_projection(false))) { lasreader->header.set_geo_keys(number_of_keys, (LASvlr_key_entry*)geo_keys); free(geo_keys); if (geo_double_params) { lasreader->header.set_geo_double_params(num_geo_double_params, geo_double_params); free(geo_double_params); } else { lasreader->header.del_geo_double_params(); } lasreader->header.del_geo_ascii_params(); } } // do we need an extra pass BOOL extra_pass = laswriteopener.is_piped(); // for piped output we need an extra pass if (extra_pass) { if (lasreadopener.is_piped()) { fprintf(stderr, "ERROR: input and output cannot both be piped\n"); 
usage(true); } if (verbose) fprintf(stderr, "extra pass for piped output: reading %lld points ...\n", lasreader->npoints); // maybe seek to start position if (subsequence_start) lasreader->seek(subsequence_start); while (lasreader->read_point()) { if (lasreader->p_count > subsequence_stop) break; if (clip_to_bounding_box) { if (!lasreader->point.inside_box(lasreader->header.min_x, lasreader->header.min_y, lasreader->header.min_z, lasreader->header.max_x, lasreader->header.max_y, lasreader->header.max_z)) { continue; } } if (reproject_quantizer) { lasreader->point.compute_coordinates(); geoprojectionconverter.to_target(lasreader->point.coordinates); lasreader->point.compute_XYZ(reproject_quantizer); } lasinventory.add(&lasreader->point); } lasreader->close(); lasreader->header.number_of_point_records = lasinventory.number_of_point_records; for (i = 0; i < 5; i++) lasreader->header.number_of_points_by_return[i] = lasinventory.number_of_points_by_return[i+1]; if (reproject_quantizer) lasreader->header = *reproject_quantizer; lasreader->header.max_x = lasreader->header.get_x(lasinventory.max_X); lasreader->header.min_x = lasreader->header.get_x(lasinventory.min_X); lasreader->header.max_y = lasreader->header.get_y(lasinventory.max_Y); lasreader->header.min_y = lasreader->header.get_y(lasinventory.min_Y); lasreader->header.max_z = lasreader->header.get_z(lasinventory.max_Z); lasreader->header.min_z = lasreader->header.get_z(lasinventory.min_Z); // if (verbose) { fprintf(stderr,"extra pass took %g sec.\n", taketime()-start_time); start_time = taketime(); } if (verbose) fprintf(stderr, "piped output: reading %lld and writing %d points ...\n", lasreader->npoints, lasinventory.number_of_point_records); } else { if (reproject_quantizer) { saved_quantizer = new LASquantizer(); *saved_quantizer = lasreader->header; lasreader->header = *reproject_quantizer; } //if (verbose) fprintf(stderr, "reading %lld and writing all surviving points ...\n", lasreader->npoints); } // check 
output if (!laswriteopener.active()) { // create name from input name laswriteopener.make_file_name(lasreadopener.get_file_name()); } // prepare the header for the surviving points strncpy(lasreader->header.system_identifier, "LAStools (c) by rapidlasso GmbH", 32); lasreader->header.system_identifier[31] = '\0'; char temp[64]; sprintf(temp, "las2las (version %d)", LAS_TOOLS_VERSION); strncpy(lasreader->header.generating_software, temp, 32); lasreader->header.generating_software[31] = '\0'; LASwriter* laswriter = 0; // open laswriter if(is_mpi){ // remove any existing out file, before opening with MPI_File_open if(rank==0){ remove(laswriteopener.get_file_name()); } MPI_Barrier(MPI_COMM_WORLD); } laswriter = laswriteopener.open(&lasreader->header); if (laswriter == 0) { fprintf(stderr, "ERROR: could not open laswriter\n"); byebye(true, argc==1); } // ************************************************************************************************** if(is_mpi == 1){ // jdw, we do this because only rank 0 now writes the header in laswriter_las.cpp MPI_File fh = laswriter->get_MPI_File(); MPI_Offset offset; //MPI_File_get_position(fh, &offset); //printf ("offset %lld, rank %i fh %lld\n", offset, rank, fh); if(rank==0){ MPI_File_get_position(fh, &offset); } MPI_Bcast(&offset, 1, MPI_OFFSET, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); MPI_File_seek(fh, offset, MPI_SEEK_SET); } // **************************************************************************************************** // for piped output we need to re-open the input file if (extra_pass) { if (!lasreadopener.reopen(lasreader)) { fprintf(stderr, "ERROR: could not re-open lasreader\n"); byebye(true); } } else { if (reproject_quantizer) { lasreader->header = *saved_quantizer; delete saved_quantizer; } } // maybe seek to start position if (subsequence_start) lasreader->seek(subsequence_start); // loop over points if (point) { while (lasreader->read_point()) { if (lasreader->p_count > subsequence_stop) break; if 
(clip_to_bounding_box) { if (!lasreader->point.inside_box(lasreader->header.min_x, lasreader->header.min_y, lasreader->header.min_z, lasreader->header.max_x, lasreader->header.max_y, lasreader->header.max_z)) { continue; } } if (reproject_quantizer) { lasreader->point.compute_coordinates(); geoprojectionconverter.to_target(lasreader->point.coordinates); lasreader->point.compute_XYZ(reproject_quantizer); } *point = lasreader->point; laswriter->write_point(point); // without extra pass we need inventory of surviving points if (!extra_pass) laswriter->update_inventory(point); } delete point; point = 0; } else // ***************************** MPI ******************************************************** { // ***** Determine the start and stop points for this process ***** I64 left_over_count = lasreader->npoints % process_count; I64 process_points = lasreader->npoints / process_count; subsequence_start = rank*process_points; subsequence_stop = subsequence_start + process_points; if(rank == process_count-1) subsequence_stop += left_over_count; // ***** Set the input stream file offset for this process ***** // subsequence_start parameter gets cast to U32 in the implementation of seek and overflows for large files // manually set the file offset instead for now //((LASreaderLAS*)lasreader)->stream->seek(subsequence_start); I64 header_end_read_position = lasreader->get_Stream()->tell(); //printf("header end %lld subseqence_start * 28 %lld rank %i\n", header_end_read_position, subsequence_start*28, rank); lasreader->p_count = subsequence_start; lasreader->get_Stream()->seek(header_end_read_position + subsequence_start*28); //printf("seek pos first loop %lld rank %i\n", lasreader->get_Stream()->tell(), rank); if (verbose) fprintf(stderr, "reading %lli points, rank %i\n", subsequence_stop - subsequence_start, rank); // *****Read the file for the first time ***** // this first read and filter of the file is to gather a count of points that pass the filter so that // write 
offsets can be set. I64 filtered_count = 0; //while (lasreader->read_point()){ lasreader->MPI_END_POINT = subsequence_stop; while (lasreader->read_point()) { filtered_count++; } // ***** Gather and set the write offset for this process ***** I64* filtered_counts = (I64*)malloc(process_count * sizeof(I64)); if(is_mpi)MPI_Barrier(MPI_COMM_WORLD); filtered_counts[rank] = filtered_count; if(is_mpi)MPI_Allgather(&filtered_count, 1, MPI_LONG_LONG, filtered_counts, 1, MPI_LONG_LONG, MPI_COMM_WORLD); if(is_mpi)MPI_Barrier(MPI_COMM_WORLD); if(debug) printf("filtered count %lli rank %i\n", filtered_counts[rank], rank); if(is_mpi)MPI_Barrier(MPI_COMM_WORLD); I64 write_point_offset = 0; for (int k=0; k < rank; k++){ write_point_offset += filtered_counts[k]; } if(is_mpi){ MPI_File fh = laswriter->get_MPI_File(); MPI_Offset cur = 0; // jdw, todo, remove the hardcoding by adding methods to read point size from reader MPI_File_seek(fh, write_point_offset*28, MPI_SEEK_CUR); if(debug){ MPI_File_get_position(fh, &cur); printf ("rank %i, write offset %lld\n", rank, write_point_offset*28); } } if(is_mpi)MPI_Barrier(MPI_COMM_WORLD); // ***** Read and filter the input file again, this time write the filtered point since output file offset in now known amd set ***** //lasreader->seek(subsequence_start); // subsequence_start parameter gets cast to U32 in the implementation and overflows for large files // manually set the file offset instead for now //printf("header end %lld subseqence_start * 28 %lld rank %i\n", header_end_read_position, subsequence_start*28, rank); lasreader->p_count = subsequence_start; lasreader->get_Stream()->seek(header_end_read_position + subsequence_start*28); //printf("seek pos second loop %lld rank %i\n", lasreader->get_Stream()->tell(), rank); lasreader->MPI_END_POINT = subsequence_stop; while (lasreader->read_point()) { //if (lasreader->p_count > subsequence_stop) break; //if (clip_to_bounding_box) //{ // if 
(!lasreader->point.inside_box(lasreader->header.min_x, lasreader->header.min_y, lasreader->header.min_z, lasreader->header.max_x, lasreader->header.max_y, lasreader->header.max_z)) // { // continue; // } // } if (reproject_quantizer) { lasreader->point.compute_coordinates(); geoprojectionconverter.to_target(lasreader->point.coordinates); lasreader->point.compute_XYZ(reproject_quantizer); } laswriter->write_point(&lasreader->point); // without extra pass we need inventory of surviving points if (!extra_pass){ laswriter->update_inventory(&lasreader->point); } } //***** this is part of an mpi write optimization ***** laswriter->get_Stream()->flushBytes(); } // without the extra pass we need to fix the header now // ***** do the inventory reconciliation ***** // ***** Reduce inventory information in rank 0 ***** if (is_mpi){ U32 number_of_point_records = 0; U32 number_of_points_by_return[8]; for(int i = 0; i<8; i++)number_of_points_by_return[i] = 0; I32 max_X = 0; I32 min_X = 0; I32 max_Y = 0; I32 min_Y = 0; I32 max_Z = 0; I32 min_Z = 0; MPI_Reduce(&laswriter->inventory.number_of_point_records, &number_of_point_records, 1, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(laswriter->inventory.number_of_points_by_return, number_of_points_by_return, 8, MPI_UNSIGNED, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&laswriter->inventory.max_X, &max_X, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&laswriter->inventory.min_X, &min_X, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&laswriter->inventory.max_Y, &max_Y, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&laswriter->inventory.min_Y, &min_Y, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce(&laswriter->inventory.max_Z, &max_Z, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce(&laswriter->inventory.min_Z, &min_Z, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); if (rank ==0){ laswriter->inventory.number_of_point_records = number_of_point_records; for(int i=0; i<8; 
i++)laswriter->inventory.number_of_points_by_return[i] = number_of_points_by_return[i]; laswriter->inventory.max_X = max_X; laswriter->inventory.min_X = min_X; laswriter->inventory.max_Y = max_Y; laswriter->inventory.min_Y = min_Y; laswriter->inventory.max_Z = max_Z; laswriter->inventory.min_Z = min_Z; } } if(rank == 0){ if (!extra_pass) { if (reproject_quantizer) lasreader->header = *reproject_quantizer; laswriter->update_header(&lasreader->header, TRUE); } } if(is_mpi)MPI_Barrier(MPI_COMM_WORLD); if (verbose) { fprintf(stderr,"%lli surviving points written by rank: %i\n", laswriter->p_count, rank); } laswriter->close(FALSE); if(is_mpi)MPI_Barrier(MPI_COMM_WORLD); delete laswriter; lasreader->close(); delete lasreader; if (reproject_quantizer) delete reproject_quantizer; } if(is_mpi)MPI_Finalize(); time(&wall_end_time); if (verbose) { fprintf(stderr,"total time %.f sec, cpu time: %g sec. rank: %i\n", difftime(wall_end_time, wall_start_time), taketime()-start_time, rank); } return 0; }
int main(int argc, char **argv) { int size, rank, rc, root = 0, nameLength = 20; MPI_Status status; MPI_File configFile = malloc(sizeof configFile); MPI_Info info; char *configFileName = "./configFile.txt"; createLogFile(); MPI_Init(&argc, &argv); initLogFile(); MPI_Info_create(&info); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); printf("%d/%d started.\n", rank+1, size); char buf[nameLength + 1]; rc = MPI_File_open(MPI_COMM_WORLD, configFileName, MPI_MODE_RDONLY, info, &configFile); printf("%d/%d achieved the file_open result: %d.\n", rank+1, size, rc); // set the individual pointer to our position in the config file // master is the master of elevators rc = MPI_File_seek(configFile, rank * nameLength, MPI_SEEK_SET); rc = MPI_File_read(configFile, buf, nameLength, MPI_CHAR, &status); buf[nameLength] = '\0'; int len = nameLength - 1; while ((len >= 0) && (buf[len] == ' ')) { buf[len] = '\0'; len--; } rc = MPI_File_close(&configFile); char *ourname = buf; char str[50 + nameLength]; printf(strcat (strcat (strcpy (str, "%d/%d has the name '"), ourname), "'.\n"), rank+1, size); //read in the file to be counted, line by line. FILE * fp; fp = fopen(argv[1], "r"); if (fp == NULL) { printf("%d/%d did not find a document to count! Switching to assignment part 1.\n", rank+1, size); assgn = 1; } else { printf("%d/%d found a document to count. Switching to assignment part 2.\n", rank+1, size); assgn = 2; /* // general idea: char * line = NULL; size_t lineLen = 0; ssize_t read; dlist* list = NULL; dlist_create(10, &list); printf("size of list is %d\n", list->size); for (int c = 0; c < 15; c++) { char buf[6]; // long enough for test + num, e.g. 
test1 sprintf(buf, "test%d", c); dlist_append(&list, buf); } char* thestring; printf("capacity is %d\n", list->capacity); for (int c = 0; c < 15; c++) { dlist_get(&list, c, &thestring); printf("string is: %s\n", thestring); } while ((read = getline(&line, &lineLen, fp)) != -1) { printf("Retrieved line of length %zu :\n", read); printf("%s", line); } if (line) free(line); */ char * line = NULL; size_t lineLen = 0; ssize_t read; dlist_create(10, &list); // go through the lines in the file... int i = 0; while ((read = getline(&line, &lineLen, fp)) != -1) { i++; // ... and map them to the individual worker threads if (i % (size-1) == rank-1) { dlist_append(&list, line); } } if (line) free(line); } // we check whether or not we are the root process. if (rank == root) { master(rank); } else { worker(rank, ourname); } closeLogFile(); printf("%d/%d ended.\n", rank+1, size); MPI_Finalize(); }
void do_collective_read() { MPI_Info info; MPI_Datatype contig; MPI_Comm sub_read_comm; MPI_File fh; char coll_path[PATH_LEN]; int sub_comm_size, sub_rank, sub_comm_color; int disp; int rc; int *buf; ptimes[0].start = MPI_Wtime(); ptimes[1].start = MPI_Wtime(); sub_comm_color = get_sub_collective_io_comm(&sub_read_comm); /* Construct a datatype for distributing the input data across all * processes. */ MPI_Type_contiguous(data_size / sizeof(int), MPI_INT, &contig); MPI_Type_commit(&contig); /* Set the stripe_count and stripe_size, that is, the striping_factor * and striping_unit. Both keys and values for MPI_Info_set must be * in the form of ascii strings. */ MPI_Info_create(&info); // MPI_Info_set(info, "striping_factor", striping_factor); // MPI_Info_set(info, "striping_unit", striping_unit); MPI_Info_set(info, "romio_cb_read", "enable"); // MPI_Info_set(info, "romio_cb_read", "disable"); /* Get path to the target file of the communicator */ MPI_Comm_size(sub_read_comm, &sub_comm_size); get_coll_io_path(coll_path, sub_comm_color); /* Delete the output file if it exists so that striping can be set * on the output file. */ // rc = MPI_File_delete(coll_path, info); /* Create read data*/ MPI_Comm_rank(sub_read_comm, &sub_rank); buf = create_io_data(-1); ptimes[1].end = MPI_Wtime(); MPI_Barrier(MPI_COMM_WORLD); /* Open the file */ ptimes[2].start = MPI_Wtime(); rc = MPI_File_open(sub_read_comm, coll_path, MPI_MODE_RDONLY, info, &fh); if (rc != MPI_SUCCESS) { gio_err("MPI_File_open failed: %s (%s:%s:%d)", coll_path, __FILE__, __func__, __LINE__); } ptimes[2].end = MPI_Wtime(); /* Set the file view for the output file. In this example, we will * use the same contiguous datatype as we used for reading the data * into local memory. A better example would be to read out just * part of the data, say 4 contiguous elements followed by a gap of * 4 elements, and repeated. 
*/ ptimes[3].start = MPI_Wtime(); #ifdef GIO_LARGE_FILE int i; for (i = 0; i < sub_rank; i++) { MPI_File_seek(fh, data_size, MPI_SEEK_CUR); } #else disp = sub_rank * data_size; MPI_File_set_view(fh, disp, contig, contig, "native", info); #endif if (rc != MPI_SUCCESS) { gio_err("MPI_File_set_view failed (%s:%s:%d)", __FILE__, __func__, __LINE__); } ptimes[3].end = MPI_Wtime(); /* MPI Collective Read */ ptimes[4].start = MPI_Wtime(); rc = MPI_File_read_all(fh, buf, 1, contig, MPI_STATUS_IGNORE); if (rc != MPI_SUCCESS) { gio_err("MPI_File_set_view failed (%s:%s:%d)", __FILE__, __func__, __LINE__); } ptimes[4].end = MPI_Wtime(); validate_io_data(buf, sub_rank); /*Free data*/ free_io_data(buf); /* Close Files */ ptimes[5].start = MPI_Wtime(); MPI_File_close(&fh); ptimes[5].end = MPI_Wtime(); ptimes[0].end = MPI_Wtime(); print_results(); return; }
//#include "graph.hpp" void process_files() { std::vector<string> files=getallfilenames("/work/scratch/vv52zasu/inputfiles/"); //std::vector<string> files=getallfilenames("/home/vv52zasu/mpi/inputfiles/"); MPI::Status status; int myrank = MPI::COMM_WORLD.Get_rank(); int size = MPI::COMM_WORLD.Get_size(); int filecount=0; /*//////Read files in a loop and write initial data to localmap/////*/ for(std::vector<string>::iterator it = files.begin(); it != files.end(); ++it) { if(myrank ==0) std::cout<<"Processing file:"<<(*it).c_str()<<endl; MPI::File thefile = MPI::File::Open(MPI::COMM_WORLD, (*it).c_str(), MPI::MODE_RDONLY, MPI::INFO_NULL); MPI::Offset filesize = thefile.Get_size(); char *bufchar, *bufchar_header; int CHUNKSIZE = (filesize/size)+1; CHUNKSIZE = std::max(CHUNKSIZE, 10000); bufchar = new char[CHUNKSIZE+300000]; bufchar_header = bufchar; bufchar = bufchar + 300000; MPI_Status status1; MPI_File_seek(thefile, (myrank)*CHUNKSIZE, MPI_SEEK_SET); MPI_File_read( thefile, bufchar, CHUNKSIZE, MPI_CHAR, &status1); int count=0; MPI_Get_count( &status1, MPI_CHAR, &count ); MPI::COMM_WORLD.Barrier(); char * pch, *lastsentence; pch=strchr(bufchar,'\n'); while (pch!=NULL) { if(*(pch+1)=='\n'){ lastsentence = pch+2;} pch=strchr(pch+1,'\n'); } int sendcharcount = count -( lastsentence - bufchar ); if(sendcharcount < 0 || sendcharcount > 300000) sendcharcount =0; //cout << "CHUNKSIZE: "<< CHUNKSIZE << "count: " << count << " sendcharcount: " << sendcharcount << endl; //cout << lastsentence << endl; char *recvptr; recvptr = new char[300000]; int dest=0,src=0; if(myrank==size-1) { dest=0; src=myrank-1; } else if(myrank==0) { dest=1;src=size-1; } else { dest=myrank+1;src=myrank-1; } //if(sendcharcount >= 300000) cout <<"Process: " << myrank << " sendcharcount:" << sendcharcount <<endl<< lastsentence<<endl;; MPI_Sendrecv(lastsentence, sendcharcount, MPI_CHAR, dest, 123, recvptr, CHUNKSIZE, MPI_CHAR, src, 123, MPI_COMM_WORLD, &status1); //MPI::COMM_WORLD.Sendrecv(lastsentence, 
sendcharcount, MPI_CHAR, dest, 123, recvptr, CHUNKSIZE, MPI_CHAR, src, 123, status); int recvcount=0; MPI_Get_count( &status1, MPI_CHAR, &recvcount ); //cout << "Process: " << myrank << ". Recvcount: " << recvcount << endl; //int recvcount = strlen(recvptr); bufchar = bufchar -recvcount; if(recvcount >= 300000) cout << "Process: " << myrank << " DUDE wtf1 man viswanath"<<endl; memcpy(bufchar, recvptr, recvcount); int finalcount = lastsentence - bufchar; // cout << "Final count: " << finalcount << " total allocated: " << CHUNKSIZE<< " count + recvcount - sendcharcount: " << count+ recvcount -sendcharcount<< endl; if( finalcount > CHUNKSIZE+300000) { cout << "Process: " << myrank << " DUDE wtf man viswanath. "<< "Final Count: "<<finalcount<<endl; // if(myrank==32||myrank ==94){ // for(int i=0;i<100; i++) // cout<<bufchar[i]; // cout <<endl; // } } long unsigned int destsize = compressBound(finalcount); unsigned char *compressedstr = new unsigned char[destsize]; int result = compress(compressedstr, &destsize, (unsigned char*)bufchar, finalcount); compressedvector.push_back(std::make_tuple(compressedstr, destsize, finalcount)); string finalstr(bufchar, finalcount ); // //cout << recvcount << endl; delete[] recvptr; // //cout << finalstr<<endl; process_string(finalstr, localmap, frequencymap); int msize = (int)mapsize(localmap)+(int)((frequencymap.size()* 20)/(1024*1024)); int max; MPI_Reduce(&msize, &max, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD); delete[] (bufchar_header); if(myrank ==0){ cout<<"MaxMapsize: "<<max<<endl; std::cout<<"Processing file Ended: "<<(*it).c_str()<<endl; } filecount++; //if(filecount%100 == 0)process_firstlevel(myrank, size); } }
/*
 * JNI binding for the native seek method of the Java class mpi.File.
 *
 * Converts the Java-side handle and offset to their MPI types, forwards the
 * call to MPI_File_seek and hands the MPI return code to
 * ompi_java_exceptionCheck().
 *
 * env    - JNI environment of the calling thread
 * jthis  - the Java object the method was invoked on (unused)
 * fh     - MPI file handle carried as a jlong
 * offset - seek offset
 * whence - seek mode, passed through unchanged
 */
JNIEXPORT void JNICALL Java_mpi_File_seek(
        JNIEnv *env, jobject jthis, jlong fh, jlong offset, jint whence)
{
    MPI_File file = (MPI_File)fh;
    MPI_Offset target = (MPI_Offset)offset;
    int status = MPI_File_seek(file, target, whence);

    ompi_java_exceptionCheck(env, status);
}
int main(int argc, char **argv) { double *xy; double mySUMx, mySUMy, mySUMxy, mySUMxx, SUMx, SUMy, SUMxy, SUMxx, SUMres, res, slope, y_intercept, y_estimate, begin, end; int i, j, n, myid, numprocs, naverage, nremain, mypoints, sizeFile, ret; /*int new_sleep (int seconds);*/ MPI_Status istatus; MPI_Datatype MPI_POINT; MPI_File infile; MPI_Offset ishift; MPI_Init(&argc, &argv); MPI_Comm_rank (MPI_COMM_WORLD, &myid); MPI_Comm_size (MPI_COMM_WORLD, &numprocs); MPI_Type_contiguous(2, MPI_DOUBLE, &MPI_POINT); MPI_Type_commit(&MPI_POINT); ret = MPI_File_open(MPI_COMM_WORLD, "1.in", MPI_MODE_RDONLY, MPI_INFO_NULL, &infile); if (ret == 0) printf("Arquivo aberto com sucesso no processo %d \n", myid); else { printf("Arquivo aberto com erro no processo %d \n", myid); MPI_Abort(MPI_COMM_WORLD, 1); } /* ---------------------------------------------------------- * Step 1: Process 0 reads data and sends the value of n * ---------------------------------------------------------- */ MPI_File_seek(infile, 0, MPI_SEEK_SET); ret = MPI_File_read(infile, &n, 1, MPI_INT, &istatus); if (ret == 0) printf("Arquivo lido com sucesso no processo %d \n", myid); else { printf("Arquivo lido com erro no processo %d \n", myid); MPI_Abort(MPI_COMM_WORLD, 1); } naverage = n / numprocs; nremain = n % numprocs; // printf("%d - %d - %d - %d - %d\n", myid, n, naverage, nremain, 2 * (naverage + nremain)); xy = (double *) malloc (2 * (naverage + nremain) * sizeof(double)); /*if (myid == 0) { printf ("Number of processes used: %d\n", numprocs); printf ("-------------------------------------\n"); printf ("The x coordinates on worker processes:\n"); fscanf (infile, "%d", &n); x = (double *) malloc (n*sizeof(double)); y = (double *) malloc (n*sizeof(double)); for (i=0; i<n; i++) fscanf (infile, "%lf %lf", &x[i], &y[i]); for (i=1; i<numprocs; i++) MPI_Send (&n, 1, MPI_INT, i, 10, MPI_COMM_WORLD); } else { MPI_Recv (&n, 1, MPI_INT, 0, 10, MPI_COMM_WORLD, &istatus); x = (double *) malloc (n*sizeof(double)); y = 
(double *) malloc (n*sizeof(double)); }*/ /* ---------------------------------------------------------- */ /* ---------------------------------------------------------- * Step 2: Process 0 sends subsets of x and y * ---------------------------------------------------------- */ if (myid == 0) GET_TIME(begin); ishift = myid * naverage; mypoints = (myid < numprocs - 1) ? naverage : naverage + nremain; //MPI_File_set_view(infile, ishift, MPI_POINT, MPI_DOUBLE, "native", MPI_INFO_NULL); MPI_File_seek(infile, ishift * sizeof(double) * 2, MPI_SEEK_CUR); MPI_File_read(infile, &xy[0], mypoints, MPI_POINT, &istatus); /*for (i = 0; i < mypoints * 2; i += 2) { printf("(%d) %d: ", myid, i); printf("%.0lf ", xy[i]); printf("%.0lf\n", xy[i + 1]); } printf("\n");*/ /*if (myid == 0) { for (i=1; i<numprocs; i++) { ishift = i * naverage; mypoints = (i < numprocs - 1) ? naverage : naverage + nremain; MPI_Send (&ishift, 1, MPI_INT, i, 1, MPI_COMM_WORLD); MPI_Send (&mypoints, 1, MPI_INT, i, 2, MPI_COMM_WORLD); MPI_Send (&x[ishift], mypoints, MPI_DOUBLE, i, 3, MPI_COMM_WORLD); MPI_Send (&y[ishift], mypoints, MPI_DOUBLE, i, 4, MPI_COMM_WORLD); } } else { MPI_Recv (&ishift, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &istatus); MPI_Recv (&mypoints, 1, MPI_INT, 0, 2, MPI_COMM_WORLD, &istatus); MPI_Recv (&x[ishift], mypoints, MPI_DOUBLE, 0, 3, MPI_COMM_WORLD, &istatus); MPI_Recv (&y[ishift], mypoints, MPI_DOUBLE, 0, 4, MPI_COMM_WORLD, &istatus); printf ("id %d: ", myid); for (i=0; i<n; i++) printf("%4.2lf ", x[i]); printf ("\n"); }*/ /* ---------------------------------------------------------- * Step 3: Each process calculates its partial sum * ---------------------------------------------------------- */ mySUMx = 0; mySUMy = 0; mySUMxy = 0; mySUMxx = 0; if (myid == 0) { ishift = 0; mypoints = naverage; } for (j = 0; j < mypoints * 2; j += 2) { mySUMx += xy[j]; mySUMy += xy[j + 1]; mySUMxy += xy[j] * xy[j + 1]; mySUMxx += xy[j] * xy[j]; } // printf("%d:\t%lf - %lf - %lf - %lf\n", myid, mySUMx, 
mySUMy, mySUMxy, mySUMxx); /* ---------------------------------------------------------- * Step 4: Process 0 receives partial sums from the others * ---------------------------------------------------------- */ MPI_Reduce(&mySUMx, &SUMx, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&mySUMy, &SUMy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&mySUMxy, &SUMxy, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce(&mySUMxx, &SUMxx, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); if (myid == 0) GET_TIME(end); /*if (myid != 0) { MPI_Send (&mySUMx, 1, MPI_DOUBLE, 0, 5, MPI_COMM_WORLD); MPI_Send (&mySUMy, 1, MPI_DOUBLE, 0, 6, MPI_COMM_WORLD); MPI_Send (&mySUMxy, 1, MPI_DOUBLE, 0, 7, MPI_COMM_WORLD); MPI_Send (&mySUMxx, 1, MPI_DOUBLE, 0, 8, MPI_COMM_WORLD); } else { SUMx = mySUMx; SUMy = mySUMy; SUMxy = mySUMxy; SUMxx = mySUMxx; for (i=1; i<numprocs; i++) { MPI_Recv (&mySUMx, 1, MPI_DOUBLE, i, 5, MPI_COMM_WORLD, &istatus); MPI_Recv (&mySUMy, 1, MPI_DOUBLE, i, 6, MPI_COMM_WORLD, &istatus); MPI_Recv (&mySUMxy, 1, MPI_DOUBLE, i, 7, MPI_COMM_WORLD, &istatus); MPI_Recv (&mySUMxx, 1, MPI_DOUBLE, i, 8, MPI_COMM_WORLD, &istatus); SUMx = SUMx + mySUMx; SUMy = SUMy + mySUMy; SUMxy = SUMxy + mySUMxy; SUMxx = SUMxx + mySUMxx; } }*/ /* ---------------------------------------------------------- * Step 5: Process 0 does the final steps * ---------------------------------------------------------- */ if (myid == 0) { slope = (SUMx * SUMy - n * SUMxy ) / (SUMx * SUMx - n * SUMxx); y_intercept = (SUMy - slope * SUMx) / n; printf ("\n"); printf ("The linear equation that best fits the given data:\n"); printf (" y = %6.2lfx + %6.2lf\n", slope, y_intercept); printf ("--------------------------------------------------\n"); printf (" Original (x, y) Estimated y Residual\n"); printf ("--------------------------------------------------\n"); SUMres = 0; } MPI_Bcast(&slope, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&y_intercept, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); for (j = 0; j < 
numprocs; j++) { MPI_Barrier(MPI_COMM_WORLD); if (j == myid) { SUMres = 0; for (i = 0; i < mypoints * 2; i += 2) { y_estimate = slope * xy[i] + y_intercept; res = xy[i + 1] - y_estimate; SUMres = SUMres + res * res; printf(" (%6.2lf %6.2lf) %6.2lf %6.2lf\n", xy[i], xy[i + 1], y_estimate, res); } } } MPI_Barrier(MPI_COMM_WORLD); if (myid == 0) { printf("--------------------------------------------------\n"); printf("Residual sum = %6.2lf\n", SUMres); printf("Time: %lf\n", end - begin); } MPI_File_close(&infile); MPI_Finalize(); }
int main(int argc, char **argv) { MPI_Init(&argc, &argv); int initFlag; MPI_Initialized(&initFlag); if (!initFlag) { printf("MPI init failed\n"); return 8; } MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); int l,mm=5; int nx,ny,nz,lt,nedge; float frequency; float velmax; float dt; int ncx_shot1,ncy_shot1,ncz_shot; int ishot,ncy_shot,ncx_shot; float unit; int nxshot,nyshot,dxshot,dyshot; char infile[80],outfile[80],logfile[80],tmp[80], nodelog[84]; FILE *fin, *fout, *flog, *fnode; MPI_File mpi_flog, mpi_fout; MPI_Status mpi_status; struct timeval start,end; float all_time; float *u, *v, *w, *up, *up1, *up2, *vp, *vp1, *vp2, *wp, *wp1, *wp2, *us, *us1, *us2, *vs, *vs1, *vs2, *ws, *ws1, *ws2, *vpp, *density, *vss; float c[5][7]; float *wave; float nshot,t0,tt,c0; float dtx,dtz,dtxz,dr1,dr2,dtx4,dtz4,dtxz4; char message[100]; if(argc<4) { printf("please add 3 parameter: inpurfile, outfile, logfile\n"); exit(1); } message[99] = 0; // Avoid string buffer overrun strcpy(infile,argv[1]); strcpy(outfile,argv[2]); strcpy(logfile,argv[3]); strcpy(nodelog,logfile); strcat(nodelog, ".node"); strcpy(tmp,"date "); strncat(tmp, ">> ",3); strncat(tmp, logfile, strlen(logfile)); if (proc_rank == 0) { flog = fopen(logfile,"w"); fprintf(flog,"------------start time------------\n"); fclose(flog); system(tmp); gettimeofday(&start,NULL); } fin = fopen(infile,"r"); if(fin == NULL) { printf("file %s is not exist\n",infile); exit(2); } fscanf(fin,"nx=%d\n",&nx); fscanf(fin,"ny=%d\n",&ny); fscanf(fin,"nz=%d\n",&nz); fscanf(fin,"lt=%d\n",<); fscanf(fin,"nedge=%d\n",&nedge); fscanf(fin,"ncx_shot1=%d\n",&ncx_shot1); fscanf(fin,"ncy_shot1=%d\n",&ncy_shot1); fscanf(fin,"ncz_shot=%d\n",&ncz_shot); fscanf(fin,"nxshot=%d\n",&nxshot); fscanf(fin,"nyshot=%d\n",&nyshot); fscanf(fin,"frequency=%f\n",&frequency); fscanf(fin,"velmax=%f\n",&velmax); fscanf(fin,"dt=%f\n",&dt); fscanf(fin,"unit=%f\n",&unit); fscanf(fin,"dxshot=%d\n",&dxshot); 
fscanf(fin,"dyshot=%d\n",&dyshot); fclose(fin); if (proc_rank == 0) { // Master printf("\n--------workload parameter--------\n"); printf("nx=%d\n",nx); printf("ny=%d\n",ny); printf("nz=%d\n",nz); printf("lt=%d\n",lt); printf("nedge=%d\n",nedge); printf("ncx_shot1=%d\n",ncx_shot1); printf("ncy_shot1=%d\n",ncy_shot1); printf("ncz_shot=%d\n",ncz_shot); printf("nxshot=%d\n",nxshot); printf("nyshot=%d\n",nyshot); printf("frequency=%f\n",frequency); printf("velmax=%f\n",velmax); printf("dt=%f\n",dt); printf("unit=%f\n",unit); printf("dxshot=%d\n",dxshot); printf("dyshot=%d\n\n",dyshot); flog = fopen(logfile,"a"); fprintf(flog,"\n--------workload parameter--------\n"); fprintf(flog,"nx=%d\n",nx); fprintf(flog,"ny=%d\n",ny); fprintf(flog,"nz=%d\n",nz); fprintf(flog,"lt=%d\n",lt); fprintf(flog,"nedge=%d\n",nedge); fprintf(flog,"ncx_shot1=%d\n",ncx_shot1); fprintf(flog,"ncy_shot1=%d\n",ncy_shot1); fprintf(flog,"ncz_shot=%d\n",ncz_shot); fprintf(flog,"nxshot=%d\n",nxshot); fprintf(flog,"nyshot=%d\n",nyshot); fprintf(flog,"frequency=%f\n",frequency); fprintf(flog,"velmax=%f\n",velmax); fprintf(flog,"dt=%f\n",dt); fprintf(flog,"unit=%f\n",unit); fprintf(flog,"dxshot=%d\n",dxshot); fprintf(flog,"dyshot=%d\n\n",dyshot); fclose(flog); fnode = fopen(nodelog, "a"); fprintf(fnode,"World size: %d\n", world_size); fclose(fnode); } #ifdef _WITH_PHI // [Afa] It is recommended that for Intel Xeon Phi data is 64-byte aligned. 
// Upon successful completion, posix_memalign() shall return zero if (posix_memalign((void **)&u , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&v , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&w , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&up , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&up1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&up2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vp , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vp1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vp2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&wp , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&wp1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&wp2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&us , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&us1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&us2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vs , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vs1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vs2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&ws , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&ws1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&ws2, 64, sizeof(float)*nz*ny*nx)) return 2; #else u = (float*)malloc(sizeof(float)*nz*ny*nx); v = (float*)malloc(sizeof(float)*nz*ny*nx); w = (float*)malloc(sizeof(float)*nz*ny*nx); up = (float*)malloc(sizeof(float)*nz*ny*nx); up1 = (float*)malloc(sizeof(float)*nz*ny*nx); up2 = (float*)malloc(sizeof(float)*nz*ny*nx); vp = (float*)malloc(sizeof(float)*nz*ny*nx); vp1 = (float*)malloc(sizeof(float)*nz*ny*nx); vp2 = 
(float*)malloc(sizeof(float)*nz*ny*nx); wp = (float*)malloc(sizeof(float)*nz*ny*nx); wp1 = (float*)malloc(sizeof(float)*nz*ny*nx); wp2 = (float*)malloc(sizeof(float)*nz*ny*nx); us = (float*)malloc(sizeof(float)*nz*ny*nx); us1 = (float*)malloc(sizeof(float)*nz*ny*nx); us2 = (float*)malloc(sizeof(float)*nz*ny*nx); vs = (float*)malloc(sizeof(float)*nz*ny*nx); vs1 = (float*)malloc(sizeof(float)*nz*ny*nx); vs2 = (float*)malloc(sizeof(float)*nz*ny*nx); ws = (float*)malloc(sizeof(float)*nz*ny*nx); ws1 = (float*)malloc(sizeof(float)*nz*ny*nx); ws2 = (float*)malloc(sizeof(float)*nz*ny*nx); #endif // [Afa] Those are not offloaded to phi yet vpp = (float*)malloc(sizeof(float)*nz*ny*nx); density = (float*)malloc(sizeof(float)*nz*ny*nx); vss = (float*)malloc(sizeof(float)*nz*ny*nx); wave = (float*)malloc(sizeof(float)*lt); nshot=nxshot*nyshot; t0=1.0/frequency; // [Afa] Branch optmization // TODO: Will compiler optimize the `condition'? // i.e Can I write `for(i=0;i< (nz < 210 ? nz : 210);i++)'? int condition = nz < 210 ? nz : 210; for(int i=0; i < condition;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nx;k++) { vpp[i*ny*nx+j*nx+k]=2300.; vss[i*ny*nx+j*nx+k]=1232.; density[i*ny*nx+j*nx+k]=1.; } } } condition = nz < 260 ? nz : 260; for(int i=210; i < condition;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nx;k++) { vpp[i*ny*nx+j*nx+k]=2800.; vss[i*ny*nx+j*nx+k]=1509.; density[i*ny*nx+j*nx+k]=2.; } } } for(int i=260;i<nz;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nx;k++) { vpp[i*ny*nx+j*nx+k]=3500.; vss[i*ny*nx+j*nx+k]=1909.; density[i*ny*nx+j*nx+k]=2.5; } } } for(l=0;l<lt;l++) { tt=l*dt; tt=tt-t0; float sp=PIE*frequency*tt; float fx=100000.*exp(-sp*sp)*(1.-2.*sp*sp); wave[l]=fx; } // TODO: [Afa] Data produced by code below are static. 
See table below if(mm==5) { c0=-2.927222164; c[0][0]=1.66666665; c[1][0]=-0.23809525; c[2][0]=0.03968254; c[3][0]=-0.004960318; c[4][0]=0.0003174603; } c[0][1]=0.83333; c[1][1]=-0.2381; c[2][1]=0.0595; c[3][1]=-0.0099; c[4][1]=0.0008; for(int i=0;i<5;i++) for(int j=0;j<5;j++) c[j][2+i]=c[i][1]*c[j][1]; /* * mm == 5, c = * 1.666667 0.833330 0.694439 -0.198416 0.049583 -0.008250 0.000667 * -0.238095 -0.238100 -0.198416 0.056692 -0.014167 0.002357 -0.000190 * 0.039683 0.059500 0.049583 -0.014167 0.003540 -0.000589 0.000048 * -0.004960 -0.009900 -0.008250 0.002357 -0.000589 0.000098 -0.000008 * 0.000317 0.000800 0.000667 -0.000190 0.000048 -0.000008 0.000001 */ /* * mm != 5, c = * 0.000000 0.833330 0.694439 -0.198416 0.049583 -0.008250 0.000667 * 0.000000 -0.238100 -0.198416 0.056692 -0.014167 0.002357 -0.000190 * 0.000000 0.059500 0.049583 -0.014167 0.003540 -0.000589 0.000048 * 0.000000 -0.009900 -0.008250 0.002357 -0.000589 0.000098 -0.000008 * 0.000000 0.000800 0.000667 -0.000190 0.000048 -0.000008 0.000001 */ dtx=dt/unit; dtz=dt/unit; dtxz=dtx*dtz; dr1=dtx*dtx/2.; dr2=dtz*dtz/2.; dtx4=dtx*dtx*dtx*dtx; dtz4=dtz*dtz*dtz*dtz; dtxz4=dtx*dtx*dtz*dtz; if (proc_rank == 0) { fout = fopen(outfile, "wb"); fclose(fout); } // [Afa] Truncate file. We need a prettier way MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, outfile, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_fout); MPI_File_open(MPI_COMM_WORLD, nodelog, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_flog); // [Afa] *About Nodes Number* nshot (i.e nxshot * nyshot) should be multiple of node numbers, // or there will be hungry processes int loop_per_proc = ((int)nshot % world_size == 0) ? (nshot / world_size) : (nshot / world_size + 1); printf("\x1B[31mDEBUG:\x1b[39;49m World size %d, Loop per Proc %d, nshot %f, I am No. 
%d\n", world_size, loop_per_proc, nshot, proc_rank); // for(ishot=1;ishot<=nshot;ishot++) // [Afa] nshot is 20 in para1.in, but 200 in para2.in for (int loop_index = 0; loop_index < loop_per_proc; ++loop_index) { ishot = loop_index + proc_rank * loop_per_proc + 1; // [Afa] See commented code 2 lines above to understand this line if (ishot <= nshot) { // [Afa] ishot <= nshot printf("shot %d, process %d\n",ishot, proc_rank); snprintf(message, 29, "shot %6d, process %6d\n", ishot, proc_rank); // [Afa] Those numbers: MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET); // 28: string without '\0' MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status); // 29: with '\0' } else { printf("shot HUNGRY, process %d\n", proc_rank); snprintf(message, 29, "shot HUNGRY, process %6d\n", proc_rank); MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET); MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status); continue; } ncy_shot=ncy_shot1+(ishot/nxshot)*dyshot; ncx_shot=ncx_shot1+(ishot%nxshot)*dxshot; // [Afa] Matrix is zeroed in every loop // i.e. The relation between those matrices in each loop is pretty loose // Matrices not zeroed are: vpp, density, vss and wave, and they're not changed (read-only) // We only need to partially collect matrix `up' // TODO: [Afa] Get a better way to pass those pointers, and mark them as `restrict' // And WHY are they using cpp as extension? 
C++11 doesn't support `restrict' zero_matrices(u, w, ws2, up2, vp1, wp1, us, ws, wp, us2, us1, wp2, v, up1, nz, nx, up, ny, ws1, vs, vp2, vs1, vs2, vp); for(l=1;l<=lt;l++) { float xmax=l*dt*velmax; int nleft=ncx_shot-xmax/unit-10; int nright=ncx_shot+xmax/unit+10; int nfront=ncy_shot-xmax/unit-10; int nback=ncy_shot+xmax/unit+10; int ntop=ncz_shot-xmax/unit-10; int nbottom=ncz_shot+xmax/unit+10; if(nleft<5) nleft=5; if(nright>nx-5) nright=nx-5; if(nfront<5) nfront=5; if(nback>ny-5) nback=ny-5; if(ntop<5) ntop=5; if(nbottom>nz-5) nbottom=nz-5; ntop = ntop-1; nfront = nfront-1; nleft = nleft-1; // Although up, vp, wp, us, vs, ws are modified below, we're sure there's no race condition. // Each loop accesses a UNIQUE element in the array, and the value is not used, no need to worry about the dirty cache #pragma omp parallel for shared(u) shared(v) shared(w) shared(up1) shared(up2) shared(vp1) shared(vp2) shared(wp1) \ shared(wp2) shared(us) shared(us1) shared(us2) shared(vs) shared(vs1) shared(vs2) shared(ws) shared(ws1) shared(ws2) \ shared(vss) shared(vpp) shared(dr1) shared(dr2) shared(dtz) shared(dtx) shared(ncx_shot) shared(ncy_shot) shared(ncz_shot) \ shared(wave) for(int k=ntop;k<nbottom;k++) { for(int j=nfront;j<nback;j++) { for(int i=nleft;i<nright;i++) { float vvp2,drd1,drd2,vvs2; float px,sx; if(i==ncx_shot-1&&j==ncy_shot-1&&k==ncz_shot-1) { px=1.; sx=0.; } else { px=0.; sx=0.; } vvp2=vpp[k*ny*nx+j*nx+i]*vpp[k*ny*nx+j*nx+i]; drd1=dr1*vvp2; drd2=dr2*vvp2; vvs2=vss[k*ny*nx+j*nx+i]*vss[k*ny*nx+j*nx+i]; drd1=dr1*vvs2; drd2=dr2*vvs2; float tempux2=0.0f; float tempuy2=0.0f; float tempuz2=0.0f; float tempvx2=0.0f; float tempvy2=0.0f; float tempvz2=0.0f; float tempwx2=0.0f; float tempwy2=0.0f; float tempwz2=0.0f; float tempuxz=0.0f; float tempuxy=0.0f; float tempvyz=0.0f; float tempvxy=0.0f; float tempwxz=0.0f; float tempwyz=0.0f; // This will make the compiler do the vectorization for(int kk=1;kk<=mm;kk++) { tempux2 += 
c[kk-1][0]*(u[k*ny*nx+j*nx+(i+kk)]+u[k*ny*nx+j*nx+(i-kk)]); tempuy2 += c[kk-1][0]*(u[k*ny*nx+(j+kk)*nx+i]+u[k*ny*nx+(j-kk)*nx+i]); tempuz2 += c[kk-1][0]*(u[(k+kk)*ny*nx+j*nx+i]+u[(k-kk)*ny*nx+j*nx+i]); } for(int kk=1;kk<=mm;kk++) { tempvx2 += c[kk-1][0]*(v[k*ny*nx+j*nx+(i+kk)]+v[k*ny*nx+j*nx+(i-kk)]); tempvy2 += c[kk-1][0]*(v[k*ny*nx+(j+kk)*nx+i]+v[k*ny*nx+(j-kk)*nx+i]); tempvz2 += c[kk-1][0]*(v[(k+kk)*ny*nx+j*nx+i]+v[(k-kk)*ny*nx+j*nx+i]); } for(int kk=1;kk<=mm;kk++) { tempwx2 += c[kk-1][0]*(w[k*ny*nx+j*nx+(i+kk)]+w[k*ny*nx+j*nx+(i-kk)]); tempwy2 += c[kk-1][0]*(w[k*ny*nx+(j+kk)*nx+i]+w[k*ny*nx+(j-kk)*nx+i]); tempwz2 += c[kk-1][0]*(w[(k+kk)*ny*nx+j*nx+i]+w[(k-kk)*ny*nx+j*nx+i]); } //for(kk=1;kk<=mm;kk++) end tempux2=(tempux2+c0*u[k*ny*nx+j*nx+i])*vvp2*dtx*dtx; // u[k][j][i] tempuy2=(tempuy2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; // u[k][j][i] tempuz2=(tempuz2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtz*dtz; // u[k][j][i] tempvx2=(tempvx2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; tempvy2=(tempvy2+c0*v[k*ny*nx+j*nx+i])*vvp2*dtx*dtx; tempvz2=(tempvz2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtz*dtz; tempwx2=(tempwx2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; tempwy2=(tempwy2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; tempwz2=(tempwz2+c0*w[k*ny*nx+j*nx+i])*vvp2*dtz*dtz; // This loop is auto-vectorized for(int kk=1;kk<=mm;kk++) { for(int kkk=1;kkk<=mm;kkk++) { tempuxz=tempuxz+c[kkk-1][1+kk]*(u[(k+kkk)*ny*nx+j*nx+(i+kk)] -u[(k-kkk)*ny*nx+j*nx+(i+kk)] +u[(k-kkk)*ny*nx+j*nx+(i-kk)] -u[(k+kkk)*ny*nx+j*nx+(i-kk)]); // u[k+kkk][j][i+kk], u[k-kkk][j][i+kk], u[k-kkk][j][i-kk], u[k+kkk][j][i-kk] tempuxy=tempuxy+c[kkk-1][1+kk]*(u[k*ny*nx+(j+kkk)*nx+(i+kk)] -u[k*ny*nx+(j-kkk)*nx+(i+kk)] +u[k*ny*nx+(j-kkk)*nx+(i-kk)] -u[k*ny*nx+(j+kkk)*nx+(i-kk)]); tempvyz=tempvyz+c[kkk-1][1+kk]*(v[(k+kkk)*ny*nx+(j+kk)*nx+i] -v[(k-kkk)*ny*nx+(j+kk)*nx+i] +v[(k-kkk)*ny*nx+(j-kk)*nx+i] -v[(k+kkk)*ny*nx+(j-kk)*nx+i]); tempvxy=tempvxy+c[kkk-1][1+kk]*(v[k*ny*nx+(j+kkk)*nx+(i+kk)] -v[k*ny*nx+(j-kkk)*nx+(i+kk)] +v[k*ny*nx+(j-kkk)*nx+(i-kk)] 
-v[k*ny*nx+(j+kkk)*nx+(i-kk)]); tempwyz=tempwyz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+(j+kk)*nx+i] -w[(k-kkk)*ny*nx+(j+kk)*nx+i] +w[(k-kkk)*ny*nx+(j-kk)*nx+i] -w[(k+kkk)*ny*nx+(j-kk)*nx+i]); tempwxz=tempwxz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+j*nx+(i+kk)] -w[(k-kkk)*ny*nx+j*nx+(i+kk)] +w[(k-kkk)*ny*nx+j*nx+(i-kk)] -w[(k+kkk)*ny*nx+j*nx+(i-kk)]); } // for(kkk=1;kkk<=mm;kkk++) end } //for(kk=1;kk<=mm;kk++) end // LValues below are only changed here up[k*ny*nx+j*nx+i]=2.*up1[k*ny*nx+j*nx+i]-up2[k*ny*nx+j*nx+i] +tempux2+tempwxz*vvp2*dtz*dtx +tempvxy*vvp2*dtz*dtx; // up1[k][j][j], up2[k][j][i], up[k][j][i] vp[k*ny*nx+j*nx+i]=2.*vp1[k*ny*nx+j*nx+i]-vp2[k*ny*nx+j*nx+i] +tempvy2+tempuxy*vvp2*dtz*dtx +tempwyz*vvp2*dtz*dtx; wp[k*ny*nx+j*nx+i]=2.*wp1[k*ny*nx+j*nx+i]-wp2[k*ny*nx+j*nx+i] +tempwz2+tempuxz*vvp2*dtz*dtx +tempvyz*vvp2*dtz*dtx +px*wave[l-1]; us[k*ny*nx+j*nx+i]=2.*us1[k*ny*nx+j*nx+i]-us2[k*ny*nx+j*nx+i]+tempuy2+tempuz2 -tempvxy*vvs2*dtz*dtx-tempwxz*vvs2*dtz*dtx; vs[k*ny*nx+j*nx+i]=2.*vs1[k*ny*nx+j*nx+i]-vs2[k*ny*nx+j*nx+i]+tempvx2+tempvz2 -tempuxy*vvs2*dtz*dtx-tempwyz*vvs2*dtz*dtx; ws[k*ny*nx+j*nx+i]=2.*ws1[k*ny*nx+j*nx+i]-ws2[k*ny*nx+j*nx+i]+tempwx2+tempwy2 -tempuxz*vvs2*dtz*dtx-tempvyz*vvs2*dtz*dtx; }//for(i=nleft;i<nright;i++) end } } // Again, those are UNIQUE access. 
Safe to share #pragma omp parallel for shared(up) shared(us) shared(vp) shared(vs) shared(wp) shared(ws) shared(u) shared(v) shared(w) \ shared(up2) shared(up1) shared(us2) shared(us1) shared(vp2) shared(vp1) shared(wp2) shared(wp1) shared(ws2) shared(ws1) for(int k=ntop;k<nbottom;k++) for(int j=nfront;j<nback;j++) for(int i=nleft;i<nright;i++) { u[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]+us[k*ny*nx+j*nx+i]; v[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]+vs[k*ny*nx+j*nx+i]; w[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]+ws[k*ny*nx+j*nx+i]; up2[k*ny*nx+j*nx+i]=up1[k*ny*nx+j*nx+i]; up1[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]; us2[k*ny*nx+j*nx+i]=us1[k*ny*nx+j*nx+i]; us1[k*ny*nx+j*nx+i]=us[k*ny*nx+j*nx+i]; vp2[k*ny*nx+j*nx+i]=vp1[k*ny*nx+j*nx+i]; vp1[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]; vs2[k*ny*nx+j*nx+i]=vs1[k*ny*nx+j*nx+i]; vs1[k*ny*nx+j*nx+i]=vs[k*ny*nx+j*nx+i]; wp2[k*ny*nx+j*nx+i]=wp1[k*ny*nx+j*nx+i]; wp1[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]; ws2[k*ny*nx+j*nx+i]=ws1[k*ny*nx+j*nx+i]; ws1[k*ny*nx+j*nx+i]=ws[k*ny*nx+j*nx+i]; }//for(i=nleft;i<nright;i++) end }//for(l=1;l<=lt;l++) end // [Afa] Do we need to keep the order of data? 
// [Afa Update] Yes, we do need to KEEP THE ORDER of data // fwrite(up+169*ny*nx,sizeof(float),ny*nx,fout); // This is the original fwrite MPI_File_seek(mpi_fout, (ishot - 1) * ny * nx * sizeof(float), MPI_SEEK_SET); MPI_File_write(mpi_fout, up + 169 * ny * nx, ny * nx, MPI_FLOAT, &mpi_status); }//for(ishot=1;ishot<=nshot;ishot++) end MPI_File_close(&mpi_fout); MPI_File_close(&mpi_flog); free(u); free(v); free(w); free(up); free(up1); free(up2); free(vp); free(vp1); free(vp2); free(wp); free(wp1); free(wp2); free(us); free(us1); free(us2); free(vs); free(vs1); free(vs2); free(ws); free(ws1); free(ws2); free(vpp); free(density); free(vss); free(wave); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); if (proc_rank == 0) { gettimeofday(&end,NULL); all_time = (end.tv_sec-start.tv_sec)+(float)(end.tv_usec-start.tv_usec)/1000000.0; printf("run time:\t%f s\n",all_time); flog = fopen(logfile,"a"); fprintf(flog,"\nrun time:\t%f s\n\n",all_time); fclose(flog); flog = fopen(logfile,"a"); fprintf(flog,"------------end time------------\n"); fclose(flog); system(tmp); } // Why return 1? return 0; }
/* Major reconstruction of memory management for -off_cache flag */
/*
   Initializes communications buffers (call set_buf)
   Initializes iterations scheduling

   Input variables:

   -Bmark                (type struct Bench*)
                         (For explanation of struct Bench type:
                         describes all aspects of modes of a benchmark;
                         see [1] for more information)
                         Current benchmark

   -BMODE                (type MODES)
                         aggregate / non aggregate

   -iter                 (type int)
                         number of current iteration of message size loop

   -size                 (type int)
                         Message size

   In/out variables:

   -c_info               (type struct comm_info*)
                         Collection of all base data for MPI;
                         see [1] for more information
                         Communications buffers are allocated and assigned values

   -ITERATIONS           (type struct iter_schedule*)
                         Adaptive number of iterations, out of cache scheduling are
                         setup if requested

   On failure Bmark->sample_failure is set (SAMPLE_FAILED_INT_OVERFLOW or
   SAMPLE_FAILED_MEMORY) and the function returns without running the benchmark.
*/
void IMB_init_buffers_iter(struct comm_info* c_info, struct iter_schedule* ITERATIONS,
                           struct Bench* Bmark, MODES BMODE, int iter, int size)
/* >> IMB 3.1  */
{
/* IMB 3.1 << */
    size_t s_len, r_len, s_alloc, r_alloc;
    int init_size, irep, i_s, i_r, x_sample;
    const int root_based = has_root(Bmark->name);

    x_sample = BMODE->AGGREGATE ? ITERATIONS->msgspersample : ITERATIONS->msgs_nonaggr;

    /* July 2002 fix V2.2.1: */
    /* `defined RMA' (not bare `RMA') so an empty or absent macro is handled like EXT and MPIIO */
#if (defined EXT || defined MPIIO || defined RMA)
    if( Bmark->access==no ) x_sample=ITERATIONS->msgs_nonaggr;
#endif

    ITERATIONS->n_sample = (size > 0)
                           ? max(1, min(ITERATIONS->overall_vol / size, x_sample))
                           : x_sample;

    Bmark->sample_failure = 0;

    init_size = max(size, asize);

    if (c_info->rank < 0) {
        return;
    } else {
        if (ITERATIONS->iter_policy == imode_off) {
            ITERATIONS->n_sample = x_sample = ITERATIONS->msgspersample;
        } else if ((ITERATIONS->iter_policy == imode_multiple_np) ||
                   (ITERATIONS->iter_policy == imode_auto && root_based)) {
            /* n_sample for benchmarks with uneven distribution of works
               must be greater or equal and multiple to num_procs.
               The formula below is a negative leg of hyperbola.
               It's moved and scaled relative to max message size
               and initial n_sample subject to multiple to num_procs.
            */
            double d_n_sample = ITERATIONS->msgspersample;
            int max_msg_size = 1<<c_info->max_msg_log;
            /* Denominator evaluated in double: num_procs*init_size can exceed INT_MAX */
            int tmp = (int)(d_n_sample*max_msg_size/((double)c_info->num_procs*init_size+max_msg_size)+0.5);
            ITERATIONS->n_sample = x_sample = max(tmp-tmp%c_info->num_procs, c_info->num_procs);
        } /* else as is */
    }

    /* Send / receive lengths depend on the communication pattern of the benchmark */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoall") || !strcmp(Bmark->name,"Alltoallv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name, "Ialltoall") || !strcmp(Bmark->name, "Ialltoall_pure")
        || !strcmp(Bmark->name, "Ialltoallv") || !strcmp(Bmark->name, "Ialltoallv_pure")
#else
        0
#endif // NBC // MPI1
        )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)c_info->num_procs * (size_t)init_size;
    }
    else if (
#ifdef MPI1
        !strcmp(Bmark->name, "Allgather") || !strcmp(Bmark->name, "Allgatherv")
        || !strcmp(Bmark->name, "Gather") || !strcmp(Bmark->name, "Gatherv")
#elif defined NBC
        !strcmp(Bmark->name, "Iallgather") || !strcmp(Bmark->name, "Iallgather_pure")
        || !strcmp(Bmark->name, "Iallgatherv") || !strcmp(Bmark->name, "Iallgatherv_pure")
        || !strcmp(Bmark->name, "Igather") || !strcmp(Bmark->name, "Igather_pure")
        || !strcmp(Bmark->name, "Igatherv") || !strcmp(Bmark->name, "Igatherv_pure")
#else // MPI1 // NBC
        0
#endif // MPI1 // NBC
        )
    {
        s_len = (size_t) init_size;
        r_len = (size_t) c_info->num_procs * (size_t)init_size;
    }
    else if( !strcmp(Bmark->name,"Exchange") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = (size_t) init_size;
    }
    else if(
#ifdef MPI1
        !strcmp(Bmark->name,"Scatter") || !strcmp(Bmark->name,"Scatterv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Iscatter") || !strcmp(Bmark->name,"Iscatter_pure")
        || !strcmp(Bmark->name,"Iscatterv") || !strcmp(Bmark->name,"Iscatterv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
        )
    {
        s_len = (size_t)c_info->num_procs * (size_t)init_size;
        r_len = (size_t)init_size;
    }
    else if( !strcmp(Bmark->name,"Barrier") || /*!strcmp(Bmark->name,"Window") ||*/ !strcmp(Bmark->name,"Open_Close") )
    {
        s_len = r_len = 0;
    }
    else if ( ! strcmp(Bmark->name,"Exchange_put") || ! strcmp(Bmark->name,"Exchange_get") )
    {
        s_len = 2 * (size_t)init_size;
        r_len = 2 * (size_t)init_size;
    }
    else if (! strcmp(Bmark->name,"Compare_and_swap") )
    {
        /* Compare_and_swap operations require 3 buffers, so allocate space for compare
         * buffers in our r_buffer */
        s_len = (size_t)init_size;
        r_len = 3 * (size_t)init_size;
    }
    else
    {
        s_len = r_len = (size_t) init_size;
    }

    /*===============================================*/
    /* the displ is declared as int by MPI1 standard
       If c_info->num_procs*init_size  exceed INT_MAX value there is no way to run this sample
    */
    if (
#ifdef MPI1
        !strcmp(Bmark->name,"Alltoallv") || !strcmp(Bmark->name,"Allgatherv")
        || !strcmp(Bmark->name,"Scatterv") || !strcmp(Bmark->name,"Gatherv")
#elif defined NBC // MPI1
        !strcmp(Bmark->name,"Ialltoallv") || !strcmp(Bmark->name,"Ialltoallv_pure")
        || !strcmp(Bmark->name,"Iallgatherv") || !strcmp(Bmark->name,"Iallgatherv_pure")
        || !strcmp(Bmark->name,"Iscatterv") || !strcmp(Bmark->name,"Iscatterv_pure")
        || !strcmp(Bmark->name,"Igatherv") || !strcmp(Bmark->name,"Igatherv_pure")
#else // NBC // MPI1
        0
#endif // NBC // MPI1
        )
    {
        if( s_len > INT_MAX || r_len > INT_MAX) {
            Bmark->sample_failure = SAMPLE_FAILED_INT_OVERFLOW;
            return;
        }
    }
    /*===============================================*/

    /* IMB 3.1: new memory management for -off_cache */
    if (BMODE->type == Sync) {
        ITERATIONS->use_off_cache=0;
        ITERATIONS->n_sample=x_sample;
    } else {
#ifdef MPIIO
        ITERATIONS->use_off_cache=0;
#else
        ITERATIONS->use_off_cache = ITERATIONS->off_cache;
#endif
        if (ITERATIONS->off_cache) {
            if ( ITERATIONS->cache_size > 0) {
                /* Buffer stride: message length rounded up to a cache line, plus one line */
                size_t cls = (size_t) ITERATIONS->cache_line_size;
                size_t ofs = ( (s_len + cls - 1) / cls + 1 ) * cls;
                ITERATIONS->s_offs = ofs;
                ITERATIONS->s_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
                ofs = ( ( r_len + cls -1 )/cls + 1 )*cls;
                ITERATIONS->r_offs = ofs;
                ITERATIONS->r_cache_iter = min(ITERATIONS->n_sample,(2*ITERATIONS->cache_size*CACHE_UNIT+ofs-1)/ofs);
            } else {
                ITERATIONS->s_offs=ITERATIONS->r_offs=0;
                ITERATIONS->s_cache_iter=ITERATIONS->r_cache_iter=1;
            }
        }
    }

#ifdef MPIIO
    s_alloc = s_len;
    r_alloc = r_len;
#else
    if( ITERATIONS->use_off_cache ) {
        s_alloc = max(s_len,ITERATIONS->s_cache_iter*ITERATIONS->s_offs);
        r_alloc = max(r_len,ITERATIONS->r_cache_iter*ITERATIONS->r_offs);
    } else {
        s_alloc = s_len;
        r_alloc = r_len;
    }
#endif

    c_info->used_mem = 1.f*(s_alloc+r_alloc)/MEM_UNIT;

#ifdef DEBUG
    {
        size_t mx, mu;

        mx = (size_t) MEM_UNIT*c_info->max_mem;
        mu = (size_t) MEM_UNIT*c_info->used_mem;

        DBG_I3("Got send / recv lengths; iters ",s_len,r_len,ITERATIONS->n_sample);
        DBG_I2("max  / used memory ",mx,mu);
        DBG_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBG_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBG_I2("send / recv buffer allocations ",s_alloc, r_alloc);
        DBGF_I2("Got send / recv lengths ",s_len,r_len);
        DBGF_I2("max  / used memory ",mx,mu);
        DBGF_I2("send / recv offsets ",ITERATIONS->s_offs, ITERATIONS->r_offs);
        DBGF_I2("send / recv cache iterations ",ITERATIONS->s_cache_iter, ITERATIONS->r_cache_iter);
        DBGF_I2("send / recv buffer allocations ",s_alloc, r_alloc);
    }
#endif

    if( c_info->used_mem > c_info->max_mem ) {
        Bmark->sample_failure=SAMPLE_FAILED_MEMORY;
        return;
    }

    if (s_alloc > 0  && r_alloc > 0) {
        if (ITERATIONS->use_off_cache) {
            IMB_alloc_buf(c_info, "IMB_init_buffers_iter 1", s_alloc, r_alloc);
            IMB_set_buf(c_info, c_info->rank, 0, s_len-1, 0, r_len-1);

            /* Replicate the first message into every off-cache slot */
            for (irep = 1; irep < ITERATIONS->s_cache_iter; irep++) {
                i_s = irep % ITERATIONS->s_cache_iter;
                memcpy((void*)((char*)c_info->s_buffer + i_s * ITERATIONS->s_offs), c_info->s_buffer, s_len);
            }

            for (irep = 1; irep < ITERATIONS->r_cache_iter; irep++) {
                i_r = irep % ITERATIONS->r_cache_iter;
                memcpy((void*)((char*)c_info->r_buffer + i_r * ITERATIONS->r_offs), c_info->r_buffer, r_len);
            }
        } else {
            IMB_set_buf(c_info, c_info->rank, 0, s_alloc-1, 0, r_alloc-1);
        }
    }

    IMB_init_transfer(c_info, Bmark, size, (MPI_Aint) max(s_alloc, r_alloc));

    /* Determine #iterations if dynamic adaptation requested */
    if ((ITERATIONS->iter_policy == imode_dynamic) ||
        (ITERATIONS->iter_policy == imode_auto && !root_based)) {
        double time[MAX_TIME_ID];
        int acc_rep_test, t_sample;
        int selected_n_sample = ITERATIONS->n_sample;

        /* memset takes a byte count: clear the whole array, not MAX_TIME_ID bytes */
        memset(time, 0, sizeof(time));
        if (iter == 0 || BMODE->type == Sync) {
            ITERATIONS->n_sample_prev = ITERATIONS->msgspersample;
            if (c_info->n_lens > 0) {
                /* Byte count = entries * entry size.
                   NOTE(review): assumes numiters holds at least n_lens entries - confirm at its allocation */
                memset(ITERATIONS->numiters, 0,
                       (size_t)c_info->n_lens * sizeof(ITERATIONS->numiters[0]));
            }
        }

        /* first, run 1 iteration only */
        ITERATIONS->n_sample=1;
#ifdef MPI1
        c_info->select_source = Bmark->select_source;
#endif
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);

        time[1] = time[0];

#ifdef MPIIO
        if( Bmark->access != no) {
            /* Rewind the file pointer(s) moved by the trial run */
            ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
            MPI_ERRHAND(ierr);

            if( Bmark->fpointer == shared) {
                ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);
            }
        }
#endif /*MPIIO*/

        MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);

        { /* determine rough #repetitions for a run time of 1 sec */
            int rep_test = 1;
            if (time[0] < (1.0 / MSGSPERSAMPLE)) {
                rep_test = MSGSPERSAMPLE;
            } else if ((time[0] < 1.0)) {
                rep_test = (int)(1.0 / time[0] + 0.5);
            }

            MPI_Allreduce(&rep_test, &acc_rep_test, 1, MPI_INT, MPI_MAX, c_info->communicator);
        }

        ITERATIONS->n_sample = min(selected_n_sample, acc_rep_test);

        if (ITERATIONS->n_sample > 1) {
#ifdef MPI1
            c_info->select_source = Bmark->select_source;
#endif
            Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
            time[1] = time[0];
#ifdef MPIIO
            if( Bmark->access != no) {
                ierr = MPI_File_seek(c_info->fh, 0 ,MPI_SEEK_SET);
                MPI_ERRHAND(ierr);

                if ( Bmark->fpointer == shared) {
                    ierr = MPI_File_seek_shared(c_info->fh, 0 ,MPI_SEEK_SET);
                    MPI_ERRHAND(ierr);
                }
            }
#endif /*MPIIO*/

            MPI_Allreduce(&time[1], &time[0], 1, MPI_DOUBLE, MPI_MAX, c_info->communicator);
        }

        {
            /* Number of samples that fit into the requested run time */
            float val = (float) (1+ITERATIONS->secs/time[0]);
            t_sample = (time[0] > 1.e-8 && (val <= (float) 0x7fffffff))
                       ? (int)val
                       : selected_n_sample;
        }

        if (c_info->n_lens>0 && BMODE->type != Sync) {
            // check monotonicity with msg sizes
            int i;
            for (i = 0; i < iter; i++) {
                t_sample = ( c_info->msglen[i] < size )
                           ? min(t_sample,ITERATIONS->numiters[i])
                           : max(t_sample,ITERATIONS->numiters[i]);
            }
            ITERATIONS->n_sample = ITERATIONS->numiters[iter] = min(selected_n_sample, t_sample);
        } else {
            ITERATIONS->n_sample = min(selected_n_sample,
                                       min(ITERATIONS->n_sample_prev, t_sample));
        }

        MPI_Bcast(&ITERATIONS->n_sample, 1, MPI_INT, 0, c_info->communicator);

#ifdef DEBUG
        {
            /* time is an array: report the reduced maximum held in time[0] */
            int usec=(int)(time[0]*1000000);

            DBGF_I2("Checked time with #iters / usec ",acc_rep_test,usec);
            DBGF_I1("=> # samples, aligned with previous ",t_sample);
            DBGF_I1("final #samples ",ITERATIONS->n_sample);
        }
#endif
    } else { /*if( (ITERATIONS->iter_policy == imode_dynamic) || (ITERATIONS->iter_policy == imode_auto && !root_based) )*/
        double time[MAX_TIME_ID];
        Bmark->Benchmark(c_info,size,ITERATIONS,BMODE,&time[0]);
    }

    ITERATIONS->n_sample_prev=ITERATIONS->n_sample;

/* >> IMB 3.1  */
}
/*
 * Reads this process' part of a 3-component vector field from the file
 * "lattice.<fname>_<index>", where <index> is timeToIndex(mesh->time.current).
 *
 * mesh   Supplies the time, the number of local points (mesh.nPoints), the
 *        process id (parallel.pid) and the per-patch node counts used to
 *        compute this process' offset in the file.
 * field  Output: *field is allocated with matrixDoubleAlloc(nPoints, 3, 0)
 *        and component j of point i is stored in (*field)[i][j]. The caller
 *        owns the allocation.
 * fname  Field name used to build the file name.
 *
 * Returns 1 when the file was opened and all three component blocks were
 * read completely, 0 otherwise. *field is allocated in either case. The
 * three MPI_Barrier calls are always executed so that a local failure cannot
 * leave the other processes waiting.
 */
uint readVectorFromEnsight( latticeMesh* mesh, scalar*** field, char* fname ) {

    unsigned int status = 0;

    // Open file (bounded formatting: a long fname is truncated, not overflowed)
    char name[200];
    snprintf(name, sizeof name, "lattice.%s_%d", fname, timeToIndex(mesh->time.current));

    MPI_File file;
    int opened = ( MPI_File_open( MPI_COMM_WORLD, name, MPI_MODE_RDONLY, MPI_INFO_NULL, &file ) == MPI_SUCCESS );

    // Allocate space
    *field = matrixDoubleAlloc(mesh->mesh.nPoints, 3, 0);
    float *auxField = (float*)malloc( mesh->mesh.nPoints * sizeof(float) );
    int usable = opened && ( auxField != NULL );

    // Set Offset: fixed header, then the blocks of all lower-numbered patches
    MPI_Offset offset = 240*sizeof(char) + sizeof(int);
    uint i,j;
    for(i = 0 ; i < mesh->parallel.pid ; i++ ) {
        offset += 3*mesh->parallel.nodesPerPatch[i] * sizeof(float);
        offset += 160*sizeof(char) + sizeof(int);
    }
    if( usable ) {
        MPI_File_seek(file, offset, MPI_SEEK_SET);
    }

    // Read Array, one component block at a time
    uint completeReads = 0;
    for( j = 0 ; j < 3 ; j++) {
        if( usable ) {
            MPI_Status st;
            int count = 0;
            // Portable element count instead of the implementation-private status field
            if( MPI_File_read(file, auxField, mesh->mesh.nPoints, MPI_FLOAT, &st) == MPI_SUCCESS
                && MPI_Get_count(&st, MPI_FLOAT, &count) == MPI_SUCCESS
                && count >= 0
                && (size_t)count == (size_t)mesh->mesh.nPoints ) {
                for( i = 0 ; i < mesh->mesh.nPoints ; i++ ) {
                    field[0][i][j] = (scalar)auxField[i];
                }
                completeReads++;
            }
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }

    if( opened ) {
        MPI_File_close(&file);
    }
    free(auxField);

    if( completeReads == 3 ) {
        status = 1;
    }

    return status;
}