SizeT _MPIStream_Write( Stream* stream, void *data, SizeT elem_size, SizeT num_elems ) { MPIStream* self = (MPIStream*)stream; MPI_Status status; int writeResult; writeResult = MPI_File_write( *(MPI_File*)(self->_file->fileHandle), data, num_elems * elem_size, MPI_BYTE, &status ); if (writeResult != MPI_SUCCESS) { char errorString[2000]; int errorStringLength = 0; Stream* errorStream = Journal_Register( Error_Type, MPIFile_Type ); int myRank = 0; MPI_Comm_rank( MPI_COMM_WORLD, &myRank ); MPI_Error_string( writeResult, errorString, &errorStringLength); Journal_Printf( errorStream, "%3d: %s\n", myRank, errorString ); File_Close( self->_file ); MPI_Abort(MPI_COMM_WORLD, writeResult ); } return num_elems; }
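A minimal, self-contained sketch of the same check-and-abort error handling around MPI-IO calls; the helper name check_io and the file name demo.bin are illustrative, not part of the stream code above:

#include <mpi.h>
#include <stdio.h>

/* Report a failed MPI-IO call the way _MPIStream_Write does:
 * translate the error code, print it with the rank, then abort. */
static void check_io(int rc, const char *where)
{
    if (rc != MPI_SUCCESS) {
        char msg[MPI_MAX_ERROR_STRING];
        int len, rank;
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Error_string(rc, msg, &len);
        fprintf(stderr, "%3d: %s: %s\n", rank, where, msg);
        MPI_Abort(MPI_COMM_WORLD, rc);
    }
}

int main(int argc, char **argv)
{
    MPI_File fh;
    int data = 42;

    MPI_Init(&argc, &argv);
    check_io(MPI_File_open(MPI_COMM_SELF, "demo.bin",
                           MPI_MODE_CREATE | MPI_MODE_WRONLY,
                           MPI_INFO_NULL, &fh), "MPI_File_open");
    check_io(MPI_File_write(fh, &data, 1, MPI_INT, MPI_STATUS_IGNORE),
             "MPI_File_write");
    check_io(MPI_File_close(&fh), "MPI_File_close");
    MPI_Finalize();
    return 0;
}

Unlike communicators, MPI file handles default to MPI_ERRORS_RETURN, so the return codes really do come back to the caller rather than aborting inside the library.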
/** * \brief Initialization function of the measurement function * measure_MPI_IO_write_file_once(). * * Only one process is active. It writes once to a file. * * Remark:<br> * With the <tt>O_DIRECT</tt> flag set, cache effects are minimized, because I/O * is done directly to/from user space buffers. The operating system's page * cache is bypassed. Under Linux 2.6 alignment to 512-byte boundaries is * required for buffer and file offset. Thus the following parameters should be * set in a SKaMPI input file: * - <tt>set_send_buffert_alignment (512)</tt> * - <tt>set_recv_buffert_alignment (512)</tt> * - <tt>switch_buffer_cycling_off ()</tt><br> * * For more information please refer to the <tt>open ()</tt> man pages. * * \param[in] size size of memory buffer, i.e. number of <tt>MPI_BYTE</tt>s * \param[in] api POSIX-API or MPI-API for I/O accesses * \param[in] create_flag write into existing file (FALSE) or create it (TRUE) * \param[in] directio_flag open file with <tt>O_DIRECT</tt> flag to minimize * cache effects * * \return void */ void init_MPI_IO_write_file_once (int size, char *api, int create_flag, int directio_flag) { char *send_buffer; assert (size > 0); io_filename = get_io_filename (IO_FILENAME, 0); if (get_measurement_rank () == 0){ if (create_flag == 0){ send_buffer = mpi_malloc_chars (get_extent (size, MPI_BYTE)); MPI_File_open (MPI_COMM_SELF, io_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN, MPI_INFO_NULL, &io_fh); MPI_File_set_view (io_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); MPI_File_write (io_fh, send_buffer, size, MPI_BYTE, MPI_STATUS_IGNORE); MPI_File_close (&io_fh); mpi_free (send_buffer); } set_send_buffer_usage (size); set_reported_message_size (size); } MPI_Barrier (get_measurement_comm ()); /* set synchronization type: SYNC_BARRIER if all SKaMPI processes run on one physical processor SYNC_REAL if every SKaMPI process runs on its own physical processor */ set_synchronization (SYNC_REAL); init_synchronization (); }
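The O_DIRECT remark above is POSIX-level; here is a hedged sketch of what the 512-byte alignment requirement looks like outside SKaMPI. The file name directio.dat is illustrative, and the write may fail with EINVAL on filesystems without O_DIRECT support (e.g. tmpfs):

#define _GNU_SOURCE            /* exposes O_DIRECT on Linux */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    void *buf;
    size_t size = 4096;        /* length must be a multiple of 512 */

    /* buffer aligned to the 512-byte boundary O_DIRECT expects */
    if (posix_memalign(&buf, 512, size) != 0)
        return 1;
    memset(buf, 0, size);

    int fd = open("directio.dat", O_WRONLY | O_CREAT | O_DIRECT, 0644);
    if (fd < 0) { free(buf); return 1; }

    /* the file offset (0 here) must also be a multiple of 512 */
    ssize_t n = write(fd, buf, size);
    close(fd);
    free(buf);
    return n == (ssize_t) size ? 0 : 1;
}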
int main(int argc, char **argv) { int *buf, i, rank, nints, len; char *filename, *tmp; MPI_File fh; MPI_Status status; FILE * fp; char * line = NULL; size_t slen = 0; ssize_t sread; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if(rank == 0){ i = 1; while((i < argc) && strcmp("-fname", *argv)){ i++; argv++; } if(i >= argc){ fprintf(stderr, "\n*# Usage: -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *)malloc(len+10); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } else{ MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+10); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } buf = (int *)malloc(SIZE); // nints = SIZE/sizeof(int); fp = fopen("/Users/jinyangzhou/Documents/Github/MPI-IO", "r"); while((sread = getline(&line, &slen, fp)) != -1){ // printf("%d\n", atoi(line)); for(i = rank*3; i<rank*3+3; i++){ buf[i] = atoi(line); } } /* for(i = 0; i<nints; i++){ buf[i] = rank; } */ tmp = (char *) malloc(len+10); strcpy(tmp, filename); sprintf(filename, "%s.%d", tmp, rank); MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_write(fh, buf + rank*3, 3, MPI_INT, &status); MPI_File_close(&fh); for(i = rank*3; i<rank*3+3; i++) buf[i] = buf[i] + 10; MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_write(fh, buf + rank*3, 3, MPI_INT, &status); MPI_File_close(&fh); for(i = rank*3; i<rank*3+3; i++) printf("%d\n", buf[i]); free(line); free(buf); free(filename); free(tmp); MPI_Finalize(); return 0; }
int main(int argc, char* argv[]) { int n, my_rank; int array_of_subsizes[NDIMS], array_of_starts[NDIMS], array_of_sizes[NDIMS]; int size = 4; int sqrtn; int ln; MPI_Datatype filetype, memtype; MPI_File fh; char hdr[128]; int header_bytes; unsigned char *cur; char name[128]; int resultlen; int ret; int i, j; /* Initialize MPI. */ MPI_Init(&argc, &argv); MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); /* Learn my rank and the total number of processors. */ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &n); /* Speak! */ MPI_Get_processor_name(name, &resultlen); printf("process %d running on %s\n", my_rank, name); /* Set up our values. */ sqrtn = (int)sqrt(n); ln = size/sqrtn; printf("n = %d, sqrtn = %d, ln = %d storage = %d\n", n, sqrtn, ln, (ln + 2) * (ln + 2)); /* Allocation storage. */ if (!(cur = calloc((ln + 2) * (ln + 2), 1))) return ERR; /* Initialize data. */ for (i = 1; i < ln + 1; i++) for (j = 1; j < ln + 1; j++) cur[i * (ln + 2) + j] = my_rank; /* Create a subarray type for the file. */ array_of_sizes[0] = array_of_sizes[1] = size; array_of_subsizes[0] = array_of_subsizes[1] = ln; array_of_starts[0] = my_rank/sqrtn * ln; array_of_starts[1] = (my_rank % sqrtn) * ln; if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_BYTE, &filetype))) MPIERR(ret); if ((ret = MPI_Type_commit(&filetype))) MPIERR(ret); /* Create a subarray type for memory. */ array_of_sizes[0] = array_of_sizes[1] = ln + 2; array_of_subsizes[0] = array_of_subsizes[1] = ln; array_of_starts[0] = array_of_starts[1] = 1; if ((ret = MPI_Type_create_subarray(NDIMS, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_BYTE, &memtype))) MPIERR(ret); if ((ret = MPI_Type_commit(&memtype))) MPIERR(ret); MPI_File_delete(FILE_NAME, MPI_INFO_NULL); if ((ret = MPI_File_open(MPI_COMM_WORLD, FILE_NAME, MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh))) MPIERR(ret); /* Create header info, and have process 0 write it to the file. */ sprintf(hdr, "P5\n%d %d\n255\n", size, size); header_bytes = strlen(hdr); if ((ret = MPI_File_write_all(fh, hdr, header_bytes, MPI_BYTE, MPI_STATUS_IGNORE))) MPIERR(ret); /* Set the file view to translate our memory data into the file's data layout. */ MPI_File_set_view(fh, header_bytes, MPI_BYTE, filetype, "native", MPI_INFO_NULL); /* Write the output. */ MPI_File_write(fh, cur, 1, memtype, MPI_STATUS_IGNORE); if ((ret = MPI_File_close(&fh))) MPIERR(ret); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int *buf, i, rank, nints, flag, len; char *filename, *tmp; MPI_File fh; MPI_Status status; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { printf("\n*# Usage: %s -fname filename\n\n", argv[0]); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = (int) strlen(*argv); filename = (char *) malloc(len+10); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+10); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } buf = (int *) malloc(SIZE); nints = SIZE/sizeof(int); for (i=0; i<nints; i++) buf[i] = rank*100000 + i; /* each process opens a separate file called filename.'myrank' */ tmp = (char *) malloc(len+10); strcpy(tmp, filename); sprintf(filename, "%s.%d", tmp, rank); MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL); MPI_File_write(fh, buf, nints, MPI_INT, &status); MPI_File_close(&fh); /* reopen the file and read the data back */ for (i=0; i<nints; i++) buf[i] = 0; MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL); MPI_File_read(fh, buf, nints, MPI_INT, &status); MPI_File_close(&fh); /* check if the data read is correct */ flag = 0; for (i=0; i<nints; i++) if (buf[i] != (rank*100000 + i)) { printf("Process %d: error, read %d, should be %d\n", rank, buf[i], rank*100000+i); flag = 1; } if (!flag) printf("Process %d: data read back is correct\n", rank); free(buf); free(filename); free(tmp); MPI_Finalize(); return 0; }
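A hedged alternative sketch to the one-file-per-rank pattern above: every rank writes a disjoint block of one shared file with explicit-offset I/O. The file name shared.out and the block size are illustrative:

#include <mpi.h>

#define NINTS 256

int main(int argc, char **argv)
{
    int rank, i, buf[NINTS];
    MPI_File fh;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    for (i = 0; i < NINTS; i++) buf[i] = rank * 100000 + i;

    MPI_File_open(MPI_COMM_WORLD, "shared.out",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    /* explicit-offset write: rank r lands at byte r*NINTS*sizeof(int),
     * so no file view and no seek are needed */
    MPI_File_write_at(fh, (MPI_Offset) rank * NINTS * sizeof(int),
                      buf, NINTS, MPI_INT, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}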
int main(int argc, char *argv[]) { int rank, size; const int N = atoi(argv[1]); // printf("Number of testcase = %d\n", N); MPI_Init (&argc, &argv); double start_time, end_time; MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Comm_size (MPI_COMM_WORLD, &size); // printf("My rank is %d \n", rank); //start_time = MPI_Wtime(); MPI_File fin, fout; MPI_Status status; int *root_arr; int max_arr_size = size > N ? size : N; int ret = MPI_File_open(MPI_COMM_WORLD, argv[2], MPI_MODE_RDONLY, MPI_INFO_NULL, &fin); if (rank == ROOT) { root_arr = new int[max_arr_size+3]; // printf("Enter rank 0 statement ... \n"); MPI_File_read(fin, root_arr, N, MPI_INT, &status); /* for (int i = 0; i < N; ++i) printf("[START] [Rank %d] root_arr[%d] = %d\n", rank, i, root_arr[i]); printf("Out Rank 0 statement ... \n"); */ } MPI_File_close(&fin); MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file int rank_num = size > N ? N : size; const int LAST = rank_num - 1; int num_per_node = N / rank_num; int *local_arr; int num_per_node_diff = N - num_per_node * rank_num; int diff = num_per_node_diff; bool has_remain = false; bool has_remain_rank = rank_num % 2 ? true : false; if (num_per_node_diff > 0) { // Send remaining elements to size - 1 has_remain = true; if (rank == ROOT) { MPI_Send(root_arr + N - diff, diff, MPI_INT, LAST, 0, MPI_COMM_WORLD); } else if (rank == LAST) { // Handle special case num_per_node += num_per_node_diff; local_arr = new int[num_per_node+1]; MPI_Recv(local_arr + num_per_node - diff, diff, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } } else if(rank == rank_num - 1) { local_arr = new int[num_per_node+1]; } MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file if (rank != rank_num - 1) local_arr = new int[num_per_node+1]; // MPI_Scatter (send_buf, send_count, send_type, recv_buf, recv_count, recv_type, root, comm) if (rank < LAST) MPI_Scatter(root_arr, num_per_node, MPI_INT, local_arr, num_per_node, MPI_INT, ROOT, MPI_COMM_WORLD); else MPI_Scatter(root_arr, num_per_node-diff, MPI_INT, local_arr, num_per_node-diff, MPI_INT, ROOT, MPI_COMM_WORLD); // printf("[Rank %d] num_per_node_size = %d\n" ,rank, num_per_node); MPI_Barrier(MPI_COMM_WORLD); /* for (int i = 0; i < num_per_node; ++i) printf("[BEFORE] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); */ if (rank < rank_num) { int round = N % 2 ? 
N+1 : N; for (int i = 0; i < round; ++i) { // bool need_send = (i & 1)^(num_per_node & 1); bool need_send = true; for (int j = i & 1; j < num_per_node; j+=2) { if (j+1 < num_per_node) { if (local_arr[j] > local_arr[j+1]) swap(local_arr[j], local_arr[j+1]); } else if (j-1 >= 0) { if (local_arr[j-1] > local_arr[j]) swap(local_arr[j-1], local_arr[j]); } } int element; bool recv_side; if (i & 1) { if (rank & 1) recv_side = true; else recv_side = false; } else { if (rank & 1) recv_side = false; else recv_side = true; } // if (recv_side) printf("i = %d, rank = %d, recv\n", i, rank); // if (!recv_side) printf("i = %d, rank = %d, send\n", i, rank); if (recv_side) { if (rank != ROOT) { /* Receive element */ MPI_Recv(&element, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD, &status); MPI_Send(local_arr, 1, MPI_INT, rank - 1, 0, MPI_COMM_WORLD); if (element > local_arr[0]) swap(element, local_arr[0]); } } else { /* Send element */ if (rank != LAST) { element = local_arr[num_per_node-1]; MPI_Send(&element, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD); MPI_Recv(&element, 1, MPI_INT, rank + 1, 0, MPI_COMM_WORLD, &status); if (element < local_arr[num_per_node-1]) swap(element, local_arr[num_per_node-1]); } } } } /* MPI_Barrier(MPI_COMM_WORLD); for (int i = 0; i < num_per_node; ++i) printf("[AFTER] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); printf("rank %d is arrived\n", rank); */ MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file int *ans; if (rank == ROOT) { ans = new int[max_arr_size+3]; } if (has_remain && rank == rank_num - 1) { MPI_Gather(local_arr, num_per_node - diff, MPI_INT, ans, num_per_node - diff, MPI_INT, ROOT, MPI_COMM_WORLD); MPI_Send(local_arr + num_per_node - diff, diff, MPI_INT, ROOT, 0, MPI_COMM_WORLD); } else { MPI_Gather(local_arr, num_per_node, MPI_INT, ans, num_per_node, MPI_INT, ROOT, MPI_COMM_WORLD); if (has_remain && rank == ROOT) MPI_Recv(ans + N - diff, diff, MPI_INT, LAST, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, argv[3], MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fout); if (rank == ROOT) { MPI_File_write(fout, ans, N, MPI_INT, &status); for (int i = 0; i < N; ++i) { // printf("[FINAL] [Rank %d] ans[%d] = %d\n", rank, i, ans[i]); } } MPI_File_close(&fout); // printf("rank %d is arrived\n", rank); MPI_Barrier(MPI_COMM_WORLD); if (rank != 0) { delete [] local_arr; // printf("[FREE] [RANK %d] SUCCESS FREE\n", rank); } else { delete [] ans; delete [] root_arr; delete [] local_arr;; } MPI_Finalize(); return 0; }
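The paired Send/Recv exchange in the compare-exchange phase above has to be ordered carefully on the two sides to avoid deadlock; a hedged sketch of the same boundary exchange done in one deadlock-free call with MPI_Sendrecv (the values exchanged are illustrative):

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size, mine, theirs;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    mine = 100 - rank;                         /* deliberately out of order */

    int partner = (rank % 2 == 0) ? rank + 1 : rank - 1;
    if (partner >= 0 && partner < size) {
        /* one call both sends and receives: no Send/Recv ordering to get wrong */
        MPI_Sendrecv(&mine, 1, MPI_INT, partner, 0,
                     &theirs, 1, MPI_INT, partner, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        /* lower rank of the pair keeps the smaller value, higher the larger */
        if (rank < partner) mine = mine < theirs ? mine : theirs;
        else                mine = mine > theirs ? mine : theirs;
    }
    printf("rank %d now holds %d\n", rank, mine);
    MPI_Finalize();
    return 0;
}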
int main(int argc, char *argv[]) { int i, j, nerrors=0, total_errors=0; int rank, size; int bpos; MPI_Datatype darray; MPI_Status status; MPI_File mpi_fh; /* Define array distribution A 2x2 block size works with ROMIO, a 3x3 block size breaks it. */ int distrib[2] = { MPI_DISTRIBUTE_CYCLIC, MPI_DISTRIBUTE_CYCLIC }; int bsize[2] = { NBLOCK, NBLOCK }; int gsize[2] = { NSIDE, NSIDE }; int psize[2] = { NPROC, NPROC }; double data[NSIDE*NSIDE]; double *ldata, *pdata; int tsize, nelem; MPI_File dfile; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* Set up type */ CHECK(MPI_Type_create_darray(size, rank, 2, gsize, distrib, bsize, psize, MPI_ORDER_FORTRAN, MPI_DOUBLE, &darray)); CHECK(MPI_Type_commit(&darray)); CHECK(MPI_Type_size(darray, &tsize)); nelem = tsize / sizeof(double); for(i = 0; i < (NSIDE*NSIDE); i++) data[i] = i; if (rank == 0) { CHECK(MPI_File_open(MPI_COMM_SELF, argv[1], MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &dfile)); CHECK(MPI_File_write(dfile, data, NSIDE*NSIDE, MPI_DOUBLE, &status)); CHECK(MPI_File_close(&dfile)); } MPI_Barrier(MPI_COMM_WORLD); /* Allocate buffer */ ldata = (double *)malloc(tsize); pdata = (double *)malloc(tsize); /* Use Pack to pull out array */ bpos = 0; CHECK(MPI_Pack(data, 1, darray, pdata, tsize, &bpos, MPI_COMM_WORLD)); MPI_Barrier(MPI_COMM_WORLD); /* Read in array from file. */ CHECK(MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_fh)); CHECK(MPI_File_set_view(mpi_fh, 0, MPI_DOUBLE, darray, "native", MPI_INFO_NULL)); CHECK(MPI_File_read_all(mpi_fh, ldata, nelem, MPI_DOUBLE, &status)); CHECK(MPI_File_close(&mpi_fh)); for(i = 0; i < size; i++) { #ifdef VERBOSE MPI_Barrier(MPI_COMM_WORLD); if(rank == i) { printf("=== Rank %i === (%i elements) \nPacked: ", rank, nelem); for(j = 0; j < nelem; j++) { printf("%4.1f ", pdata[j]); fflush(stdout); } printf("\nRead: "); for(j = 0; j < nelem; j++) { printf("%4.1f ", ldata[j]); fflush(stdout); } printf("\n\n"); fflush(stdout); } #endif if(rank == i) { for (j=0; j< nelem; j++) { if (pdata[j] != ldata[j]) { fprintf(stderr, "rank %d at index %d: packbuf %4.1f filebuf %4.1f\n", rank, j, pdata[j], ldata[j]); nerrors++; } } } } MPI_Allreduce(&nerrors, &total_errors, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); if (rank == 0 && total_errors == 0) printf(" No Errors\n"); free(ldata); free(pdata); MPI_Type_free(&darray); MPI_Finalize(); exit(total_errors); }
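A smaller hedged sketch of the set_view/read_all pattern the test above relies on: the displacement is in bytes, and the filetype tiles the rest of the file. The file name strided.dat is illustrative and is assumed to already hold at least 4*size ints:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size, vals[4];
    MPI_Datatype filetype;
    MPI_File fh;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* filetype: one int visible, (size-1) ints skipped, four times over */
    MPI_Type_vector(4, 1, size, MPI_INT, &filetype);
    MPI_Type_commit(&filetype);

    MPI_File_open(MPI_COMM_WORLD, "strided.dat", MPI_MODE_RDONLY,
                  MPI_INFO_NULL, &fh);
    /* displacement is in bytes: rank r starts at the r-th int */
    MPI_File_set_view(fh, (MPI_Offset)(rank * sizeof(int)), MPI_INT,
                      filetype, "native", MPI_INFO_NULL);
    MPI_File_read_all(fh, vals, 4, MPI_INT, MPI_STATUS_IGNORE);
    printf("rank %d read %d %d %d %d\n", rank,
           vals[0], vals[1], vals[2], vals[3]);

    MPI_Type_free(&filetype);
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}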
//*************************************************************************************************************** void ChimeraCheckRDP::makeSVGpic(vector<sim> info) { try{ string file = outputDir + querySeq->getName() + ".chimeracheck.svg"; MPI_File outSVG; int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; //char* FileName = new char[file.length()]; //memcpy(FileName, file.c_str(), file.length()); char FileName[1024]; strcpy(FileName, file.c_str()); MPI_File_open(MPI_COMM_SELF, FileName, outMode, MPI_INFO_NULL, &outSVG); //comm, filename, mode, info, filepointer //delete FileName; int width = (info.size()*5) + 150; string outString = ""; outString += "<svg xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns=\"http://www.w3.org/2000/svg\" width=\"100%\" height=\"100%\" viewBox=\"0 0 700 " + toString(width) + "\">\n"; outString += "<g>\n"; outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"25\">Plotted IS values for " + querySeq->getName() + "</text>\n"; outString += "<line x1=\"75\" y1=\"600\" x2=\"" + toString((info.size()*5) + 75) + "\" y2=\"600\" stroke=\"black\" stroke-width=\"2\"/>\n"; outString += "<line x1=\"75\" y1=\"600\" x2=\"75\" y2=\"125\" stroke=\"black\" stroke-width=\"2\"/>\n"; outString += "<text fill=\"black\" class=\"seri\" x=\"80\" y=\"620\">" + toString(info[0].midpoint) + "</text>\n"; outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((info.size()*5) + 75) + "\" y=\"620\">" + toString(info[info.size()-1].midpoint) + "</text>\n"; outString += "<text fill=\"black\" class=\"seri\" x=\"" + toString((width / 2) - 150) + "\" y=\"650\">Base Positions</text>\n"; outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"580\">0</text>\n"; outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"350\">IS</text>\n"; //find max is score float biggest = 0.0; for (int i = 0; i < info.size(); i++) { if (info[i].score > biggest) { biggest = info[i].score; } } outString += "<text fill=\"black\" class=\"seri\" x=\"50\" y=\"135\">" + toString(biggest) + "</text>\n"; float scaler2 = 500 / biggest; outString += "<polyline fill=\"none\" stroke=\"red\" stroke-width=\"2\" points=\""; //160,200 180,230 200,210 234,220\"/> "; for (int i = 0; i < info.size(); i++) { if(info[i].score < 0) { info[i].score = 0; } outString += toString(((i*5) + 75)) + "," + toString((600 - (info[i].score * scaler2))) + " "; } outString += "\"/> "; outString += "</g>\n</svg>\n"; MPI_Status status; int length = outString.length(); char* buf2 = new char[length]; memcpy(buf2, outString.c_str(), length); MPI_File_write(outSVG, buf2, length, MPI_CHAR, &status); delete [] buf2; MPI_File_close(&outSVG); } catch(exception& e) { m->errorOut(e, "ChimeraCheckRDP", "makeSVGpic"); exit(1); } }
int main(int argc, char **argv) { int *buf, i, rank, nints, len; char *filename, *tmp; int errs = 0, toterrs, errcode; MPI_File fh; MPI_Status status; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!rank) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { fprintf(stderr, "\n*# Usage: simple -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len+10); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+10); MPI_Bcast(filename, len+10, MPI_CHAR, 0, MPI_COMM_WORLD); } buf = (int *) malloc(SIZE); nints = SIZE/sizeof(int); for (i=0; i<nints; i++) buf[i] = rank*100000 + i; /* each process opens a separate file called filename.'myrank' */ tmp = (char *) malloc(len+10); strcpy(tmp, filename); sprintf(filename, "%s.%d", tmp, rank); errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open(1)"); errcode = MPI_File_write(fh, buf, nints, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_write"); errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close(1)"); /* reopen the file and read the data back */ for (i=0; i<nints; i++) buf[i] = 0; errcode = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_open(2)"); errcode = MPI_File_read(fh, buf, nints, MPI_INT, &status); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_read"); errcode = MPI_File_close(&fh); if (errcode != MPI_SUCCESS) handle_error(errcode, "MPI_File_close(2)"); /* check if the data read is correct */ for (i=0; i<nints; i++) { if (buf[i] != (rank*100000 + i)) { errs++; fprintf(stderr, "Process %d: error, read %d, should be %d\n", rank, buf[i], rank*100000+i); } } MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rank == 0) { if( toterrs > 0) { fprintf( stderr, "Found %d errors\n", toterrs ); } else { fprintf( stdout, " No Errors\n" ); } } free(buf); free(filename); free(tmp); MPI_Finalize(); return 0; }
/* * Save rank_values_table[] to a file * Note * -. Only root (rank 0) will save rank values to a file * -. Assumed rank_values_table[] are the same across all proc */ int mpi_write(char *filename, /* (IN) file name */ int totalNumUrls, /* (IN) number of total urls */ double *rank_values_table, /* (IN) array of rank values. double[total_num_urls] */ MPI_Comm comm) /* (IN) MPI communicator */ { int divide, rem, len, err; int i, j, k, rank, nproc; char outFileName[1024], fs_type[32], str[32]; MPI_File fh,fhall; MPI_Status status; int *index; void quickSort(double arr[], int index[],int left, int right); MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &nproc); /* only proc 0 do this, because rank_values_table[] are the same across all proc */ if (rank == 0) { index = (int *)malloc(totalNumUrls * sizeof(int)); sprintf(outFileName, "%s.all",filename); printf("\nProc:%d is writing rank values of %d urls to file %s\n", rank, totalNumUrls,outFileName); printf("Proc:%d is writing top 10 page rank values to file %s\n", rank,filename); err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) { char errstr[MPI_MAX_ERROR_STRING]; int errlen; MPI_Error_string(err, errstr, &errlen); printf("Error at opening file %s (%s)\n", filename, errstr); MPI_Finalize(); exit(1); } err = MPI_File_open(MPI_COMM_SELF, outFileName, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fhall); if (err != MPI_SUCCESS) { char errstr[MPI_MAX_ERROR_STRING]; int errlen; MPI_Error_string(err, errstr, &errlen); printf("Error at opening file %s (%s)\n", outFileName, errstr); MPI_Finalize(); exit(1); } for (i = 0; i < totalNumUrls; i++) { index[i] = i; memset(str,'\0',32); sprintf(str, "%d ", i); MPI_File_write(fhall, str, strlen(str), MPI_CHAR, &status); sprintf(str, "%f\n", rank_values_table[i]); MPI_File_write(fhall, str, strlen(str), MPI_CHAR, &status); } MPI_File_close(&fhall); // Sort the urls printf("Using quicksort to sort the top 10 Urls\n"); quickSort(rank_values_table,index,0,totalNumUrls-1); for (i = totalNumUrls-1; i >= totalNumUrls-10; i--) { memset(str,'\0',32); sprintf(str, "%d\t", index[i]); MPI_File_write(fh, str, strlen(str), MPI_CHAR, &status); sprintf(str, "%f\n", rank_values_table[i]); MPI_File_write(fh, str, strlen(str), MPI_CHAR, &status); } MPI_File_close(&fh); free(index); } return 1; }
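A hedged sketch of the sort-by-value-keeping-indices step that mpi_write depends on, using the C library qsort on an index array instead of the custom quickSort; names and the sample data are illustrative:

#include <stdio.h>
#include <stdlib.h>

static const double *g_vals;            /* comparator context */

/* ascending order by the value each index points at */
static int cmp_idx(const void *a, const void *b)
{
    double va = g_vals[*(const int *)a], vb = g_vals[*(const int *)b];
    return (va > vb) - (va < vb);
}

int main(void)
{
    double vals[] = { 0.3, 0.9, 0.1, 0.5 };
    int idx[] = { 0, 1, 2, 3 }, i;

    g_vals = vals;
    qsort(idx, 4, sizeof idx[0], cmp_idx);
    for (i = 3; i >= 0; i--)             /* top values, highest first */
        printf("url %d: %f\n", idx[i], vals[idx[i]]);
    return 0;
}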
int main( int argc, char *argv[] ) { int errs = 0; int size, rank, i, *buf, count, rc; MPI_File fh; MPI_Comm comm; MPI_Status status; MTest_Init( &argc, &argv ); comm = MPI_COMM_WORLD; rc = MPI_File_open( comm, (char*)"test.ord", MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &fh ); if (rc) { MTestPrintErrorMsg( "File_open", rc ); errs++; /* If the open fails, there isn't anything else that we can do */ goto fn_fail; } MPI_Comm_size( comm, &size ); MPI_Comm_rank( comm, &rank ); buf = (int *)malloc( size * sizeof(int) ); buf[0] = rank; /* Write to file */ rc = MPI_File_write_ordered( fh, buf, 1, MPI_INT, &status ); if (rc) { MTestPrintErrorMsg( "File_write_ordered", rc ); errs++; } else { MPI_Get_count( &status, MPI_INT, &count ); if (count != 1) { errs++; fprintf( stderr, "Wrong count (%d) on write-ordered\n", count ); } } /* Set the individual pointer to 0, since we want to use a read_all */ MPI_File_seek( fh, 0, MPI_SEEK_SET ); /* Read nothing (check status) */ memset( &status, 0xff, sizeof(MPI_Status) ); MPI_File_read( fh, buf, 0, MPI_INT, &status ); MPI_Get_count( &status, MPI_INT, &count ); if (count != 0) { errs++; fprintf( stderr, "Count not zero (%d) on read\n", count ); } /* Write nothing (check status) */ memset( &status, 0xff, sizeof(MPI_Status) ); MPI_File_write( fh, buf, 0, MPI_INT, &status ); MPI_Get_count( &status, MPI_INT, &count ); if (count != 0) { errs++; fprintf( stderr, "Count not zero (%d) on write\n", count ); } /* Read shared nothing (check status) */ MPI_File_seek_shared( fh, 0, MPI_SEEK_SET ); /* Read nothing (check status) */ memset( &status, 0xff, sizeof(MPI_Status) ); MPI_File_read_shared( fh, buf, 0, MPI_INT, &status ); MPI_Get_count( &status, MPI_INT, &count ); if (count != 0) { errs++; fprintf( stderr, "Count not zero (%d) on read shared\n", count ); } /* Write nothing (check status) */ memset( &status, 0xff, sizeof(MPI_Status) ); MPI_File_write_shared( fh, buf, 0, MPI_INT, &status ); MPI_Get_count( &status, MPI_INT, &count ); if (count != 0) { errs++; fprintf( stderr, "Count not zero (%d) on write\n", count ); } MPI_Barrier( comm ); MPI_File_seek_shared( fh, 0, MPI_SEEK_SET ); for (i=0; i<size; i++) buf[i] = -1; MPI_File_read_ordered( fh, buf, 1, MPI_INT, &status ); if (buf[0] != rank) { errs++; fprintf( stderr, "%d: buf = %d\n", rank, buf[0] ); } free( buf ); MPI_File_close( &fh ); fn_fail: MTest_Finalize( errs ); MPI_Finalize(); return 0; }
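A minimal sketch of the status/count convention the test above checks: a zero-element data access must leave a count of zero in the status. The file name count.tmp is illustrative:

#include <mpi.h>
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Status status;
    int buf[4], count;

    MPI_Init(&argc, &argv);
    MPI_File_open(MPI_COMM_WORLD, "count.tmp",
                  MPI_MODE_RDWR | MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE,
                  MPI_INFO_NULL, &fh);
    memset(&status, 0xff, sizeof(MPI_Status));  /* poison, as in the test */
    MPI_File_read(fh, buf, 0, MPI_INT, &status);
    MPI_Get_count(&status, MPI_INT, &count);
    printf("zero-length read reported count = %d (expect 0)\n", count);
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}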
/////// need to fix to work with calcs and sequencedb int DistanceCommand::driverMPI(int startLine, int endLine, string file, unsigned long long& size, string square){ try { ValidCalculators validCalculator; Dist* distCalculator = NULL; if (m->isTrue(countends) == true) { for (int i=0; i<Estimators.size(); i++) { if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); } else if (Estimators[i] == "eachgap") { distCalculator = new eachGapDist(); } else if (Estimators[i] == "onegap") { distCalculator = new oneGapDist(); } } } }else { for (int i=0; i<Estimators.size(); i++) { if (validCalculator.isValidCalculator("distance", Estimators[i]) == true) { if (Estimators[i] == "nogaps") { distCalculator = new ignoreGaps(); } else if (Estimators[i] == "eachgap"){ distCalculator = new eachGapIgnoreTermGapDist(); } else if (Estimators[i] == "onegap") { distCalculator = new oneGapIgnoreTermGapDist(); } } } } MPI_Status status; MPI_File outMPI; int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; //char* filename = new char[file.length()]; //memcpy(filename, file.c_str(), file.length()); char filename[1024]; strcpy(filename, file.c_str()); MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); //delete filename; int startTime = time(NULL); string outputString = ""; size = 0; if(startLine == 0){ outputString += toString(alignDB.getNumSeqs()) + "\n"; } for(int i=startLine;i<endLine;i++){ string name = alignDB.get(i).getName(); if (name.length() < 10) { //pad with spaces to make compatible while (name.length() < 10) { name += " "; } } outputString += name; for(int j=0;j<alignDB.getNumSeqs();j++){ if (m->control_pressed) { delete distCalculator; return 0; } distCalculator->calcDist(alignDB.get(i), alignDB.get(j)); double dist = distCalculator->getDist(); outputString += "\t" + toString(dist); } outputString += "\n"; if(i % 100 == 0){ m->mothurOutJustToScreen(toString(i) + "\t" + toString(time(NULL) - startTime)+"\n"); } //send results to parent int length = outputString.length(); char* buf = new char[length]; memcpy(buf, outputString.c_str(), length); MPI_File_write(outMPI, buf, length, MPI_CHAR, &status); size += outputString.length(); outputString = ""; delete [] buf; } m->mothurOutJustToScreen(toString(endLine-1) + "\t" + toString(time(NULL) - startTime)+"\n"); MPI_File_close(&outMPI); delete distCalculator; return 1; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "driverMPI"); exit(1); } }
int DistanceCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } int startTime = time(NULL); //save number of new sequence numNewFasta = alignDB.getNumSeqs(); //sanity check the oldfasta and column file as well as add oldfasta sequences to alignDB if ((oldfastafile != "") && (column != "")) { if (!(sanityCheck())) { return 0; } } if (m->control_pressed) { return 0; } int numSeqs = alignDB.getNumSeqs(); cutoff += 0.005; if (!alignDB.sameLength()) { m->mothurOut("[ERROR]: your sequences are not the same length, aborting."); m->mothurOutEndLine(); return 0; } string outputFile; map<string, string> variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile)); if (output == "lt") { //does the user want lower triangle phylip formatted file variables["[outputtag]"] = "phylip"; outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); //output numSeqs to phylip formatted dist file }else if (output == "column") { //user wants column format outputFile = getOutputFileName("column", variables); outputTypes["column"].push_back(outputFile); //so we don't accidentally overwrite if (outputFile == column) { string tempcolumn = column + ".old"; rename(column.c_str(), tempcolumn.c_str()); } m->mothurRemove(outputFile); }else { //assume square variables["[outputtag]"] = "square"; outputFile = getOutputFileName("phylip", variables); m->mothurRemove(outputFile); outputTypes["phylip"].push_back(outputFile); } #ifdef USE_MPI int pid, start, end; int tag = 2001; MPI_Status status; MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are //each process gets where it should start and stop in the file if (output != "square") { start = int (sqrt(float(pid)/float(processors)) * numSeqs); end = int (sqrt(float(pid+1)/float(processors)) * numSeqs); }else{ start = int ((float(pid)/float(processors)) * numSeqs); end = int ((float(pid+1)/float(processors)) * numSeqs); } if (output == "column") { MPI_File outMPI; int amode=MPI_MODE_CREATE|MPI_MODE_WRONLY; //char* filename = new char[outputFile.length()]; //memcpy(filename, outputFile.c_str(), outputFile.length()); char filename[1024]; strcpy(filename, outputFile.c_str()); MPI_File_open(MPI_COMM_WORLD, filename, amode, MPI_INFO_NULL, &outMPI); //delete filename; if (pid == 0) { //you are the root process //do your part string outputMyPart; driverMPI(start, end, outMPI, cutoff); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } //wait on chidren for(int i = 1; i < processors; i++) { if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); } }else { //you are a child process //do your part driverMPI(start, end, outMPI, cutoff); if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } char buf[5]; strcpy(buf, "done"); //tell parent you are done. 
MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD); } MPI_File_close(&outMPI); }else { //lower triangle format if (pid == 0) { //you are the root process //do your part string outputMyPart; unsigned long long mySize; if (output != "square"){ driverMPI(start, end, outputFile, mySize); } else { driverMPI(start, end, outputFile, mySize, output); } if (m->control_pressed) { outputTypes.clear(); return 0; } int amode=MPI_MODE_APPEND|MPI_MODE_WRONLY|MPI_MODE_CREATE; MPI_File outMPI; MPI_File inMPI; //char* filename = new char[outputFile.length()]; //memcpy(filename, outputFile.c_str(), outputFile.length()); char filename[1024]; strcpy(filename, outputFile.c_str()); MPI_File_open(MPI_COMM_SELF, filename, amode, MPI_INFO_NULL, &outMPI); //delete filename; //wait on children for(int b = 1; b < processors; b++) { unsigned long long fileSize; if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&outMPI); return 0; } MPI_Recv(&fileSize, 1, MPI_UNSIGNED_LONG_LONG, b, tag, MPI_COMM_WORLD, &status); string outTemp = outputFile + toString(b) + ".temp"; char* buf = new char[outTemp.length()+1]; memcpy(buf, outTemp.c_str(), outTemp.length()); buf[outTemp.length()] = '\0'; MPI_File_open(MPI_COMM_SELF, buf, MPI_MODE_DELETE_ON_CLOSE|MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); delete [] buf; unsigned long long count = 0; while (count < fileSize) { char buf2[1]; MPI_File_read(inMPI, buf2, 1, MPI_CHAR, &status); MPI_File_write(outMPI, buf2, 1, MPI_CHAR, &status); count += 1; } MPI_File_close(&inMPI); //deleted on close } MPI_File_close(&outMPI); }else { //you are a child process //do your part unsigned long long size; if (output != "square"){ driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size); } else { driverMPI(start, end, (outputFile + toString(pid) + ".temp"), size, output); } if (m->control_pressed) { return 0; } //tell parent you are done. MPI_Send(&size, 1, MPI_UNSIGNED_LONG_LONG, 0, tag, MPI_COMM_WORLD); } } MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //if you don't need to fork anything if(processors == 1){ if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } else { driver(0, numSeqs, outputFile, "square"); } }else{ //you have multiple processors createProcesses(outputFile, numSeqs); } //#else //ifstream inFASTA; //if (output != "square") { driver(0, numSeqs, outputFile, cutoff); } //else { driver(0, numSeqs, outputFile, "square"); } //#endif #endif if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } #ifdef USE_MPI MPI_Comm_rank(MPI_COMM_WORLD, &pid); if (pid == 0) { //only one process should output to screen #endif //if (output == "square") { convertMatrix(outputFile); } ifstream fileHandle; fileHandle.open(outputFile.c_str()); if(fileHandle) { m->gobble(fileHandle); if (fileHandle.eof()) { m->mothurOut(outputFile + " is blank.
This can result if there are no distances below your cutoff."); m->mothurOutEndLine(); } } //append the old column file to the new one if ((oldfastafile != "") && (column != "")) { //we had to rename the column file so we didnt overwrite above, but we want to keep old name if (outputFile == column) { string tempcolumn = column + ".old"; m->appendFiles(tempcolumn, outputFile); m->mothurRemove(tempcolumn); }else{ m->appendFiles(outputFile, column); m->mothurRemove(outputFile); outputFile = column; } if (outputDir != "") { string newOutputName = outputDir + m->getSimpleName(outputFile); rename(outputFile.c_str(), newOutputName.c_str()); m->mothurRemove(outputFile); outputFile = newOutputName; } } #ifdef USE_MPI } #endif if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFile); return 0; } //set phylip file as new current phylipfile string current = ""; itTypes = outputTypes.find("phylip"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setPhylipFile(current); } } //set column file as new current columnfile itTypes = outputTypes.find("column"); if (itTypes != outputTypes.end()) { if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setColumnFile(current); } } m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); m->mothurOut(outputFile); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - startTime) + " seconds to calculate the distances for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); if (m->isTrue(compress)) { m->mothurOut("Compressing..."); m->mothurOutEndLine(); m->mothurOut("(Replacing " + outputFile + " with " + outputFile + ".gz)"); m->mothurOutEndLine(); system(("gzip -v " + outputFile).c_str()); outputNames.push_back(outputFile + ".gz"); }else { outputNames.push_back(outputFile); } return 0; } catch(exception& e) { m->errorOut(e, "DistanceCommand", "execute"); exit(1); } }
int main(int argc, char **argv) { int buf[1024], amode, flag, mynod, len, i; MPI_File fh; MPI_Status status; MPI_Datatype newtype; MPI_Offset disp, offset; MPI_Group group; MPI_Datatype etype, filetype; char datarep[25], *filename; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ if (!mynod) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { printf("\n*# Usage: misc <mpiparameter> -- -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len+1); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+1); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh); MPI_File_write(fh, buf, 1024, MPI_INT, &status); MPI_File_sync(fh); MPI_File_get_amode(fh, &amode); if (!mynod) printf("testing MPI_File_get_amode\n"); if (amode != (MPI_MODE_CREATE | MPI_MODE_RDWR)) printf("amode is %d, should be %d\n\n", amode, MPI_MODE_CREATE | MPI_MODE_RDWR); MPI_File_get_atomicity(fh, &flag); if (flag) printf("atomicity is %d, should be 0\n", flag); if (!mynod) printf("setting atomic mode\n"); MPI_File_set_atomicity(fh, 1); MPI_File_get_atomicity(fh, &flag); if (!flag) printf("atomicity is %d, should be 1\n", flag); MPI_File_set_atomicity(fh, 0); if (!mynod) printf("reverting back to nonatomic mode\n"); MPI_Type_vector(10, 10, 20, MPI_INT, &newtype); MPI_Type_commit(&newtype); MPI_File_set_view(fh, 1000, MPI_INT, newtype, "native", MPI_INFO_NULL); if (!mynod) printf("testing MPI_File_get_view\n"); MPI_File_get_view(fh, &disp, &etype, &filetype, datarep); if ((disp != 1000) || strcmp(datarep, "native")) printf("disp = %lld, datarep = %s, should be 1000, native\n\n", (long long) disp, datarep); if (!mynod) printf("testing MPI_File_get_byte_offset\n"); MPI_File_get_byte_offset(fh, 10, &disp); if (disp != (1000+20*sizeof(int))) printf("byte offset = %lld, should be %d\n\n", (long long) disp, (int) (1000+20*sizeof(int))); MPI_File_get_group(fh, &group); if (!mynod) printf("testing MPI_File_set_size\n"); MPI_File_set_size(fh, 1000+15*sizeof(int)); MPI_Barrier(MPI_COMM_WORLD); MPI_File_sync(fh); MPI_File_get_size(fh, &disp); if (disp != 1000+15*sizeof(int)) printf("file size = %lld, should be %d\n\n", (long long) disp, (int) (1000+15*sizeof(int))); if (!mynod) printf("seeking to eof and testing MPI_File_get_position\n"); MPI_File_seek(fh, 0, MPI_SEEK_END); MPI_File_get_position(fh, &disp); if (disp != 10) printf("file pointer posn = %lld, should be 10\n\n", (long long) disp); if (!mynod) printf("testing MPI_File_get_byte_offset\n"); MPI_File_get_byte_offset(fh, disp, &offset); if (offset != (1000+20*sizeof(int))) printf("byte offset = %lld, should be %d\n\n", (long long) offset, (int) (1000+20*sizeof(int))); MPI_Barrier(MPI_COMM_WORLD); if (!mynod) printf("testing MPI_File_seek with MPI_SEEK_CUR\n"); MPI_File_seek(fh, -10, MPI_SEEK_CUR); MPI_File_get_position(fh, &disp); MPI_File_get_byte_offset(fh, disp, &offset); if (offset != 1000) printf("file pointer posn in bytes = %lld, should be 1000\n\n", (long long) offset); if (!mynod) printf("preallocating disk space up to 8192 bytes\n"); MPI_File_preallocate(fh, 8192); if (!mynod) printf("closing the file and deleting it\n"); MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); if (!mynod)
MPI_File_delete(filename, MPI_INFO_NULL); MPI_Type_free(&newtype); MPI_Type_free(&filetype); MPI_Group_free(&group); free(filename); MPI_Finalize(); return 0; }
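A hedged sketch of the byte-offset arithmetic the test above verifies: with a 1000-byte displacement, an MPI_INT etype and a vector filetype of 10 blocks of 10 ints with stride 20, etype offset 10 falls one full stride into the tile. The file name view.tmp is illustrative:

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Datatype ftype;
    MPI_Offset byteoff;

    MPI_Init(&argc, &argv);
    MPI_File_open(MPI_COMM_SELF, "view.tmp",
                  MPI_MODE_CREATE | MPI_MODE_RDWR | MPI_MODE_DELETE_ON_CLOSE,
                  MPI_INFO_NULL, &fh);
    MPI_Type_vector(10, 10, 20, MPI_INT, &ftype);
    MPI_Type_commit(&ftype);
    MPI_File_set_view(fh, 1000, MPI_INT, ftype, "native", MPI_INFO_NULL);

    /* etypes 0..9 fill the first block of 10 ints; etype 10 begins after
     * the stride of 20 ints, i.e. at byte 1000 + 20*sizeof(int) */
    MPI_File_get_byte_offset(fh, 10, &byteoff);
    printf("etype offset 10 -> byte offset %lld\n", (long long) byteoff);

    MPI_Type_free(&ftype);
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}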
void cache_flush_ind_all(int myid, int numprocs, int size, char *filename) { char *buf; MPI_File fh; double time; /* Calculate how much each processor must write */ int64_t ind_size = ceil(size / (double) numprocs); int64_t comp = 0; char *ind_filename = NULL; int ind_filename_size = 0; /* We will assume that we are using less than 1,000,000 processors * therefore add 1 for NULL char and 6 for each individual processor * for 7 total */ ind_filename_size += strlen(filename) + 7; if ((ind_filename = (char *) malloc(ind_filename_size)) == NULL) { fprintf(stderr, "cache_flush_ind_all: malloc ind_filename of size %d failed\n", ind_filename_size); } sprintf(ind_filename, "%s%d", filename, myid); ind_size = ind_size * 1024 * 1024; /* convert ind_size from MBytes to bytes */ assert(ind_size != 0); if ((buf = (char *) malloc(MAX_BUFFER_SIZE * sizeof(char))) == NULL) { fprintf(stderr, "cache_flush_ind_all: malloc buf of size %d failed\n", MAX_BUFFER_SIZE); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_SELF, ind_filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, 0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); MPI_File_seek(fh, 0, MPI_SEEK_SET); time = MPI_Wtime(); while (comp != ind_size) { if (ind_size - comp > MAX_BUFFER_SIZE) { comp += MAX_BUFFER_SIZE; MPI_File_write(fh, buf, MAX_BUFFER_SIZE, MPI_BYTE, MPI_STATUS_IGNORE); } else { int tmp_bytes = ind_size - comp; comp += ind_size - comp; MPI_File_write(fh, buf, tmp_bytes, MPI_BYTE, MPI_STATUS_IGNORE); } } MPI_File_sync(fh); time = MPI_Wtime() - time; MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); #if 0 MPI_File_delete(ind_filename, MPI_INFO_NULL); #endif if (myid == 0) { fprintf(stderr, "cache_flush_ind_all: File(s) written of " "size %.1f MBytes\n" "Time: %f secs Bandwidth: %f MBytes / sec\n\n", comp*numprocs/1024.0/1024.0, time, comp*numprocs/1024.0/1024.0/time); } MPI_Barrier(MPI_COMM_WORLD); free(ind_filename); free(buf); }
void cache_flush_all(int myid, int numprocs, int size, char *filename) { char *buf; MPI_File fh; double time; /* Calculate how much each processor must write */ int64_t ind_size = ceil(size / (double) numprocs); int64_t comp = 0; ind_size = ind_size * 1024 * 1024; /* convert ind_size from MBytes to bytes */ assert(ind_size != 0); if ((buf = (char *) malloc(MAX_BUFFER_SIZE * sizeof(char))) == NULL) { fprintf(stderr, "cache_flush_all: malloc buf of size %d failed\n", MAX_BUFFER_SIZE); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); MPI_File_set_view(fh, ind_size * myid, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL); MPI_File_seek(fh, 0, MPI_SEEK_SET); time = MPI_Wtime(); while (comp != ind_size) { if (ind_size - comp > MAX_BUFFER_SIZE) { comp += MAX_BUFFER_SIZE; MPI_File_write(fh, buf, MAX_BUFFER_SIZE, MPI_BYTE, MPI_STATUS_IGNORE); } else { int tmp_bytes = ind_size - comp; comp += ind_size - comp; MPI_File_write(fh, buf, tmp_bytes, MPI_BYTE, MPI_STATUS_IGNORE); } } free(buf); MPI_File_sync(fh); time = MPI_Wtime() - time; MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); #if 0 if (myid == 0) { MPI_File_delete(filename, MPI_INFO_NULL); fprintf(stderr, "cache_flush_all: File %s written/deleted of " "size %.1f MBytes\n" "Time: %f secs Bandwidth: %f MBytes / sec\n\n", filename, comp*numprocs/1024.0/1024.0, time, comp*numprocs/1024.0/1024.0/time); } MPI_Barrier(MPI_COMM_WORLD); #endif }
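A hedged sketch of the timing convention both flush routines use: MPI_File_sync stays inside the timed region so the reported bandwidth includes flushing buffered data, not just queueing it. The file name bw.tmp and the sizes are illustrative:

#include <mpi.h>
#include <stdio.h>
#include <string.h>

#define CHUNK (1 << 20)                 /* 1 MiB per write */

int main(int argc, char **argv)
{
    static char buf[CHUNK];
    MPI_File fh;
    double t;
    int i;

    MPI_Init(&argc, &argv);
    memset(buf, 'x', sizeof buf);
    MPI_File_open(MPI_COMM_SELF, "bw.tmp",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE,
                  MPI_INFO_NULL, &fh);
    t = MPI_Wtime();
    for (i = 0; i < 16; i++)            /* 16 MiB in 1 MiB chunks */
        MPI_File_write(fh, buf, CHUNK, MPI_BYTE, MPI_STATUS_IGNORE);
    MPI_File_sync(fh);                  /* flush before stopping the clock */
    t = MPI_Wtime() - t;
    printf("wrote 16 MiB in %f s (%.1f MiB/s)\n", t, 16.0 / t);
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}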
int main(int argc, char *argv[]) { int rank, size; const int N = atoi(argv[1]); // printf("Number of testcase = %d\n", N); MPI_Init (&argc, &argv); double start_time, end_time; MPI_Comm_rank (MPI_COMM_WORLD, &rank); MPI_Comm_size (MPI_COMM_WORLD, &size); // printf("My rank is %d \n", rank); //start_time = MPI_Wtime(); MPI_File fin, fout; MPI_Status status; int *root_arr; int max_arr_size = size > N ? size : N; int ret = MPI_File_open(MPI_COMM_WORLD, argv[2], MPI_MODE_RDONLY, MPI_INFO_NULL, &fin); if (rank == ROOT) { root_arr = new int[max_arr_size+3]; // printf("Enter rank 0 statement ... \n"); MPI_File_read(fin, root_arr, N, MPI_INT, &status); /* for (int i = 0; i < N; ++i) printf("[START] [Rank %d] root_arr[%d] = %d\n", rank, i, root_arr[i]); printf("Out Rank 0 statement ... \n"); */ } MPI_File_close(&fin); MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file int rank_num = size > N ? N : size; const int LAST = rank_num - 1; int num_per_node = N / rank_num; int *local_arr; int num_per_node_diff = N - num_per_node * rank_num; int diff = num_per_node_diff; bool has_remain = false; bool has_remain_rank = rank_num % 2 ? true : false; if (num_per_node_diff > 0) { // Send remaining elements to size - 1 has_remain = true; if (rank == ROOT) { MPI_Send(root_arr + N - diff, diff, MPI_INT, LAST, 0, MPI_COMM_WORLD); } else if (rank == LAST) { // Handle special case num_per_node += num_per_node_diff; local_arr = new int[num_per_node+1]; MPI_Recv(local_arr + num_per_node - diff, diff, MPI_INT, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status); } } else if(rank == rank_num - 1) { local_arr = new int[num_per_node+1]; } MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file if (rank != rank_num - 1) local_arr = new int[num_per_node+1]; // MPI_Scatter (send_buf, send_count, send_type, recv_buf, recv_count, recv_type, root, comm) if (rank < LAST) MPI_Scatter(root_arr, num_per_node, MPI_INT, local_arr, num_per_node, MPI_INT, ROOT, MPI_COMM_WORLD); else MPI_Scatter(root_arr, num_per_node-diff, MPI_INT, local_arr, num_per_node-diff, MPI_INT, ROOT, MPI_COMM_WORLD); // printf("[Rank %d] num_per_node_size = %d\n" ,rank, num_per_node); MPI_Barrier(MPI_COMM_WORLD); /* for (int i = 0; i < num_per_node; ++i) printf("[BEFORE] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); */ if (rank < rank_num) { std::sort(local_arr, local_arr + num_per_node); } MPI_Barrier(MPI_COMM_WORLD); /* for (int i = 0; i < num_per_node; ++i) printf("[AFTER] [Rank %d] local_arr[%d] = %d\n", rank, i, local_arr[i]); */ // printf("rank %d is arrived\n", rank); MPI_Barrier(MPI_COMM_WORLD); // Wait for rank0 to read file int *recv_buf, *send_buf; int recv_len, send_len, success; if (rank_num > 1 && rank < rank_num) { if (rank == ROOT) { send_len = num_per_node; MPI_Send(&send_len, 1, MPI_INT, rank+1, 0, MPI_COMM_WORLD); MPI_Recv(&success, 1, MPI_INT, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Send(local_arr, send_len, MPI_INT, rank+1, 0, MPI_COMM_WORLD); } else { MPI_Recv(&recv_len, 1, MPI_INT, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); success = 1; MPI_Send(&success, 1, MPI_INT, rank-1, 0, MPI_COMM_WORLD); send_len = recv_len + num_per_node; recv_buf = new int[recv_len]; send_buf = new int[send_len]; // printf("RANK %d recv_len = %d SUCCESS\n", rank, recv_len); MPI_Recv(recv_buf, recv_len, MPI_INT, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); // printf("RANK %d complete recevice array SUCCESS\n", rank); int i = 0, j = 0, cur = 0; while (i < recv_len && j < num_per_node) { // Do MERGE array if (recv_buf[i] < local_arr[j]) 
{ send_buf[cur++] = recv_buf[i++]; } else { send_buf[cur++] = local_arr[j++]; } } while (i < recv_len) send_buf[cur++] = recv_buf[i++]; while (j < num_per_node) send_buf[cur++] = local_arr[j++]; /* for (int k = 0; k < cur; k++) { printf("[RANK %d] send_buf[%d] = %d\n", rank, k, send_buf[k]); } */ if(rank != LAST) { MPI_Send(&send_len, 1, MPI_INT, rank+1, 0, MPI_COMM_WORLD); // printf("RANK %d send_len SUCCESS\n", rank); MPI_Recv(&success, 1, MPI_INT, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &status); MPI_Send(send_buf, send_len, MPI_INT, rank+1, 0, MPI_COMM_WORLD); // printf("RANK %d complete sending array SUCCESS\n", rank); } if(rank != LAST) delete [] send_buf; delete [] recv_buf; } } // printf("rank %d is arrived\n", rank); MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, argv[3], MPI_MODE_RDWR | MPI_MODE_CREATE, MPI_INFO_NULL, &fout); if (rank == LAST) { if (rank == 0) send_buf = local_arr; MPI_File_write(fout, send_buf, N, MPI_INT, &status); /* for (int i = 0; i < N; ++i) { printf("[FINAL] [Rank %d] ans[%d] = %d\n", rank, i, send_buf[i]); } */ } MPI_File_close(&fout); // printf("CLOSE rank %d is arrived\n", rank); MPI_Barrier(MPI_COMM_WORLD); if (rank != 0) { delete [] local_arr; // printf("[FREE] [RANK %d] SUCCESS FREE\n", rank); } else { delete [] root_arr; delete [] local_arr;; } MPI_Finalize(); return 0; }
main(int argc, char* argv[]){ clock_t start, end; unsigned int cpu_time_used; unsigned int i, j, rank, numProcesses, blockLength; unsigned int *compBlockLengthArray; unsigned int distinctCharacterCount, combinedHuffmanNodes, frequency[256], inputFileLength, compBlockLength; unsigned char *inputFileData, *compressedData, writeBit = 0, bitsFilled = 0, bitSequence[255], bitSequenceLength = 0; FILE *inputFile; MPI_Init( &argc, &argv); MPI_File mpi_inputFile, mpi_compressedFile; MPI_Status status; // get rank and number of processes value MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &numProcesses); // get file size if(rank == 0){ inputFile = fopen(argv[1], "rb"); fseek(inputFile, 0, SEEK_END); inputFileLength = ftell(inputFile); fseek(inputFile, 0, SEEK_SET); fclose(inputFile); } //broadcast size of file to all the processes MPI_Bcast(&inputFileLength, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD); // get file chunk size blockLength = inputFileLength / numProcesses; if(rank == (numProcesses-1)){ blockLength = inputFileLength - ((numProcesses-1) * blockLength); } // open file in each process and read data and allocate memory for compressed data MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &mpi_inputFile); MPI_File_seek(mpi_inputFile, rank * blockLength, MPI_SEEK_SET); inputFileData = (unsigned char *)malloc(blockLength * sizeof(unsigned char)); MPI_File_read(mpi_inputFile, inputFileData, blockLength, MPI_UNSIGNED_CHAR, &status); // start clock if(rank == 0){ start = clock(); } // find the frequency of each symbols for (i = 0; i < 256; i++){ frequency[i] = 0; } for (i = 0; i < blockLength; i++){ frequency[inputFileData[i]]++; } compressedData = (unsigned char *)malloc(blockLength * sizeof(unsigned char)); compBlockLengthArray = (unsigned int *)malloc(numProcesses * sizeof(unsigned int)); // initialize nodes of huffman tree distinctCharacterCount = 0; for (i = 0; i < 256; i++){ if (frequency[i] > 0){ huffmanTreeNode[distinctCharacterCount].count = frequency[i]; huffmanTreeNode[distinctCharacterCount].letter = i; huffmanTreeNode[distinctCharacterCount].left = NULL; huffmanTreeNode[distinctCharacterCount].right = NULL; distinctCharacterCount++; } } // build tree for (i = 0; i < distinctCharacterCount - 1; i++){ combinedHuffmanNodes = 2 * i; sortHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes); buildHuffmanTree(i, distinctCharacterCount, combinedHuffmanNodes); } // build table having the bitSequence sequence and its length buildHuffmanDictionary(head_huffmanTreeNode, bitSequence, bitSequenceLength); // compress compBlockLength = 0; for (i = 0; i < blockLength; i++){ for (j = 0; j < huffmanDictionary[inputFileData[i]].bitSequenceLength; j++){ if (huffmanDictionary[inputFileData[i]].bitSequence[j] == 0){ writeBit = writeBit << 1; bitsFilled++; } else{ writeBit = (writeBit << 1) | 01; bitsFilled++; } if (bitsFilled == 8){ compressedData[compBlockLength] = writeBit; bitsFilled = 0; writeBit = 0; compBlockLength++; } } } if (bitsFilled != 0){ for (i = 0; (unsigned char)i < 8 - bitsFilled; i++){ writeBit = writeBit << 1; } compressedData[compBlockLength] = writeBit; compBlockLength++; } // calculate length of compressed data compBlockLength = compBlockLength + 1024; compBlockLengthArray[rank] = compBlockLength; // send the length of each process to process 0 MPI_Gather(&compBlockLength, 1, MPI_UNSIGNED, compBlockLengthArray, 1, MPI_UNSIGNED, 0, MPI_COMM_WORLD); // update the data to reflect the offset if(rank == 0){ compBlockLengthArray[0] = (numProcesses 
+ 2) * 4 + compBlockLengthArray[0]; for(i = 1; i < numProcesses; i++){ compBlockLengthArray[i] = compBlockLengthArray[i] + compBlockLengthArray[i - 1]; } for(i = (numProcesses - 1); i > 0; i--){ compBlockLengthArray[i] = compBlockLengthArray[i - 1]; } compBlockLengthArray[0] = (numProcesses + 2) * 4; } // broadcast size of each compressed data block to all the processes MPI_Bcast(compBlockLengthArray, numProcesses, MPI_UNSIGNED, 0, MPI_COMM_WORLD); // get time if(rank == 0){ end = clock(); cpu_time_used = ((end - start)) * 1000 / CLOCKS_PER_SEC; printf("Time taken: %d:%d s\n", cpu_time_used / 1000, cpu_time_used % 1000); } // write data to file MPI_File_open(MPI_COMM_WORLD, argv[2], MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_compressedFile); if(rank == 0){ MPI_File_write(mpi_compressedFile, &inputFileLength, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE); MPI_File_write(mpi_compressedFile, &numProcesses, 1, MPI_UNSIGNED, MPI_STATUS_IGNORE); MPI_File_write(mpi_compressedFile, compBlockLengthArray, numProcesses, MPI_UNSIGNED, MPI_STATUS_IGNORE); } MPI_File_seek(mpi_compressedFile, compBlockLengthArray[rank], MPI_SEEK_SET); MPI_File_write(mpi_compressedFile, frequency, 256, MPI_UNSIGNED, MPI_STATUS_IGNORE); MPI_File_write(mpi_compressedFile, compressedData, (compBlockLength - 1024), MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE); // close open files MPI_File_close(&mpi_compressedFile); MPI_File_close(&mpi_inputFile); MPI_Barrier(MPI_COMM_WORLD); free(head_huffmanTreeNode); free(current_huffmanTreeNode); free(compBlockLengthArray); free(inputFileData); free(compressedData); MPI_Finalize(); }
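A hedged sketch of an alternative to the gather/shift/broadcast offset dance above: each rank derives its own file offset from the compressed block lengths with an exclusive prefix sum (MPI_Exscan) and writes collectively. The file name packed.out and the fixed block size are illustrative, and the header and frequency-table bookkeeping of the real format are omitted:

#include <mpi.h>
#include <string.h>

int main(int argc, char **argv)
{
    MPI_File fh;
    int rank;
    unsigned char block[4096];          /* stand-in for compressedData */
    unsigned long long mylen, offset = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    memset(block, rank, sizeof block);
    mylen = sizeof block;

    /* exclusive prefix sum: rank r receives the total length of ranks < r */
    MPI_Exscan(&mylen, &offset, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM,
               MPI_COMM_WORLD);
    if (rank == 0) offset = 0;          /* MPI_Exscan leaves rank 0 undefined */

    MPI_File_open(MPI_COMM_WORLD, "packed.out",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    /* collective explicit-offset write: no seeks, no shared file pointer */
    MPI_File_write_at_all(fh, (MPI_Offset) offset, block, (int) mylen,
                          MPI_UNSIGNED_CHAR, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}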
int main(int argc, char **argv) { MPI_Init(&argc, &argv); int initFlag; MPI_Initialized(&initFlag); if (!initFlag) { printf("MPI init failed\n"); return 8; } MPI_Comm_rank(MPI_COMM_WORLD, &proc_rank); MPI_Comm_size(MPI_COMM_WORLD, &world_size); int l,mm=5; int nx,ny,nz,lt,nedge; float frequency; float velmax; float dt; int ncx_shot1,ncy_shot1,ncz_shot; int ishot,ncy_shot,ncx_shot; float unit; int nxshot,nyshot,dxshot,dyshot; char infile[80],outfile[80],logfile[80],tmp[80], nodelog[84]; FILE *fin, *fout, *flog, *fnode; MPI_File mpi_flog, mpi_fout; MPI_Status mpi_status; struct timeval start,end; float all_time; float *u, *v, *w, *up, *up1, *up2, *vp, *vp1, *vp2, *wp, *wp1, *wp2, *us, *us1, *us2, *vs, *vs1, *vs2, *ws, *ws1, *ws2, *vpp, *density, *vss; float c[5][7]; float *wave; float nshot,t0,tt,c0; float dtx,dtz,dtxz,dr1,dr2,dtx4,dtz4,dtxz4; char message[100]; if(argc<4) { printf("please add 3 parameters: inputfile, outfile, logfile\n"); exit(1); } message[99] = 0; // Avoid string buffer overrun strcpy(infile,argv[1]); strcpy(outfile,argv[2]); strcpy(logfile,argv[3]); strcpy(nodelog,logfile); strcat(nodelog, ".node"); strcpy(tmp,"date "); strncat(tmp, ">> ",3); strncat(tmp, logfile, strlen(logfile)); if (proc_rank == 0) { flog = fopen(logfile,"w"); fprintf(flog,"------------start time------------\n"); fclose(flog); system(tmp); gettimeofday(&start,NULL); } fin = fopen(infile,"r"); if(fin == NULL) { printf("file %s does not exist\n",infile); exit(2); } fscanf(fin,"nx=%d\n",&nx); fscanf(fin,"ny=%d\n",&ny); fscanf(fin,"nz=%d\n",&nz); fscanf(fin,"lt=%d\n",&lt); fscanf(fin,"nedge=%d\n",&nedge); fscanf(fin,"ncx_shot1=%d\n",&ncx_shot1); fscanf(fin,"ncy_shot1=%d\n",&ncy_shot1); fscanf(fin,"ncz_shot=%d\n",&ncz_shot); fscanf(fin,"nxshot=%d\n",&nxshot); fscanf(fin,"nyshot=%d\n",&nyshot); fscanf(fin,"frequency=%f\n",&frequency); fscanf(fin,"velmax=%f\n",&velmax); fscanf(fin,"dt=%f\n",&dt); fscanf(fin,"unit=%f\n",&unit); fscanf(fin,"dxshot=%d\n",&dxshot); fscanf(fin,"dyshot=%d\n",&dyshot); fclose(fin); if (proc_rank == 0) { // Master printf("\n--------workload parameter--------\n"); printf("nx=%d\n",nx); printf("ny=%d\n",ny); printf("nz=%d\n",nz); printf("lt=%d\n",lt); printf("nedge=%d\n",nedge); printf("ncx_shot1=%d\n",ncx_shot1); printf("ncy_shot1=%d\n",ncy_shot1); printf("ncz_shot=%d\n",ncz_shot); printf("nxshot=%d\n",nxshot); printf("nyshot=%d\n",nyshot); printf("frequency=%f\n",frequency); printf("velmax=%f\n",velmax); printf("dt=%f\n",dt); printf("unit=%f\n",unit); printf("dxshot=%d\n",dxshot); printf("dyshot=%d\n\n",dyshot); flog = fopen(logfile,"a"); fprintf(flog,"\n--------workload parameter--------\n"); fprintf(flog,"nx=%d\n",nx); fprintf(flog,"ny=%d\n",ny); fprintf(flog,"nz=%d\n",nz); fprintf(flog,"lt=%d\n",lt); fprintf(flog,"nedge=%d\n",nedge); fprintf(flog,"ncx_shot1=%d\n",ncx_shot1); fprintf(flog,"ncy_shot1=%d\n",ncy_shot1); fprintf(flog,"ncz_shot=%d\n",ncz_shot); fprintf(flog,"nxshot=%d\n",nxshot); fprintf(flog,"nyshot=%d\n",nyshot); fprintf(flog,"frequency=%f\n",frequency); fprintf(flog,"velmax=%f\n",velmax); fprintf(flog,"dt=%f\n",dt); fprintf(flog,"unit=%f\n",unit); fprintf(flog,"dxshot=%d\n",dxshot); fprintf(flog,"dyshot=%d\n\n",dyshot); fclose(flog); fnode = fopen(nodelog, "a"); fprintf(fnode,"World size: %d\n", world_size); fclose(fnode); } #ifdef _WITH_PHI // [Afa] It is recommended that for Intel Xeon Phi data is 64-byte aligned.
// Upon successful completion, posix_memalign() shall return zero if (posix_memalign((void **)&u , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&v , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&w , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&up , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&up1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&up2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vp , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vp1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vp2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&wp , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&wp1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&wp2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&us , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&us1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&us2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vs , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vs1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&vs2, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&ws , 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&ws1, 64, sizeof(float)*nz*ny*nx)) return 2; if (posix_memalign((void **)&ws2, 64, sizeof(float)*nz*ny*nx)) return 2; #else u = (float*)malloc(sizeof(float)*nz*ny*nx); v = (float*)malloc(sizeof(float)*nz*ny*nx); w = (float*)malloc(sizeof(float)*nz*ny*nx); up = (float*)malloc(sizeof(float)*nz*ny*nx); up1 = (float*)malloc(sizeof(float)*nz*ny*nx); up2 = (float*)malloc(sizeof(float)*nz*ny*nx); vp = (float*)malloc(sizeof(float)*nz*ny*nx); vp1 = (float*)malloc(sizeof(float)*nz*ny*nx); vp2 = (float*)malloc(sizeof(float)*nz*ny*nx); wp = (float*)malloc(sizeof(float)*nz*ny*nx); wp1 = (float*)malloc(sizeof(float)*nz*ny*nx); wp2 = (float*)malloc(sizeof(float)*nz*ny*nx); us = (float*)malloc(sizeof(float)*nz*ny*nx); us1 = (float*)malloc(sizeof(float)*nz*ny*nx); us2 = (float*)malloc(sizeof(float)*nz*ny*nx); vs = (float*)malloc(sizeof(float)*nz*ny*nx); vs1 = (float*)malloc(sizeof(float)*nz*ny*nx); vs2 = (float*)malloc(sizeof(float)*nz*ny*nx); ws = (float*)malloc(sizeof(float)*nz*ny*nx); ws1 = (float*)malloc(sizeof(float)*nz*ny*nx); ws2 = (float*)malloc(sizeof(float)*nz*ny*nx); #endif // [Afa] Those are not offloaded to phi yet vpp = (float*)malloc(sizeof(float)*nz*ny*nx); density = (float*)malloc(sizeof(float)*nz*ny*nx); vss = (float*)malloc(sizeof(float)*nz*ny*nx); wave = (float*)malloc(sizeof(float)*lt); nshot=nxshot*nyshot; t0=1.0/frequency; // [Afa] Branch optmization // TODO: Will compiler optimize the `condition'? // i.e Can I write `for(i=0;i< (nz < 210 ? nz : 210);i++)'? int condition = nz < 210 ? nz : 210; for(int i=0; i < condition;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nx;k++) { vpp[i*ny*nx+j*nx+k]=2300.; vss[i*ny*nx+j*nx+k]=1232.; density[i*ny*nx+j*nx+k]=1.; } } } condition = nz < 260 ? 
nz : 260; for(int i=210; i < condition;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nx;k++) { vpp[i*ny*nx+j*nx+k]=2800.; vss[i*ny*nx+j*nx+k]=1509.; density[i*ny*nx+j*nx+k]=2.; } } } for(int i=260;i<nz;i++) { for(int j=0;j<ny;j++) { for(int k=0;k<nx;k++) { vpp[i*ny*nx+j*nx+k]=3500.; vss[i*ny*nx+j*nx+k]=1909.; density[i*ny*nx+j*nx+k]=2.5; } } } for(l=0;l<lt;l++) { tt=l*dt; tt=tt-t0; float sp=PIE*frequency*tt; float fx=100000.*exp(-sp*sp)*(1.-2.*sp*sp); wave[l]=fx; } // TODO: [Afa] Data produced by code below are static. See table below if(mm==5) { c0=-2.927222164; c[0][0]=1.66666665; c[1][0]=-0.23809525; c[2][0]=0.03968254; c[3][0]=-0.004960318; c[4][0]=0.0003174603; } c[0][1]=0.83333; c[1][1]=-0.2381; c[2][1]=0.0595; c[3][1]=-0.0099; c[4][1]=0.0008; for(int i=0;i<5;i++) for(int j=0;j<5;j++) c[j][2+i]=c[i][1]*c[j][1]; /* * mm == 5, c = * 1.666667 0.833330 0.694439 -0.198416 0.049583 -0.008250 0.000667 * -0.238095 -0.238100 -0.198416 0.056692 -0.014167 0.002357 -0.000190 * 0.039683 0.059500 0.049583 -0.014167 0.003540 -0.000589 0.000048 * -0.004960 -0.009900 -0.008250 0.002357 -0.000589 0.000098 -0.000008 * 0.000317 0.000800 0.000667 -0.000190 0.000048 -0.000008 0.000001 */ /* * mm != 5, c = * 0.000000 0.833330 0.694439 -0.198416 0.049583 -0.008250 0.000667 * 0.000000 -0.238100 -0.198416 0.056692 -0.014167 0.002357 -0.000190 * 0.000000 0.059500 0.049583 -0.014167 0.003540 -0.000589 0.000048 * 0.000000 -0.009900 -0.008250 0.002357 -0.000589 0.000098 -0.000008 * 0.000000 0.000800 0.000667 -0.000190 0.000048 -0.000008 0.000001 */ dtx=dt/unit; dtz=dt/unit; dtxz=dtx*dtz; dr1=dtx*dtx/2.; dr2=dtz*dtz/2.; dtx4=dtx*dtx*dtx*dtx; dtz4=dtz*dtz*dtz*dtz; dtxz4=dtx*dtx*dtz*dtz; if (proc_rank == 0) { fout = fopen(outfile, "wb"); fclose(fout); } // [Afa] Truncate file. We need a prettier way MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, outfile, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_fout); MPI_File_open(MPI_COMM_WORLD, nodelog, MPI_MODE_WRONLY, MPI_INFO_NULL, &mpi_flog); // [Afa] *About Nodes Number* nshot (i.e nxshot * nyshot) should be multiple of node numbers, // or there will be hungry processes int loop_per_proc = ((int)nshot % world_size == 0) ? (nshot / world_size) : (nshot / world_size + 1); printf("\x1B[31mDEBUG:\x1b[39;49m World size %d, Loop per Proc %d, nshot %f, I am No. %d\n", world_size, loop_per_proc, nshot, proc_rank); // for(ishot=1;ishot<=nshot;ishot++) // [Afa] nshot is 20 in para1.in, but 200 in para2.in for (int loop_index = 0; loop_index < loop_per_proc; ++loop_index) { ishot = loop_index + proc_rank * loop_per_proc + 1; // [Afa] See commented code 2 lines above to understand this line if (ishot <= nshot) { // [Afa] ishot <= nshot printf("shot %d, process %d\n",ishot, proc_rank); snprintf(message, 29, "shot %6d, process %6d\n", ishot, proc_rank); // [Afa] Those numbers: MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET); // 28: string without '\0' MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status); // 29: with '\0' } else { printf("shot HUNGRY, process %d\n", proc_rank); snprintf(message, 29, "shot HUNGRY, process %6d\n", proc_rank); MPI_File_seek(mpi_flog, 28 * (ishot - 1), MPI_SEEK_SET); MPI_File_write(mpi_flog, message, 28, MPI_CHAR, &mpi_status); continue; } ncy_shot=ncy_shot1+(ishot/nxshot)*dyshot; ncx_shot=ncx_shot1+(ishot%nxshot)*dxshot; // [Afa] Matrix is zeroed in every loop // i.e. 
The relation between those matrices in each loop is pretty loose // Matrices not zeroed are: vpp, density, vss and wave, and they're not changed (read-only) // We only need to partially collect matrix `up' // TODO: [Afa] Get a better way to pass those pointers, and mark them as `restrict' // And WHY are they using cpp as extension? C++11 doesn't support `restrict' zero_matrices(u, w, ws2, up2, vp1, wp1, us, ws, wp, us2, us1, wp2, v, up1, nz, nx, up, ny, ws1, vs, vp2, vs1, vs2, vp); for(l=1;l<=lt;l++) { float xmax=l*dt*velmax; int nleft=ncx_shot-xmax/unit-10; int nright=ncx_shot+xmax/unit+10; int nfront=ncy_shot-xmax/unit-10; int nback=ncy_shot+xmax/unit+10; int ntop=ncz_shot-xmax/unit-10; int nbottom=ncz_shot+xmax/unit+10; if(nleft<5) nleft=5; if(nright>nx-5) nright=nx-5; if(nfront<5) nfront=5; if(nback>ny-5) nback=ny-5; if(ntop<5) ntop=5; if(nbottom>nz-5) nbottom=nz-5; ntop = ntop-1; nfront = nfront-1; nleft = nleft-1; // Although up, vp, wp, us, vs, ws are modified below, we're sure there's no race condition. // Each loop accesses a UNIQUE element in the array, and the value is not used, no need to worry about the dirty cache #pragma omp parallel for shared(u) shared(v) shared(w) shared(up1) shared(up2) shared(vp1) shared(vp2) shared(wp1) \ shared(wp2) shared(us) shared(us1) shared(us2) shared(vs) shared(vs1) shared(vs2) shared(ws) shared(ws1) shared(ws2) \ shared(vss) shared(vpp) shared(dr1) shared(dr2) shared(dtz) shared(dtx) shared(ncx_shot) shared(ncy_shot) shared(ncz_shot) \ shared(wave) for(int k=ntop;k<nbottom;k++) { for(int j=nfront;j<nback;j++) { for(int i=nleft;i<nright;i++) { float vvp2,drd1,drd2,vvs2; float px,sx; if(i==ncx_shot-1&&j==ncy_shot-1&&k==ncz_shot-1) { px=1.; sx=0.; } else { px=0.; sx=0.; } vvp2=vpp[k*ny*nx+j*nx+i]*vpp[k*ny*nx+j*nx+i]; drd1=dr1*vvp2; drd2=dr2*vvp2; vvs2=vss[k*ny*nx+j*nx+i]*vss[k*ny*nx+j*nx+i]; drd1=dr1*vvs2; drd2=dr2*vvs2; float tempux2=0.0f; float tempuy2=0.0f; float tempuz2=0.0f; float tempvx2=0.0f; float tempvy2=0.0f; float tempvz2=0.0f; float tempwx2=0.0f; float tempwy2=0.0f; float tempwz2=0.0f; float tempuxz=0.0f; float tempuxy=0.0f; float tempvyz=0.0f; float tempvxy=0.0f; float tempwxz=0.0f; float tempwyz=0.0f; // This will make the compiler do the vectorization for(int kk=1;kk<=mm;kk++) { tempux2 += c[kk-1][0]*(u[k*ny*nx+j*nx+(i+kk)]+u[k*ny*nx+j*nx+(i-kk)]); tempuy2 += c[kk-1][0]*(u[k*ny*nx+(j+kk)*nx+i]+u[k*ny*nx+(j-kk)*nx+i]); tempuz2 += c[kk-1][0]*(u[(k+kk)*ny*nx+j*nx+i]+u[(k-kk)*ny*nx+j*nx+i]); } for(int kk=1;kk<=mm;kk++) { tempvx2 += c[kk-1][0]*(v[k*ny*nx+j*nx+(i+kk)]+v[k*ny*nx+j*nx+(i-kk)]); tempvy2 += c[kk-1][0]*(v[k*ny*nx+(j+kk)*nx+i]+v[k*ny*nx+(j-kk)*nx+i]); tempvz2 += c[kk-1][0]*(v[(k+kk)*ny*nx+j*nx+i]+v[(k-kk)*ny*nx+j*nx+i]); } for(int kk=1;kk<=mm;kk++) { tempwx2 += c[kk-1][0]*(w[k*ny*nx+j*nx+(i+kk)]+w[k*ny*nx+j*nx+(i-kk)]); tempwy2 += c[kk-1][0]*(w[k*ny*nx+(j+kk)*nx+i]+w[k*ny*nx+(j-kk)*nx+i]); tempwz2 += c[kk-1][0]*(w[(k+kk)*ny*nx+j*nx+i]+w[(k-kk)*ny*nx+j*nx+i]); } //for(kk=1;kk<=mm;kk++) end tempux2=(tempux2+c0*u[k*ny*nx+j*nx+i])*vvp2*dtx*dtx; // u[k][j][i] tempuy2=(tempuy2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; // u[k][j][i] tempuz2=(tempuz2+c0*u[k*ny*nx+j*nx+i])*vvs2*dtz*dtz; // u[k][j][i] tempvx2=(tempvx2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; tempvy2=(tempvy2+c0*v[k*ny*nx+j*nx+i])*vvp2*dtx*dtx; tempvz2=(tempvz2+c0*v[k*ny*nx+j*nx+i])*vvs2*dtz*dtz; tempwx2=(tempwx2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; tempwy2=(tempwy2+c0*w[k*ny*nx+j*nx+i])*vvs2*dtx*dtx; tempwz2=(tempwz2+c0*w[k*ny*nx+j*nx+i])*vvp2*dtz*dtz; // This loop is 
auto-vectorized for(int kk=1;kk<=mm;kk++) { for(int kkk=1;kkk<=mm;kkk++) { tempuxz=tempuxz+c[kkk-1][1+kk]*(u[(k+kkk)*ny*nx+j*nx+(i+kk)] -u[(k-kkk)*ny*nx+j*nx+(i+kk)] +u[(k-kkk)*ny*nx+j*nx+(i-kk)] -u[(k+kkk)*ny*nx+j*nx+(i-kk)]); // u[k+kkk][j][i+kk], u[k-kkk][j][i+kk], u[k-kkk][j][i-kk], u[k+kkk][j][i-kk] tempuxy=tempuxy+c[kkk-1][1+kk]*(u[k*ny*nx+(j+kkk)*nx+(i+kk)] -u[k*ny*nx+(j-kkk)*nx+(i+kk)] +u[k*ny*nx+(j-kkk)*nx+(i-kk)] -u[k*ny*nx+(j+kkk)*nx+(i-kk)]); tempvyz=tempvyz+c[kkk-1][1+kk]*(v[(k+kkk)*ny*nx+(j+kk)*nx+i] -v[(k-kkk)*ny*nx+(j+kk)*nx+i] +v[(k-kkk)*ny*nx+(j-kk)*nx+i] -v[(k+kkk)*ny*nx+(j-kk)*nx+i]); tempvxy=tempvxy+c[kkk-1][1+kk]*(v[k*ny*nx+(j+kkk)*nx+(i+kk)] -v[k*ny*nx+(j-kkk)*nx+(i+kk)] +v[k*ny*nx+(j-kkk)*nx+(i-kk)] -v[k*ny*nx+(j+kkk)*nx+(i-kk)]); tempwyz=tempwyz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+(j+kk)*nx+i] -w[(k-kkk)*ny*nx+(j+kk)*nx+i] +w[(k-kkk)*ny*nx+(j-kk)*nx+i] -w[(k+kkk)*ny*nx+(j-kk)*nx+i]); tempwxz=tempwxz+c[kkk-1][1+kk]*(w[(k+kkk)*ny*nx+j*nx+(i+kk)] -w[(k-kkk)*ny*nx+j*nx+(i+kk)] +w[(k-kkk)*ny*nx+j*nx+(i-kk)] -w[(k+kkk)*ny*nx+j*nx+(i-kk)]); } // for(kkk=1;kkk<=mm;kkk++) end } //for(kk=1;kk<=mm;kk++) end // LValues below are only changed here up[k*ny*nx+j*nx+i]=2.*up1[k*ny*nx+j*nx+i]-up2[k*ny*nx+j*nx+i] +tempux2+tempwxz*vvp2*dtz*dtx +tempvxy*vvp2*dtz*dtx; // up1[k][j][j], up2[k][j][i], up[k][j][i] vp[k*ny*nx+j*nx+i]=2.*vp1[k*ny*nx+j*nx+i]-vp2[k*ny*nx+j*nx+i] +tempvy2+tempuxy*vvp2*dtz*dtx +tempwyz*vvp2*dtz*dtx; wp[k*ny*nx+j*nx+i]=2.*wp1[k*ny*nx+j*nx+i]-wp2[k*ny*nx+j*nx+i] +tempwz2+tempuxz*vvp2*dtz*dtx +tempvyz*vvp2*dtz*dtx +px*wave[l-1]; us[k*ny*nx+j*nx+i]=2.*us1[k*ny*nx+j*nx+i]-us2[k*ny*nx+j*nx+i]+tempuy2+tempuz2 -tempvxy*vvs2*dtz*dtx-tempwxz*vvs2*dtz*dtx; vs[k*ny*nx+j*nx+i]=2.*vs1[k*ny*nx+j*nx+i]-vs2[k*ny*nx+j*nx+i]+tempvx2+tempvz2 -tempuxy*vvs2*dtz*dtx-tempwyz*vvs2*dtz*dtx; ws[k*ny*nx+j*nx+i]=2.*ws1[k*ny*nx+j*nx+i]-ws2[k*ny*nx+j*nx+i]+tempwx2+tempwy2 -tempuxz*vvs2*dtz*dtx-tempvyz*vvs2*dtz*dtx; }//for(i=nleft;i<nright;i++) end } } // Again, those are UNIQUE access. Safe to share #pragma omp parallel for shared(up) shared(us) shared(vp) shared(vs) shared(wp) shared(ws) shared(u) shared(v) shared(w) \ shared(up2) shared(up1) shared(us2) shared(us1) shared(vp2) shared(vp1) shared(wp2) shared(wp1) shared(ws2) shared(ws1) for(int k=ntop;k<nbottom;k++) for(int j=nfront;j<nback;j++) for(int i=nleft;i<nright;i++) { u[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]+us[k*ny*nx+j*nx+i]; v[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]+vs[k*ny*nx+j*nx+i]; w[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]+ws[k*ny*nx+j*nx+i]; up2[k*ny*nx+j*nx+i]=up1[k*ny*nx+j*nx+i]; up1[k*ny*nx+j*nx+i]=up[k*ny*nx+j*nx+i]; us2[k*ny*nx+j*nx+i]=us1[k*ny*nx+j*nx+i]; us1[k*ny*nx+j*nx+i]=us[k*ny*nx+j*nx+i]; vp2[k*ny*nx+j*nx+i]=vp1[k*ny*nx+j*nx+i]; vp1[k*ny*nx+j*nx+i]=vp[k*ny*nx+j*nx+i]; vs2[k*ny*nx+j*nx+i]=vs1[k*ny*nx+j*nx+i]; vs1[k*ny*nx+j*nx+i]=vs[k*ny*nx+j*nx+i]; wp2[k*ny*nx+j*nx+i]=wp1[k*ny*nx+j*nx+i]; wp1[k*ny*nx+j*nx+i]=wp[k*ny*nx+j*nx+i]; ws2[k*ny*nx+j*nx+i]=ws1[k*ny*nx+j*nx+i]; ws1[k*ny*nx+j*nx+i]=ws[k*ny*nx+j*nx+i]; }//for(i=nleft;i<nright;i++) end }//for(l=1;l<=lt;l++) end // [Afa] Do we need to keep the order of data? 
// [Afa Update] Yes, we do need to KEEP THE ORDER of data
            // fwrite(up+169*ny*nx,sizeof(float),ny*nx,fout); // This is the original fwrite
            MPI_File_seek(mpi_fout, (ishot - 1) * ny * nx * sizeof(float), MPI_SEEK_SET);
            MPI_File_write(mpi_fout, up + 169 * ny * nx, ny * nx, MPI_FLOAT, &mpi_status);
    }//for(ishot=1;ishot<=nshot;ishot++) end

    MPI_File_close(&mpi_fout);
    MPI_File_close(&mpi_flog);

    free(u); free(v); free(w);
    free(up); free(up1); free(up2);
    free(vp); free(vp1); free(vp2);
    free(wp); free(wp1); free(wp2);
    free(us); free(us1); free(us2);
    free(vs); free(vs1); free(vs2);
    free(ws); free(ws1); free(ws2);
    free(vpp); free(density); free(vss);
    free(wave);

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();

    if (proc_rank == 0) {
        gettimeofday(&end,NULL);
        all_time = (end.tv_sec-start.tv_sec)+(float)(end.tv_usec-start.tv_usec)/1000000.0;
        printf("run time:\t%f s\n",all_time);
        flog = fopen(logfile,"a");
        fprintf(flog,"\nrun time:\t%f s\n\n",all_time);
        fprintf(flog,"------------end time------------\n");
        fclose(flog);
        system(tmp);
    }

    return 0;
}
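/* [Editor's note] The per-shot writes above pair MPI_File_seek with
 * MPI_File_write. Since each process derives its absolute offset from ishot
 * anyway, the two calls can be fused into MPI_File_write_at, which takes the
 * offset explicitly and leaves the individual file pointer alone. Minimal
 * self-contained sketch with toy sizes; one "shot" slice per rank. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int ny = 4, nx = 4;                       /* illustrative slice size */
    float *slice = calloc((size_t)ny * nx, sizeof(float));
    int ishot = rank + 1;

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "snap.out",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_Offset off = (MPI_Offset)(ishot - 1) * ny * nx * sizeof(float);
    /* the offset is explicit, so no seek and no file-pointer state to track */
    MPI_File_write_at(fh, off, slice, ny * nx, MPI_FLOAT, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    free(slice);
    MPI_Finalize();
    return 0;
}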
int main(int argc, char *argv[]) {
    int i, n, nlocal;
    int numprocs, dims[2], periods[2], keep_dims[2];
    int myrank, my2drank, mycoords[2];
    MPI_File f;
    char* filename = "input/16";
    char* out_filename = "output/16";
    MPI_Comm comm_2d, comm_row, comm_col;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    dims[ROW] = dims[COL] = sqrt(numprocs);
    periods[ROW] = periods[COL] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    keep_dims[ROW] = 0;
    keep_dims[COL] = 1;
    MPI_Cart_sub(comm_2d, keep_dims, &comm_row);
    keep_dims[ROW] = 1;
    keep_dims[COL] = 0;
    MPI_Cart_sub(comm_2d, keep_dims, &comm_col);

    if(MPI_File_open(comm_2d, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
        fprintf(stderr, "Cannot open file %s\n", filename);
        MPI_Abort(comm_2d, FILE_NOT_FOUND);
        MPI_Finalize();
        return 1;
    }
    MPI_File_seek(f, 0, MPI_SEEK_SET);
    MPI_File_read(f, &n, 1, MPI_INT, &status);
    nlocal = n/dims[ROW];
    int *a = (int *)malloc(nlocal * nlocal * sizeof(int));
    /* skip the leading int that holds n, then read this process's block row by row */
    for(i = 0; i < nlocal; i++) {
        MPI_File_seek(f, ((mycoords[0] * nlocal + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET);
        MPI_File_read(f, &a[i * nlocal], nlocal, MPI_INT, &status);
    }
    MPI_File_close(&f);

    int j;
    if(my2drank == 3) {
        for(i = 0; i < nlocal; i++) {
            for(j = 0; j < nlocal; j++) {
                printf("%d ", a[i * nlocal +j]);
            }
            printf("\n");
        }
    }

    double start = MPI_Wtime();
    floyd_all_pairs_sp_2d(n, nlocal, a, comm_2d, comm_row, comm_col);
    double stop = MPI_Wtime();
    printf("[%d] Completed in %1.3f seconds\n", my2drank, stop-start);

    MPI_Comm_free(&comm_col);
    MPI_Comm_free(&comm_row);

    if(my2drank == 3) {
        for(i = 0; i < nlocal; i++) {
            for(j = 0; j < nlocal; j++) {
                printf("%d ", a[i * nlocal +j]);
            }
            printf("\n");
        }
    }

    if(MPI_File_open(comm_2d, out_filename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &f) != MPI_SUCCESS) {
        fprintf(stderr, "Cannot open file %s\n", out_filename);
        MPI_Abort(comm_2d, FILE_NOT_FOUND);
        MPI_Finalize();
        return 1;
    }
    if(my2drank == 0) {
        MPI_File_seek(f, 0, MPI_SEEK_SET);
        MPI_File_write(f, &n, 1, MPI_INT, &status);
    }
    for(i = 0; i < nlocal; i++) {
        MPI_File_seek(f, ((mycoords[0] * nlocal + i) * n + mycoords[1] * nlocal + 1) * sizeof(int), MPI_SEEK_SET);
        MPI_File_write(f, &a[i * nlocal], nlocal, MPI_INT, &status);
    }
    MPI_File_close(&f);
    free(a);
    MPI_Comm_free(&comm_2d);
    MPI_Finalize();
    return 0;
}
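/* [Editor's sketch] The read loop above issues one seek+read pair per local
 * row. The same block can be fetched in a single collective call by
 * describing it as an MPI subarray and installing that as the file view.
 * Function-level sketch, compilable as-is; it assumes the file layout used
 * above (one leading int holding n, then the n*n ints in row-major order). */
#include <mpi.h>

static void read_block(const char *path, int n, int nlocal,
                       const int mycoords[2], int *a /* nlocal*nlocal ints */)
{
    MPI_Datatype block;
    int sizes[2]    = { n, n };
    int subsizes[2] = { nlocal, nlocal };
    int starts[2]   = { mycoords[0] * nlocal, mycoords[1] * nlocal };
    MPI_Type_create_subarray(2, sizes, subsizes, starts,
                             MPI_ORDER_C, MPI_INT, &block);
    MPI_Type_commit(&block);

    MPI_File f;
    MPI_File_open(MPI_COMM_WORLD, (char *)path, MPI_MODE_RDONLY,
                  MPI_INFO_NULL, &f);
    /* displacement skips the leading int; the view then exposes only our block */
    MPI_File_set_view(f, (MPI_Offset)sizeof(int), MPI_INT, block,
                      "native", MPI_INFO_NULL);
    MPI_File_read_all(f, a, nlocal * nlocal, MPI_INT, MPI_STATUS_IGNORE);
    MPI_File_close(&f);
    MPI_Type_free(&block);
}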
void step4(inst i, int r, int s) { inst instance = i; int rank = r; int size = s; // Creation of the 2D torus we will then use MPI_Comm comm; int dim[2] = {instance.p, instance.q}; int period[2] = {1, 1}; int reorder = 0; int coord[2]; MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &comm); MPI_Cart_coords(comm, rank, 2, coord); grid global_grid; char type = 0; MPI_File input_file; // We start by reading the header of the file MPI_File_open(comm, instance.input_path, MPI_MODE_RDONLY, MPI_INFO_NULL, &input_file); MPI_File_read_all(input_file, &type, 1, MPI_CHAR, MPI_STATUS_IGNORE); if(type == 1) { if (rank == 0) fprintf(stderr, "Error: type 1 files are not supported in step 4\n"); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); exit(EXIT_FAILURE); } // we needed to swap the next 2 lines MPI_File_read_all(input_file, &(global_grid.n), 1, MPI_UINT64_T, MPI_STATUS_IGNORE); MPI_File_read_all(input_file, &(global_grid.m), 1, MPI_UINT64_T, MPI_STATUS_IGNORE); #ifdef DEBUG if(rank == 0) printf("n, m = %zu %zu\n", global_grid.n, global_grid.m); #endif if(!(global_grid.n % instance.p == 0 && global_grid.m % instance.q == 0)) { if(rank == 0) fprintf(stderr, "Error: please choose the grid parameters so they divide the grid of the cellular automaton. For example %zu %zu, but you need to move from %d procs to %zu\n", instance.p + (global_grid.n % instance.p), instance.q + (global_grid.m % instance.q), size, (instance.p + (global_grid.n % instance.p))*(instance.q + (global_grid.m % instance.q))); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); exit(EXIT_FAILURE); } size_t local_nrows = global_grid.n/instance.p; size_t local_ncols = global_grid.m/instance.q; // Now we create the data structures. int blocks[2] = {1, 2}; MPI_Datatype types[2] = {MPI_BYTE, MPI_DOUBLE}; MPI_Aint a_size = sizeof(cell2); MPI_Aint a_disp[3] = {offsetof(cell2, type), offsetof(cell2, u), offsetof(cell2, s)}; MPI_Aint p_size = 17; MPI_Aint p_disp[3] = {0, 1, 9}; MPI_Datatype p_tmp, a_tmp, p_cell, a_cell; // Aligned struct, memory representation MPI_Type_create_struct(2, blocks, a_disp, types, &a_tmp); MPI_Type_create_resized(a_tmp, 0, a_size, &a_cell); MPI_Type_commit(&a_cell); // Packed struct, file-based representation MPI_Type_create_struct(2, blocks, p_disp, types, &p_tmp); MPI_Type_create_resized(p_tmp, 0, p_size, &p_cell); MPI_Type_commit(&p_cell); // Now, we create our matrix MPI_Datatype matrix; int sizes[2] = {global_grid.n, global_grid.m}; int subsizes[2] = {local_nrows, local_ncols}; int starts[2] = {0, 0}; MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, p_cell, &matrix); MPI_Type_commit(&matrix); // We extend this matrix MPI_Datatype ematrix; int e_subsizes[2] = {2 + subsizes[0], 2 + subsizes[1]}; int e_start[2] = {1, 1}; MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, a_cell, &ematrix); MPI_Type_commit(&ematrix); // The next 3 types are for the export of the grid MPI_Datatype d_type; MPI_Type_create_resized(MPI_DOUBLE, 0, sizeof(cell2), &d_type); MPI_Type_commit(&d_type); MPI_Datatype d_matrix; MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C, MPI_DOUBLE, &d_matrix); MPI_Type_commit(&d_matrix); MPI_Datatype d_rmatrix; // to go from the extended matrix with ghost zones to the other one MPI_Type_create_subarray(2, e_subsizes, subsizes, e_start, MPI_ORDER_C, d_type, &d_rmatrix); MPI_Type_commit(&d_rmatrix); // Set file view for each element MPI_Offset grid_start; MPI_File_get_position(input_file, &grid_start); MPI_File_set_view(input_file, grid_start + 
global_grid.m*local_nrows*p_size*coord[0] + local_ncols*p_size*coord[1], p_cell, matrix, "native", MPI_INFO_NULL); // allocate the cell array we will use cell2 **cells; cells = malloc(2*sizeof(cell2 *)); double *sensors; cells[1] = calloc((2+local_nrows)*(2+local_ncols),sizeof(cell2)); cells[0] = calloc((2+local_nrows)*(2+local_ncols),sizeof(cell2)); sensors = calloc(local_nrows*local_ncols, sizeof(double)); MPI_File_read_all(input_file, cells[0], 1, ematrix, MPI_STATUS_IGNORE); MPI_File_close(&input_file); #ifdef DEBUG for(size_t i = 1; i < 1+local_nrows; i++) for(size_t j = 1; j < 1+local_ncols; j++) fprintf(stderr, "%d - %d %f\n", rank, cells[0][i*(2+local_ncols)+j].type, cells[0][i*(2+local_ncols)+j].u); #endif MPI_Datatype l_row; // local row MPI_Type_contiguous(local_ncols, d_type, &l_row); MPI_Type_commit(&l_row); MPI_Datatype l_col; // local column. A bit trickier, we need a type_vector. MPI_Type_vector(local_nrows, 1, local_ncols+2, d_type, &l_col); MPI_Type_commit(&l_col); int top, bot, left, right; double sqspeed = 0; int curr = 0, next = 0; char *alldump = malloc(256); for(int s = 0; s < instance.iteration; s++) { // We will update cell[next], and use the data of cell[curr] curr = s % 2; next = (s+1) % 2; // We copy the edges of the grid. // We first need the ranks of the neighbours MPI_Cart_shift(comm, 0, 1, &top, &bot); MPI_Cart_shift(comm, 1, 1, &left, &right); // Then we need to update the edges of our local grid // Update top and bottom rows MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u), 1, l_row, top, 0, &(cells[curr][(local_ncols+2)*(local_nrows+1)+1].u), 1, l_row, bot, 0, comm, MPI_STATUS_IGNORE); MPI_Sendrecv(&(cells[curr][(local_ncols+2)*(local_nrows)+1].u), 1, l_row, bot, 0, &(cells[curr][1].u), 1, l_row, top, 0, comm, MPI_STATUS_IGNORE); // Update left and right MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+1].u), 1, l_col, left, 0, &(cells[curr][1*(local_ncols+2)+local_ncols+1].u), 1, l_col, right, 0, comm, MPI_STATUS_IGNORE); MPI_Sendrecv(&(cells[curr][1*(local_ncols+2)+local_ncols].u), 1, l_col, right, 0, &(cells[curr][1*(local_ncols+2)].u), 1, l_col, left, 0, comm, MPI_STATUS_IGNORE); // We compute the update of the grid for(size_t i = 1; i < 1+local_nrows; i++) { for(size_t j = 1; j < 1+local_ncols; j++) { if(instance.step < 2 || cells[next][j+i*(2+local_ncols)].type != 1) { // If walls we do not do anything sqspeed = cells[0][j+i*(2+local_ncols)].s * cells[0][j+i*(2+local_ncols)].s; cells[next][j+i*(2+local_ncols)].u = cells[curr][j+i*(2+local_ncols)].u + (cells[curr][j+i*(2+local_ncols)].v * instance.dt); cells[next][j+i*(2+local_ncols)].v = cells[curr][j+i*(2+local_ncols)].v + sqspeed * (cells[curr][j+(i+1)*(2+local_ncols)].u + cells[curr][j+(i-1)*(2+local_ncols)].u + cells[curr][(j+1) + i*(2+local_ncols)].u + cells[curr][(j-1) + i*(2+local_ncols)].u - (4 * cells[curr][j+i*(2+local_ncols)].u)) * instance.dt; if(instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2) { // Case of sensors sensors[(j-1)+(i-1)*local_ncols] += cells[next][j+i*(2+local_ncols)].u * cells[next][j+i*(2+local_ncols)].u; } } } } if(instance.alldump != NULL && s % instance.frequency == 0) { MPI_File dump_file; sprintf(alldump, instance.alldump, (s / instance.frequency)); MPI_File_open(comm, alldump, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &dump_file); MPI_File_set_view(dump_file, global_grid.m*local_nrows*sizeof(double)*coord[0] + local_ncols*sizeof(double)*coord[1], MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL); MPI_File_write_all(dump_file, 
&(cells[curr][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
            MPI_File_close(&dump_file);
        }
    }

    if(instance.lastdump != NULL) {
        // well, how do we do this? maybe it works if we do a resize?
        MPI_File last_file;
        MPI_File_open(comm, instance.lastdump, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &last_file);
        MPI_File_set_view(last_file, global_grid.m*local_nrows*sizeof(double)*coord[0] + local_ncols*sizeof(double)*coord[1],
                          MPI_DOUBLE, d_matrix, "native", MPI_INFO_NULL);
        // for one thing, there is one grid_start too many; d_type or MPI_DOUBLE?
        MPI_File_write_all(last_file, &(cells[next][0].u), 1, d_rmatrix, MPI_STATUS_IGNORE);
        MPI_File_close(&last_file);
    }

    if(instance.step == 3 && instance.sensors != NULL) {
        MPI_File sensor_file;
        MPI_File_open(comm, instance.sensors, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &sensor_file);
        MPI_Datatype string;
        MPI_Type_contiguous(1024, MPI_CHAR, &string);
        MPI_Type_commit(&string);
        char text[1024];
        for(size_t i = 1; i < 1+local_nrows; i++) {
            for(size_t j = 1; j < 1+local_ncols; j++) {
                if(instance.step == 3 && cells[next][j+i*(2+local_ncols)].type == 2) {
                    memset(text,0,sizeof(text));
                    sprintf(text, "%zu %zu %f\r\n", (i-1)+coord[0]*local_nrows, (j-1)+coord[1]*local_ncols,
                            sensors[(j-1)+(i-1)*local_ncols]);
                    // use the shared file pointer so records from different ranks
                    // append after one another instead of all landing at offset 0
                    MPI_File_write_shared(sensor_file, text, 1, string, MPI_STATUS_IGNORE);
                }
            }
        }
        MPI_Type_free(&string);
        MPI_File_close(&sensor_file);
    }

    // Some cleaning
    free(cells[0]);
    free(cells[1]);
    free(cells);
    free(sensors);
    free(alldump);
    MPI_Type_free(&a_cell);
    MPI_Type_free(&p_cell);
    MPI_Type_free(&matrix);
    MPI_Type_free(&ematrix);
    MPI_Type_free(&d_type);
    MPI_Type_free(&d_matrix);
    MPI_Type_free(&d_rmatrix);
    MPI_Type_free(&l_row);
    MPI_Type_free(&l_col);
}
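/* [Editor's sketch] step4 above gives one {char, double, double} record two
 * different extents via MPI_Type_create_resized: a packed 17-byte extent for
 * the file representation and the padded sizeof() extent for memory. The
 * self-contained probe below mirrors that construction and checks both
 * extents with MPI_Type_get_extent; cell_probe is an illustrative stand-in
 * for the cell2 struct used above. */
#include <mpi.h>
#include <stdio.h>
#include <stddef.h>

typedef struct { char type; double u, s; } cell_probe;

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int blocks[2] = { 1, 2 };
    MPI_Datatype types[2] = { MPI_BYTE, MPI_DOUBLE };
    MPI_Aint a_disp[2] = { offsetof(cell_probe, type), offsetof(cell_probe, u) };
    MPI_Aint p_disp[2] = { 0, 1 };   /* packed: doubles start right after the byte */

    MPI_Datatype a_tmp, p_tmp, a_cell, p_cell;
    MPI_Type_create_struct(2, blocks, a_disp, types, &a_tmp);
    MPI_Type_create_resized(a_tmp, 0, sizeof(cell_probe), &a_cell);  /* memory layout */
    MPI_Type_create_struct(2, blocks, p_disp, types, &p_tmp);
    MPI_Type_create_resized(p_tmp, 0, 17, &p_cell);                  /* file layout */
    MPI_Type_commit(&a_cell);
    MPI_Type_commit(&p_cell);

    MPI_Aint lb, extent;
    MPI_Type_get_extent(p_cell, &lb, &extent);
    printf("packed extent:  %ld bytes\n", (long)extent);  /* 17 */
    MPI_Type_get_extent(a_cell, &lb, &extent);
    printf("aligned extent: %ld bytes\n", (long)extent);  /* sizeof(cell_probe), typically 24 */

    MPI_Type_free(&a_tmp); MPI_Type_free(&p_tmp);
    MPI_Type_free(&a_cell); MPI_Type_free(&p_cell);
    MPI_Finalize();
    return 0;
}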
int main(int argc, char **argv)
{
    if(argc < 2) {
        printf("Usage: %s infile\n", argv[0]);
        exit(1);
    }

    MPI_Comm comm = MPI_COMM_WORLD;
    MPI_Info mpi_info = MPI_INFO_NULL;
    MPI_File fh, fw;
    MPI_Offset file_size, frag_size, read_size;
    MPI_Offset offset;
    MPI_Status status;
    int retval;
    double start, end;
    unsigned char *buf, *outbuf, *outProps;
    size_t destlen;
    size_t propsize = 5;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(comm, &mpi_rank);
    MPI_Comm_size(comm, &mpi_size);

    MPI_Barrier(comm);
    start = MPI_Wtime();

    /*
     * read
     */
    MPI_File_open(comm, argv[1], MPI_MODE_RDONLY, mpi_info, &fh);
    MPI_File_get_size(fh, &file_size);
    //printf("file size:%d\n", file_size);
    /* round the fragment size up so the last file_size % mpi_size bytes
     * are not silently dropped */
    frag_size = (file_size + mpi_size - 1) / mpi_size;
    offset = frag_size * mpi_rank;
    read_size = MIN(frag_size, file_size - offset);
    if(read_size < 0)
        read_size = 0;
    //printf("rank %d offset %d\n", mpi_rank, offset);
    buf = malloc(frag_size + 2);
    assert(buf != NULL);
    MPI_File_read_at(fh, offset, buf, (int)read_size, MPI_CHAR, &status);
    MPI_File_close(&fh);

    /*
     * compress
     */
    destlen = 1.2 * frag_size + 1024 * 1024;
    outbuf = (unsigned char *)malloc(destlen);
    assert(outbuf != NULL);
    destlen = destlen - DATA_OFFSET - propsize;
    outProps = outbuf + DATA_OFFSET;
    retval = LzmaCompress(outbuf + DATA_OFFSET + propsize, &destlen, buf, read_size,
                          outProps, &propsize, -1, 0, -1, -1, -1, -1, 1);
    if(retval != SZ_OK) {
        error_print(retval);
        free(buf);
        free(outbuf);
        exit(1);
    }

    /*
     * write
     */
    char *fwname;
    unsigned long long *len;
    fwname = get_fwname(argv[1]);
    len = (unsigned long long *)outbuf;
    *len = read_size;
    //printf("%s %d\n", fwname, destlen);
    MPI_File_open(MPI_COMM_SELF, fwname, MPI_MODE_WRONLY | MPI_MODE_CREATE, mpi_info, &fw);
    /* size the file to everything we are about to write: header, props, payload */
    MPI_File_set_size(fw, destlen + DATA_OFFSET + propsize);
    MPI_File_write(fw, outbuf, destlen + DATA_OFFSET + propsize, MPI_CHAR, &status);
    MPI_File_close(&fw);

    MPI_Barrier(comm);
    end = MPI_Wtime();

    size_t cmprs_len;
    double cmprs_ratio;
    MPI_Reduce(&destlen, &cmprs_len, 1, MPI_UNSIGNED_LONG, MPI_SUM, 0, comm);
    if(0 == mpi_rank) {
        cmprs_ratio = (double)cmprs_len / file_size;
        printf("file size: %lu\n", (unsigned long)file_size);
        printf("size after compression: %lu\n", (unsigned long)cmprs_len);
        printf("compress ratio: %f\n", cmprs_ratio);
        printf("number of processes: %d\n", mpi_size);
        printf("time used: %fs\n", end - start);
    }

    MPI_Finalize();
    free(fwname);
    free(buf);
    free(outbuf);
    return 0;
}
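/* [Editor's sketch] The fragment arithmetic above is the common pattern for
 * splitting one input file across ranks. A remainder-aware variant in
 * isolation: ceiling-divide the file size so the tail bytes always land in
 * some rank's slice, then read collectively with MPI_File_read_at_all.
 * Self-contained; the file name comes from argv[1] as in the program above. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (argc < 2) MPI_Abort(MPI_COMM_WORLD, 1);

    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
    MPI_Offset fsize;
    MPI_File_get_size(fh, &fsize);

    MPI_Offset frag = (fsize + size - 1) / size;  /* ceiling division */
    MPI_Offset off  = frag * rank;
    MPI_Offset len  = fsize - off;                /* my slice, clamped below */
    if (len < 0)    len = 0;
    if (len > frag) len = frag;

    char *buf = malloc(frag > 0 ? (size_t)frag : 1);
    /* collective read: every rank must call it, even with an empty slice */
    MPI_File_read_at_all(fh, off, buf, (int)len, MPI_CHAR, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    free(buf);
    MPI_Finalize();
    return 0;
}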
/*
 * There are three stages of execution in this routine.
 *
 * 1. Data in an IO group is gathered together. An IO group consists of an
 *    integer number of compute node layers and a single IO node, and the IO
 *    node is where the data is gathered.
 *
 * 2. The data is transposed to have the desired layout in memory. Data on
 *    a compute node is stored as [z][x][y]. After gathering to the IO node,
 *    the ordering of compute nodes results in a data layout of
 *    [l][h][z][y][x] where l iterates over layers and h iterates over rows.
 *    We wish to transpose it to be [z][y][x], both because this is a more
 *    intuitive ordering for data during analysis, and because z is the final
 *    remaining distributed dimension, so a raw combination of the data in
 *    all the IO nodes will now result in a well ordered layout.
 *
 * 3. The set of all IO nodes performs a parallel write to disk, resulting in
 *    a single file with an expected ordering.
 *
 * Note: The reason compute nodes store data as [z][x][y] instead of
 *       [z][y][x] is so that after an FFT operation (and its required
 *       transpose) spectral modes are stored in [kz][ky][kx]. This
 *       is a simpler ordering to remember, and in the code we are far
 *       more likely to iterate over individual dimensions in spectral
 *       coordinates than spatial ones. IO happens rarely enough that
 *       the cost of the extra transpose required here is probably
 *       negligible, though this should be verified.
 */
void writeSpatial(field * f, char * name)
{
    int i,j,k,l,m;
    debug("Writing spatial data to file %s\n", name);

    int sndcnt = 0;
    PRECISION * rcvbuff = 0;
    PRECISION * sndbuff = 0;

    //the extra +1 just gives a little extra room to do an extra loop below.
    //The extra element means nothing, it just makes the code a hair easier
    //to write
    int displs[iosize+1];
    int rcvcounts[iosize];

    debug("consolidating data to IO nodes\n");
    if(compute_node) {
        sndcnt = my_x->width * my_z->width * ny;
        trace("Sending %d PRECISIONs\n", sndcnt);
        MPI_Gatherv(f->spatial, sndcnt, MPI_PRECISION, 0, 0, 0, MPI_PRECISION, 0, iocomm);
        debug("Write Spatial completed\n");
        return;
    } else if(io_node) {
        rcvbuff = (PRECISION *)malloc(nx * ny * nz_layers * sizeof(PRECISION));
        sndbuff = (PRECISION *)malloc(nx * ny * nz_layers * sizeof(PRECISION));
        trace("Total local data will be %d PRECISIONs\n", nx*ny*nz_layers);

        //We need to calculate the starting index that data from each compute
        //processor will begin at in our array.
        //Note: Since our own IO node is not contributing any data, both our
        //      IO node and the first compute node get to start at a
        //      displacement of 0.
        displs[0] = 0;
        displs[1] = 0;
        rcvcounts[0] = 0;

        //staggered loop. We calculate how much data we receive from one
        //processor at the same time we calculate where the data for the
        //next processor will begin storage.
int * pidspls = displs + 2; int * pircvcounts = rcvcounts+1; for(i = io_layers[my_io_layer].min; i <= io_layers[my_io_layer].max; i++) { for(j = 0; j < hdiv; j++) { *pircvcounts = all_x[j].width * all_z[i].width * ny; *pidspls = *(pidspls-1) + *pircvcounts; trace("Proc %d should send %d PRECISIONs at displacement %d\n", hdiv * i + j, *pircvcounts, *pidspls); pidspls++; pircvcounts++; } } MPI_Gatherv(0, 0, MPI_PRECISION, rcvbuff, rcvcounts, displs, MPI_PRECISION, 0, iocomm); } debug("transposing data so it is properly contiguous\n"); //rcvbuff is [l][h][vz][hx][y] //we want [lz][y][x] int indexr = 0; int indexs = 0; for(i = 0; i < io_layers[my_io_layer].width; i++) { for(j = 0; j < hdiv; j++) { int vz = all_z[i + io_layers[my_io_layer].min].width; int vzmin = all_z[i + io_layers[my_io_layer].min].min; int vzstart = all_z[io_layers[my_io_layer].min].min; for(k = 0; k < vz; k++) { int hx = all_x[j].width; int hxmin = all_x[j].min; for(l = 0; l < hx; l++) { for(m = 0; m < ny; m++) { indexs = ((k + vzmin - vzstart)*ny + m)*nx + l + hxmin; sndbuff[indexs] = rcvbuff[indexr]; indexr++; } } } } } debug("Performing parallel file write\n"); //TODO: revisit MPI_MODE_SEQUENTIAL and MPI_INFO_NULL to make sure these are what we want MPI_File fh; MPI_File_open(fcomm, name, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh); debug("MPI File opened successfully\n"); //Calculate displacements for each IO processor into the full file. int disp = 0; //loop over each IO processor before us for(i = 0; i < my_io_layer; i++) { //calculate how many layers those IO processors contribute for(j = io_layers[i].min; j <= io_layers[i].max; j++) { disp += all_z[j].width; } } //Convert layers into actual data size. disp *= nx * ny * sizeof(PRECISION); trace("Our view starts at element %d\n", disp); trace("Setting view...\n"); MPI_File_set_view(fh, disp, MPI_PRECISION, MPI_PRECISION, "native", MPI_INFO_NULL); trace("Writing to file...\n"); MPI_File_write(fh, sndbuff, nx * ny * nz_layers, MPI_PRECISION, MPI_STATUS_IGNORE ); MPI_File_close(&fh); free(sndbuff); free(rcvbuff); debug("Write Spatial completed\n"); }
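/* [Editor's note] The displs[]/rcvcounts[] bookkeeping above is the standard
 * MPI_Gatherv recipe: the root receives a different count from each rank at
 * a running displacement. Stripped-down, self-contained version in which
 * rank r contributes r+1 ints; the counts and contents are illustrative. */
#include <mpi.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    int sndcnt = rank + 1;
    int *snd = malloc(sndcnt * sizeof(int));
    for (int i = 0; i < sndcnt; i++) snd[i] = rank;

    int *rcvcounts = NULL, *displs = NULL, *rcv = NULL;
    if (rank == 0) {
        rcvcounts = malloc(size * sizeof(int));
        displs    = malloc(size * sizeof(int));
        int total = 0;
        for (int r = 0; r < size; r++) {
            rcvcounts[r] = r + 1;     /* how much rank r sends          */
            displs[r]    = total;     /* where rank r's data will start */
            total       += rcvcounts[r];
        }
        rcv = malloc(total * sizeof(int));
    }
    MPI_Gatherv(snd, sndcnt, MPI_INT,
                rcv, rcvcounts, displs, MPI_INT, 0, MPI_COMM_WORLD);

    free(snd); free(rcvcounts); free(displs); free(rcv);
    MPI_Finalize();
    return 0;
}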
static int test_indexed_with_zeros(char *filename, int testcase) { int i, rank, np, buflen, num, err, nr_errors=0; int nelms[MAXLEN], buf[MAXLEN], indices[MAXLEN], blocklen[MAXLEN]; MPI_File fh; MPI_Status status; MPI_Datatype filetype; MPI_Datatype types[MAXLEN]; MPI_Aint addrs[MAXLEN]; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); /* set up the number of integers to write in each iteration */ for (i=0; i<MAXLEN; i++) nelms[i] = 0; if (rank == 0) nelms[4]=nelms[5]=nelms[7]=1; if (rank == 1) nelms[0]=nelms[1]=nelms[2]=nelms[3]=nelms[6]=nelms[8]=1; /* pre-fill the file with integers -999 */ if (rank == 0) { for (i=0; i<MAXLEN; i++) buf[i] = -999; err =MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open"); err = MPI_File_write(fh, buf, MAXLEN, MPI_INT, &status); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write"); err = MPI_File_close(&fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close"); } MPI_Barrier(MPI_COMM_WORLD); /* define a filetype with spurious leading zeros */ buflen = num = 0; for (i=0; i<MAXLEN; i++) { buflen += nelms[i]; indices[num] = i; addrs[num] = i*sizeof(int); blocklen[num] = nelms[i]; types[num] = MPI_INT; num++; } switch (testcase) { case INDEXED: MPI_Type_indexed(num, blocklen, indices, MPI_INT, &filetype); break; case HINDEXED: MPI_Type_hindexed(num, blocklen, addrs, MPI_INT, &filetype); break; case STRUCT: MPI_Type_create_struct(num, blocklen, addrs, types, &filetype); break; default: fprintf(stderr, "unknown testcase!\n"); return(-100); } MPI_Type_commit(&filetype); /* initialize write buffer and write to file*/ for (i=0; i<MAXLEN; i++) buf[i] = 1; err =MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open"); err = MPI_File_set_view(fh, 0, MPI_INT, filetype, "native", MPI_INFO_NULL); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_set_view"); err = MPI_File_write_all(fh, buf, buflen, MPI_INT, &status); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_write_all"); MPI_Type_free(&filetype); err = MPI_File_close(&fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close"); /* read back and check */ if (rank == 0) { err = MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_open"); err = MPI_File_read(fh,buf, MAXLEN, MPI_INT, &status); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_read"); err = MPI_File_close(&fh); if (err != MPI_SUCCESS) handle_error(err, "MPI_File_close"); for (i=0; i<MAXLEN; i++) { if (buf[i] < 0) { nr_errors++; printf("Error: unexpected value for case %d at buf[%d] == %d\n", testcase,i,buf[i]); } } } return nr_errors; }
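/* [Editor's note] The HINDEXED case above uses MPI_Type_hindexed, the MPI-1
 * binding; it was deprecated in MPI-2 and removed in MPI-3.0 in favour of
 * MPI_Type_create_hindexed, which takes the same arguments. A sketch of the
 * drop-in replacement, should the test be built against a current MPI: */
#include <mpi.h>

static void make_hindexed_filetype(int num, int blocklen[], MPI_Aint addrs[],
                                   MPI_Datatype *filetype)
{
    /* identical layout to the HINDEXED case above, current API name */
    MPI_Type_create_hindexed(num, blocklen, addrs, MPI_INT, filetype);
    MPI_Type_commit(filetype);
}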
int main(int argc, char *argv[]) { int iarrayOfSizes[2], iarrayOfSubsizes[2], iarrayOfStarts[2], ilocal_size; int nproc[2], periods[2], icoord[2]; int m, n, i, j, wsize, wrank, crank, ndims, lrows, lcols, grow, gcol, err; MPI_Datatype filetype; MPI_File fh; MPI_Comm cartcomm; MPI_Info info0, info3; double t, topen, twrite, tclose, wrate; double *local_array; char nstripesStr[12], stripeUnitStr[12]; int nstripes = -1; int stripeUnit = -1; MPI_Offset headerSize = 0; MPI_Init(0,0); MPI_Comm_rank(MPI_COMM_WORLD, &wrank); /* Get global array size */ m = n = 128; /* Set default size */ /* ioda [ n ] [ m ] [ nstripes ] [ stripeunit ] [ headersize ] */ if (argc > 0) { if (argc > 1) m = atoi(argv[1]); if (argc > 2) n = atoi(argv[2]); if (argc > 3) nstripes = atoi(argv[3]); if (argc > 4) stripeUnit = atoi(argv[4]); if (argc > 5) headerSize = atoi(argv[5]); if (argc > 6) { if (wrank == 0) fprintf(stderr,"Unrecognized argument %s\n", argv[6]); MPI_Abort(MPI_COMM_WORLD,1); } } if (wrank == 0) printf("Matrix is [%d,%d]; file dir = %s\n", m, n, MYSCRATCHDIR ); /* The default number of stripes = totalsize/1M */ if (nstripes < 0) { nstripes = n * m * sizeof(double) / (1024*1024); if (nstripes < 1) nstripes = 1; } if (wrank == 0) printf("nstripes = %d, stripeUnit = %d, header size = %d\n", nstripes, stripeUnit, (int)headerSize); /* Use topology routines to get decomposition and coordinates */ MPI_Comm_size(MPI_COMM_WORLD, &wsize); nproc[0] = 0; nproc[1] = 0; ndims = 2; MPI_Dims_create(wsize, ndims, nproc); periods[0] = 0; periods[1] = 0; MPI_Cart_create(MPI_COMM_WORLD, ndims, nproc, periods, 1, &cartcomm); MPI_Comm_rank(cartcomm, &crank); MPI_Cart_coords(cartcomm, crank, ndims, icoord); iarrayOfSizes[0] = m; iarrayOfSizes[1] = n; iarrayOfSubsizes[0] = m/nproc[0]; iarrayOfSubsizes[1] = n/nproc[1]; iarrayOfStarts[0] = icoord[0] * iarrayOfSubsizes[0]; iarrayOfStarts[1] = icoord[1] * iarrayOfSubsizes[1]; /* Initialize my block of the data */ ilocal_size = iarrayOfSubsizes[0] * iarrayOfSubsizes[1]; lrows = iarrayOfSubsizes[0]; lcols = iarrayOfSubsizes[1]; local_array = (double *)malloc(lrows*lcols*sizeof(double)); gcol = iarrayOfStarts[1]; grow = iarrayOfStarts[0]; for (i=0; i<lrows; i++) { for (j=0; j<lcols; j++) { local_array[j*lrows+i] = (grow+i) + (gcol+j)*m; } } /* Fortran order simply means the data is stored by columns */ MPI_Type_create_subarray(ndims, iarrayOfSizes, iarrayOfSubsizes, iarrayOfStarts, MPI_ORDER_FORTRAN, MPI_DOUBLE, &filetype); MPI_Type_commit(&filetype); info0 = MPI_INFO_NULL; info3 = MPI_INFO_NULL; if (nstripes > 0 || stripeUnit > 0) { MPI_Info_create(&info0); if (nstripes > 0) { snprintf(nstripesStr, sizeof(nstripesStr), "%d", nstripes); MPI_Info_set(info0, "striping_factor", nstripesStr); MPI_Info_set(info0, "cb_nodes", nstripesStr); } if (stripeUnit > 0) { snprintf(stripeUnitStr, sizeof(stripeUnitStr), "%d", stripeUnit); MPI_Info_set(info0, "striping_unit", stripeUnitStr); } MPI_Info_dup(info0, &info3); MPI_Info_set(info3, "romio_no_indep_rw", "true"); /* Other hints to consider: direct_io=true The default cb_buffer_size is 16777216 , but is overridden by the striping unit, which is smaller by default. 
*/ } /* level - 3 */ MPI_Barrier(MPI_COMM_WORLD); t = MPI_Wtime(); err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-3.out", MPI_MODE_CREATE | MPI_MODE_RDWR, info3, &fh); topen = MPI_Wtime() - t; if (err != MPI_SUCCESS) myAbort(err, "open testfile-3.out"); if (headerSize > 0) { /* Simulate writing a header */ if (wrank == 0) { char *header; header = (char *)calloc(1,(size_t)headerSize); MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE); free(header); } MPI_Barrier(cartcomm); } MPI_File_set_view(fh, headerSize, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL); MPI_Barrier(MPI_COMM_WORLD); t = MPI_Wtime(); err = MPI_File_write_all(fh, local_array, ilocal_size, MPI_DOUBLE, MPI_STATUS_IGNORE); twrite = MPI_Wtime() - t; if (err != MPI_SUCCESS) myAbort(err, "collective write"); err = MPI_File_close(&fh); tclose = MPI_Wtime() - t; /* tclose is the time for the write(s) + the close, in case the implementation delays (some of) the writes until the close */ if (err != MPI_SUCCESS) myAbort(err, "close testfile-3.out"); MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if (twrite > 0) wrate = (double)m * (double)n * sizeof(double)/twrite; if (wrank == 0) printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen, twrite, tclose, wrate); /* level - 0 */ MPI_Barrier(MPI_COMM_WORLD); t = MPI_Wtime(); err = MPI_File_open(cartcomm, MYSCRATCHDIR "testfile-0.out", MPI_MODE_CREATE | MPI_MODE_RDWR, info0, &fh); topen = MPI_Wtime() - t; if (err != MPI_SUCCESS) myAbort(err, "open testfile-0.out"); if (headerSize > 0) { /* Simulate writing a header */ if (wrank == 0) { char *header; header = (char *)calloc(1,(size_t)headerSize); MPI_File_write(fh, header, headerSize, MPI_BYTE, MPI_STATUS_IGNORE); free(header); } MPI_Barrier(cartcomm); } MPI_Barrier(MPI_COMM_WORLD); t = MPI_Wtime(); gcol = iarrayOfStarts[1]; grow = iarrayOfStarts[0]; for (j=0; j<lcols; j++) { MPI_Offset offset = headerSize + ((MPI_Offset)(grow) + (MPI_Offset)(gcol+j)*m) * sizeof(double); err = MPI_File_write_at(fh, offset, local_array+j*lrows, lrows, MPI_DOUBLE, MPI_STATUS_IGNORE); if (err != MPI_SUCCESS) myAbort(err, "write at"); } twrite = MPI_Wtime() - t; err = MPI_File_close(&fh); tclose = MPI_Wtime() - t; /* tclose is the time for the write(s) + the close, in case the implementation delays (some of) the writes until the close */ if (err != MPI_SUCCESS) myAbort(err, "close testfile-0"); MPI_Allreduce(MPI_IN_PLACE, &topen, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(MPI_IN_PLACE, &twrite, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(MPI_IN_PLACE, &tclose, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); if (twrite > 0) wrate = (double)m * (double)n * sizeof(double)/twrite; if (wrank == 0) printf("%d\t[%d,%d]\t%d\t%.2e\t%.2e\t%.2e\t%.2e\n", wsize, m, n, nstripes, topen, twrite, tclose, wrate); if (info0 != MPI_INFO_NULL) { MPI_Info_free(&info0); MPI_Info_free(&info3); } free(local_array); MPI_Finalize(); return 0; }
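/* [Editor's note] striping_factor, striping_unit and the cb_* keys are
 * advisory: an implementation may ignore or adjust them. When benchmarking
 * as above it is worth reading back the hints actually in effect with
 * MPI_File_get_info. Function-level sketch; pass any open MPI_File handle. */
#include <mpi.h>
#include <stdio.h>

static void print_active_hints(MPI_File fh)
{
    MPI_Info info;
    int nkeys, flag;
    char key[MPI_MAX_INFO_KEY], val[256];

    MPI_File_get_info(fh, &info);     /* snapshot of the hints in effect */
    MPI_Info_get_nkeys(info, &nkeys);
    for (int i = 0; i < nkeys; i++) {
        MPI_Info_get_nthkey(info, i, key);
        MPI_Info_get(info, key, sizeof(val) - 1, val, &flag);
        if (flag)
            printf("%s = %s\n", key, val);
    }
    MPI_Info_free(&info);
}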
int main (int argc, char **argv)
{
    struct arguments arguments;

    /* Parse our arguments; every option seen by parse_opt will
       be reflected in arguments. */
    argp_parse (&argp, argc, argv, 0, 0, &arguments);

    int run_type;
    run_type = 0;   //default is serial
    if (sscanf (arguments.args[0], "%i", &run_type)!=1) {}

    int iterations;
    iterations = 0;   //default is serial
    if (sscanf (arguments.args[1], "%i", &iterations)!=1) {}

    int count_when;
    count_when = 1000;
    if (sscanf (arguments.args[2], "%i", &count_when)!=1) {}

    char print_list[200]; //used for input list
    //sscanf with %s expects a char*, so pass the array itself, not its address
    if (sscanf (arguments.args[3], "%s", print_list)!=1) {}

    // printf("Print list = %s\n", print_list);

    //Extract animation list from arguments
    char char_array[20][12] = {{0}};     //separated input list
    int animation_list[20][2] = {{0}};   //integer input list start,range
    char *tok = strtok(print_list, ",");

    //counters
    int i,j,k,x,y,ii,jj;
    ii = 0;
    jj = 0;

    //Loop over tokens parsing out commas
    int tok_len = 0;
    while (tok != NULL && ii < 20)  //bound ii so at most 20 tokens are stored
    {
        //first loop parses out commas; clamp to the 11 chars + NUL that fit
        tok_len = strlen(tok);
        for (jj=0;jj<tok_len && jj<11;jj++)
        {
            char_array[ii][jj] = tok[jj];
        }
        // printf("Tok = %s\n", char_array[ii]);
        tok = strtok(NULL, ",");
        ii++;
    }

    //looking for a range input, convert to ints
    int stop;
    for (ii=0;ii<20;ii++)
    {
        //convert first number to int
        tok = strtok(char_array[ii], "-");
        if (tok != NULL)
        {
            animation_list[ii][0] = atoi(tok);
            tok = strtok(NULL, ",");
        }
        //look for second number, add to range
        if (tok != NULL)
        {
            stop = atoi(tok);
            animation_list[ii][1] = stop - animation_list[ii][0];
        }
        // if (rank == 0)
        // {
        //     printf("Animation_list = %i, %i\n",
        //            animation_list[ii][0], animation_list[ii][1]);
        // }
    }

    //should an animation be generated
    //prints a bunch of .pgm files, have to hand
    //make the gif...
    int animation;
    animation = arguments.animation;

    //verbose?
    int verbose;
    verbose = arguments.verbose;
    // printf("VERBOSE = %i",verbose);
    if (verbose>0 && verbose<=10)
    {
        verbose = 1;
    }

    // Initialize the MPI environment
    MPI_Init(NULL, NULL);

    // Get the number of processes
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);

    // Get the rank of the process
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get the name of the processor
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    int name_len;
    MPI_Get_processor_name(processor_name, &name_len);

    //Print run information, exit on bad command line input
    if (rank == 0 && verbose == 1)
    {
        printf("Verbose=%i, RunType=%i, Iterations=%i, CountWhen=%i, Animation=%i\n",
               verbose,run_type,iterations,count_when, animation);
    }
    if (world_size>1 && run_type ==0)
    {
        printf("Run type and process count are not consistent\n");
        MPI_Finalize();
        exit(0);
    }
    if (world_size==1 && run_type>0)
    {
        printf("Run type and process count are not consistent\n");
        MPI_Finalize();
        exit(0);
    }
    if (count_when <= 0)
    {
        if (rank == 0)
        {
            printf("Invalid count interval, positive integers only\n");
        }
        MPI_Finalize();
        exit(0);
    }

    //serial
    if (world_size == 1 && run_type == 0)
    {
        ncols=1;
        nrows=1;
    }
    //Blocked
    else if (world_size>1 && run_type == 1)
    {
        ncols = 1;
        nrows = world_size;
        my_col = 0;
        my_row = rank;
    }
    //Checker
    else if (world_size>1 && run_type == 2)
    {
        ncols = (int)sqrt(world_size);
        nrows = (int)sqrt(world_size);
        my_row = rank/nrows;
        my_col = rank-my_row*nrows;
        if (ncols*nrows!=world_size)
        {
            if (rank == 0)
            {
                printf("Number of processors must be square, Exiting\n");
            }
            MPI_Finalize();
            exit(0);
        }
    }

    // if (verbose == 1)
    // {
    //     printf("WR,row,col=%i,%i,%i\n",rank,my_row,my_col);
    // }

    //////////////////////READ IN INITIAL PGM////////////////////////////////
    if(!readpgm("life.pgm"))
    {
        // printf("WR=%d,HERE2\n",rank);
        if( rank==0 )
        {
            pprintf( "An error occurred while reading the pgm file\n" );
        }
        MPI_Finalize();
        return 1;
    }

    // Count the life forms. Note that we count from [1,1] - [height+1,width+1];
    // we need to ignore the ghost row!
i = 0; for(y=1; y<local_height+1; y++ ) { for(x=1; x<local_width+1; x++ ) { if( field_a[ y * field_width + x ] ) { i++; } } } // pprintf( "%i local buggies\n", i ); int total; MPI_Allreduce( &i, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if( rank==0 && verbose == 1 ) { pprintf( "%i total buggies\n", total ); } // printf("WR=%d, Row=%d, Col=%d\n",rank,my_row,my_col); //Row and column size per processor int rsize, csize; rsize = local_width; csize = local_height; if (rank == 0 && verbose == 1) { printf("rsize,csize,NP = %d, %d, %d\n",rsize,csize,world_size); } //Create new derived datatype for writing to files MPI_Datatype submatrix; int array_of_gsizes[2]; int array_of_distribs[2]; int array_of_dargs[2]; int array_of_psize[2]; if (run_type == 1) { if (rank == 0) { printf("g0,g1 = %i,%i\n", local_height*ncols, local_width); printf("p0,p1 = %i,%i\n", nrows, ncols); } array_of_gsizes[0] = local_height*ncols; array_of_gsizes[1] = local_width; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_psize[0] = nrows; array_of_psize[1] = ncols; // int order = MPI_ORDER_C; //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes //order,oldtype,*newtype MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix); MPI_Type_commit(&submatrix); } else if (run_type == 2) { if (rank == 0) { printf("g0,g1 = %i,%i\n", local_height*ncols, local_width*nrows); printf("p0,p1 = %i,%i\n", nrows, ncols); } array_of_gsizes[0] = local_height*ncols; array_of_gsizes[1] = local_width*nrows; array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK; array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK; array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG; array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG; array_of_psize[0] = nrows; array_of_psize[1] = ncols; // int order = MPI_ORDER_C; //size,rank,ndims,array_gsizes,array_distribs,array_args,array_psizes //order,oldtype,*newtype MPI_Type_create_darray(world_size, rank, 2, array_of_gsizes, array_of_distribs, array_of_dargs, array_of_psize, MPI_ORDER_C, MPI_UNSIGNED_CHAR, &submatrix); MPI_Type_commit(&submatrix); } MPI_Barrier(MPI_COMM_WORLD); //////////////////ALLOCATE ARRAYS, CREATE DATATYPES///////////////////// //Create new column derived datatype MPI_Datatype column; //count, blocklength, stride, oldtype, *newtype MPI_Type_hvector(csize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &column); MPI_Type_commit(&column); //Create new row derived datatype MPI_Datatype row; //count, blocklength, stride, oldtype, *newtype MPI_Type_hvector(rsize, 1, sizeof(unsigned char), MPI_UNSIGNED_CHAR, &row); MPI_Type_commit(&row); //allocate arrays and corner storage unsigned char *section; unsigned char *neighbors; //to use unsigned char *top; unsigned char *bot; unsigned char *left; unsigned char *right; //to send unsigned char *ttop; unsigned char *tbot; unsigned char *tleft; unsigned char *tright; //MALLOC!! 
section = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char)); neighbors = (unsigned char*)malloc(rsize*csize*sizeof(unsigned char)); top = (unsigned char*)malloc(rsize*sizeof(unsigned char)); bot = (unsigned char*)malloc(rsize*sizeof(unsigned char)); left = (unsigned char*)malloc(csize*sizeof(unsigned char)); right = (unsigned char*)malloc(csize*sizeof(unsigned char)); ttop = (unsigned char*)malloc(rsize*sizeof(unsigned char)); tbot = (unsigned char*)malloc(rsize*sizeof(unsigned char)); tleft = (unsigned char*)malloc(csize*sizeof(unsigned char)); tright = (unsigned char*)malloc(csize*sizeof(unsigned char)); //corners unsigned char topleft,topright,botleft,botright; //used in calculations unsigned char ttopleft,ttopright,tbotleft,tbotright; topleft = 255; topright = 255; botleft = 255; botright = 255; //used for animation, each process will put there own result in and then //each will send to process 1 which will add them up unsigned char* full_matrix; unsigned char* full_matrix_buffer; if (animation == 1) { int msize1 = rsize*ncols*csize*nrows; full_matrix = (unsigned char*)malloc(msize1*sizeof(unsigned char)); full_matrix_buffer = (unsigned char*)malloc(msize1*sizeof(unsigned char)); for (i=0; i<msize1; i++) { full_matrix[i] = 0; full_matrix_buffer[i] = 0; } } // printf("Rsize,Lsize,Fsize=%i %i %i,Csize,Lsize,Fsize=%i %i %i\n",rsize,local_width,field_width,csize,local_height,field_height); //Serial initialize vars int count = 0; if (world_size == 1 && run_type == 0) { for (i=0;i<csize;i++) { for (j=0;j<rsize;j++) { section[i*rsize + j] = 255; if (field_a[(i+1)*(2+rsize) + j + 1]) { section[i*rsize + j] = 0; count += 1; } else { section[i*rsize + j] = 255; } top[j] = 255; bot[j] = 255; ttop[j] = 255; tbot[j] = 255; } right[i] = 255; left[i] = 255; tright[i] = 255; tleft[i] = 255; } // printf("COUNT 4 = %d\n", count); } //Blocked/Checkered initializing variables else if (world_size > 1 && (run_type == 1 || run_type == 2)) { //initialize for (i=0;i<csize;i++) { for (j=0;j<rsize;j++) { section[i*rsize + j] = 255; if (field_a[(i+1)*(2+rsize) + j + 1]) { section[i*rsize + j] = 0; count += 1; } else { section[i*rsize + j] = 255; } top[j] = 255; bot[j] = 255; ttop[j] = 255; tbot[j] = 255; } right[i] = 255; left[i] = 255; tright[i] = 255; tleft[i] = 255; } // MPI_Allreduce( &count, &total, 1, MPI_UNSIGNED_CHAR, MPI_SUM, MPI_COMM_WORLD ); // if (rank == 0) // { // printf("COUNT 4 = %d\n", total); // } } //header/footer for mpio writes char header1[15]; header1[0] = 0x50; header1[1] = 0x35; header1[2] = 0x0a; header1[3] = 0x35; header1[4] = 0x31; header1[5] = 0x32; header1[6] = 0x20; header1[7] = 0x35; header1[8] = 0x31; header1[9] = 0x32; header1[10] = 0x0a; header1[11] = 0x32; header1[12] = 0x35; header1[13] = 0x35; header1[14] = 0x0a; char footer; footer = 0x0a; //make a frame or not? 
    int create_frame = 0;

    //send to
    int send_to;
    int receive_from;
    int info[5];
    info[2] = rank;
    info[3] = rsize;
    info[4] = csize;
    unsigned char info2[4];
    info2[0] = topleft;
    info2[1] = topright;
    info2[2] = botleft;
    info2[3] = botright;
    int current_count;
    int location;

    //Gameplay
    for (k=0;k<iterations;k++)
    {
        //Count buggies
        if (k%count_when==0)
        {
            if (verbose == 1)
            {
                current_count = rsize*csize-count_buggies(rsize,csize,section);
                MPI_Allreduce( &current_count, &total, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
                if (rank == 0)
                {
                    printf("Iteration=%5d, Count=%6d\n", k,total);
                }
                ////corner debug
                // printf("WR,tl,tr,bl,br = %d %d %d %d %d\n", rank, topleft, topright, botleft, botright);
            }
        }

        //Write to file serially for comparison
        //If animation is requested
        if (animation == 1 && run_type == 0)
        {
            //Put smaller matrix part into larger matrix
            for (i=0; i<csize; i++)
            {
                for (j=0; j<rsize; j++)
                {
                    location = (my_row*csize*rsize*ncols + my_col*rsize + i*rsize*ncols + j);
                    full_matrix_buffer[location] = section[i*rsize+j];
                }
                // if (rank == 0)
                // {
                //     printf("Location = %d\n", location);
                // }
            }
            //Gather matrix
            MPI_Reduce(full_matrix_buffer, full_matrix, rsize*ncols*csize*nrows,
                       MPI_UNSIGNED_CHAR, MPI_SUM, 0, MPI_COMM_WORLD);
            if (rank == 0 && run_type == 0)
            {
                write_matrix_to_pgm(k, rsize*ncols, csize*nrows, full_matrix);
            }
        }
        //mpio write pgm
        else if (animation == 1 && (run_type == 1 || run_type == 2))
        {
            //default is no frame
            create_frame = 0;
            for (ii=0;ii<20;ii++)
            {
                for (jj=0;jj<animation_list[ii][1]+1;jj++)
                {
                    // if (rank == 0)
                    // {
                    //     printf("a,ii,j,k= %i,%i,%i,%i, Frame? = %i\n",
                    //            animation_list[ii][0],ii,jj,k,(animation_list[ii][0]+jj-k)==0);
                    // }
                    if ((animation_list[ii][0] + jj - k) == 0)
                    {
                        create_frame = 1;
                        break;
                    }
                }
            }

            if (create_frame == 1)
            {
                //dynamic filename with leading zeroes for easy conversion to gif
                char buffer[128];
                snprintf(buffer, sizeof(buffer), "Animation/frame%04d.pgm", k);

                /* open the file, and set the view */
                MPI_File file;
                MPI_File_open(MPI_COMM_WORLD, buffer, MPI_MODE_CREATE|MPI_MODE_WRONLY,
                              MPI_INFO_NULL, &file);
                MPI_File_set_view(file, 0, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR,
                                  "native", MPI_INFO_NULL);

                //write header
                MPI_File_write(file, &header1, 15, MPI_CHAR, MPI_STATUS_IGNORE);

                //write matrix
                MPI_File_set_view(file, 15, MPI_UNSIGNED_CHAR, submatrix, "native",
                                  MPI_INFO_NULL);
                MPI_File_write_all(file, section, rsize*csize, MPI_UNSIGNED_CHAR,
                                   MPI_STATUS_IGNORE);

                //write footer (trailing newline)
                MPI_File_set_view(file, 15+rsize*ncols*csize*nrows, MPI_UNSIGNED_CHAR,
                                  MPI_UNSIGNED_CHAR, "native", MPI_INFO_NULL);
                MPI_File_write(file, &footer, 1, MPI_CHAR, MPI_STATUS_IGNORE);

                //the handle was never released before; close it so each frame
                //is flushed and we do not leak one file handle per frame
                MPI_File_close(&file);
            }
        }

        // BLOCKED COMMUNICATION //
        if (run_type == 1)
        {
            //change bot (send top) to account for middle area
            //alternate to avoid locking
            send_to = rank - 1;
            receive_from = rank + 1;

            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }
            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }

            //send top, receive bot
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
            //change top to account for middle area
            //alternate to avoid locking
            send_to = rank + 1;
            receive_from = rank - 1;

            //send bot, receive top
            if (rank%2==0)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from >= 0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                // printf("%d, %d, %d\n", rank, send_to, receive_from);
                if (receive_from<world_size && receive_from >= 0)
                {
                    //*data,count,type,from,tag,comm,mpi_status
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    //*data,count,type,to,tag,comm
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }
        }
        // CHECKERED COMMUNICATION //
        else if (run_type == 2)
        {
            //figure out what to send
            //top and bottom
            for (i=0;i<rsize;i++)
            {
                ttop[i] = section[i];
                tbot[i] = section[rsize*(csize-1)+i];
            }
            //left n right
            for (i=0;i<csize;i++)
            {
                tleft[i] = section[0 + rsize*i];
                tright[i] = section[rsize-1 + rsize*i];
            }
            //corners
            ttopleft = tleft[0];
            tbotleft = tleft[csize-1];
            ttopright = tright[0];
            tbotright = tright[csize-1];

            //Send top, receive bot
            send_to = rank - nrows;
            receive_from = rank + nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(bot, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(ttop, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send bot, receive top
            send_to = rank + nrows;
            receive_from = rank - nrows;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0)
                {
                    MPI_Recv(top, 1, row, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0)
                {
                    MPI_Send(tbot, 1, row, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send left, receive right
            send_to = rank - 1;
            receive_from = rank + 1;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(right, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tleft, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
            }

            //Send right, receive left
            //(tright and left are column buffers of csize elements, so the
            // column type is the correct one here; `row' was a copy-paste slip)
            send_to = rank + 1;
            receive_from = rank - 1;
            if (rank%2==0)
            {
                if (send_to<world_size && send_to>=0 && send_to/nrows==my_row)
                {
                    MPI_Send(tright, 1, column, send_to, 0, MPI_COMM_WORLD);
                }
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                }
            }
            else if (rank%2==1)
            {
                if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row)
                {
                    MPI_Recv(left, 1, column, receive_from, 0,
MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row) { MPI_Send(tright, 1, row, send_to, 0, MPI_COMM_WORLD); } } //Send topright, receive botleft send_to = rank - ncols + 1; receive_from = rank + ncols - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send topleft, receive botright send_to = rank - ncols - 1; receive_from = rank + ncols + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row+1) { MPI_Recv(&botright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row-1) { MPI_Send(&ttopleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send botleft, receive topright send_to = rank + ncols - 1; receive_from = rank - ncols + 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topright, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotleft, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } //Send botright, receive topleft send_to = rank + ncols + 1; receive_from = rank - ncols - 1; if (rank%2==0) { if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } else if (rank%2==1) { if (receive_from<world_size && receive_from>=0 && receive_from/nrows==my_row-1) { MPI_Recv(&topleft, 1, MPI_UNSIGNED_CHAR, receive_from, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if (send_to<world_size && send_to>=0 && send_to/nrows==my_row+1) { MPI_Send(&tbotright, 1, MPI_UNSIGNED_CHAR, send_to, 0, MPI_COMM_WORLD); } } info2[0] = topleft; info2[1] = topright; info2[2] = botleft; info2[3] = botright; } // if (rank == 1){ // print_matrix(rsize, 1, top); // print_matrix(rsize, csize, section); // print_matrix(rsize, 1, bot); // printf("\n"); // } // printf("wr=%d,iteration=%d,maxval=%d, 11\n", rank, 
k,(csize-1)*rsize-1+rsize); /////////// CELL UPDATES ///////////////// //count neighbor for (i=0;i<csize;i++) { for (j=0; j<rsize; j++) { info[0] = i; info[1] = j; neighbors[i*rsize+j] = count_neighbors(info, info2, section, top, bot, left, right); // printf("%i",neighbors[i*rsize+j]); } // printf("\n"); } //update cells current_count = 0; for (i=0;i<csize;i++) { for (j=0; j<rsize; j++) { //cell currently alive if (section[i*rsize+j] == 0) { //2 or 3 neighbors lives, else die if (neighbors[i*rsize+j] < 2 || neighbors[i*rsize+j] > 3) { section[i*rsize+j] = 255; } } else { //Exactly 3 neighbors spawns new life if (neighbors[i*rsize+j] == 3) { section[i*rsize+j] = 0; } } } } } MPI_Barrier(MPI_COMM_WORLD); sleep(0.5); //free malloc stuff if( field_a != NULL ) free( field_a ); if( field_b != NULL ) free( field_b ); free(section); free(neighbors); free(top); free(bot); free(left); free(right); MPI_Finalize(); exit (0); }
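The program above stages its halos through packed buffers (ttop, tleft, ...) and writes each PGM frame through a submatrix filetype, but the definitions of the row, column, and submatrix datatypes fall outside this excerpt. A minimal sketch of how they could be built, assuming the rsize x csize local tile inside a (csize*nrows) x (rsize*ncols) global frame implied by the location formula above; the names mirror the code but are reconstructed, not taken from the original:

/* Sketch (assumption): plausible definitions of the derived datatypes used
 * above. Because halo data is copied into packed buffers before sending,
 * both row and column are plain contiguous types. */
MPI_Datatype row, column, submatrix;

MPI_Type_contiguous(rsize, MPI_UNSIGNED_CHAR, &row);     /* one boundary row */
MPI_Type_commit(&row);
MPI_Type_contiguous(csize, MPI_UNSIGNED_CHAR, &column);  /* one boundary column */
MPI_Type_commit(&column);

/* this rank's csize x rsize tile inside the global frame, used as the
 * filetype when the PGM body is written collectively */
int gsizes[2] = { csize*nrows, rsize*ncols };
int lsizes[2] = { csize, rsize };
int starts[2] = { my_row*csize, my_col*rsize };
MPI_Type_create_subarray(2, gsizes, lsizes, starts, MPI_ORDER_C,
                         MPI_UNSIGNED_CHAR, &submatrix);
MPI_Type_commit(&submatrix);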
int main(int argc, char **argv)
{
    MPI_File fh;
    MPI_Status status;
    MPI_Offset size;
    long long *buf, i;
    char *filename;
    int j, mynod, nprocs, len, flag, err;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    if (nprocs != 1) {
        fprintf(stderr, "Run this program on one process only\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    i = 1;
    while ((i < argc) && strcmp("-fname", *argv)) {
        i++;
        argv++;
    }
    if (i >= argc) {
        fprintf(stderr, "\n*# Usage: large -fname filename\n\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    argv++;
    len = strlen(*argv);
    filename = (char *) malloc(len+1);
    strcpy(filename, *argv);

    fprintf(stderr, "This program creates a 4 Gbyte file. Don't run it if you don't have that much disk space!\n");

    buf = (long long *) malloc(SIZE * sizeof(long long));
    if (!buf) {
        fprintf(stderr, "not enough memory to allocate buffer\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_RDWR,
                  MPI_INFO_NULL, &fh);

    for (i=0; i<NTIMES; i++) {
        for (j=0; j<SIZE; j++)
            buf[j] = i*SIZE + j;

        /* MPI_DOUBLE because not all MPI implementations define
           MPI_LONG_LONG_INT, even though the C compiler supports long long. */
        err = MPI_File_write(fh, buf, SIZE, MPI_DOUBLE, &status);
        if (err != MPI_SUCCESS) {
            fprintf(stderr, "MPI_File_write returned error\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }

    MPI_File_get_size(fh, &size);
    fprintf(stderr, "file size = %lld bytes\n", size);

    MPI_File_seek(fh, 0, MPI_SEEK_SET);
    for (j=0; j<SIZE; j++) buf[j] = -1;

    flag = 0;
    for (i=0; i<NTIMES; i++) {
        /* MPI_DOUBLE for the same portability reason as above */
        err = MPI_File_read(fh, buf, SIZE, MPI_DOUBLE, &status);
        if (err != MPI_SUCCESS) {
            fprintf(stderr, "MPI_File_read returned error\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        for (j=0; j<SIZE; j++)
            if (buf[j] != i*SIZE + j) {
                fprintf(stderr, "error: buf %d is %lld, should be %lld\n",
                        j, buf[j], i*SIZE + j);
                flag = 1;
            }
    }
    if (!flag) fprintf(stderr, "Data read back is correct\n");

    MPI_File_close(&fh);
    free(buf);
    free(filename);
    MPI_Finalize();
    return 0;
}
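The test above deliberately pushes its long long data through MPI_DOUBLE, which only works because both types are 8 bytes on the target platforms. A small guard one could add near the top of main() to check that assumption at runtime rather than relying on it silently (this is a suggested addition, not part of the original test):

/* Verify the size assumption behind writing long long data as MPI_DOUBLE.
   On any MPI-2 or later implementation, MPI_LONG_LONG_INT exists and the
   workaround is unnecessary. */
int dsize;
MPI_Type_size(MPI_DOUBLE, &dsize);
if (dsize != (int) sizeof(long long)) {
    fprintf(stderr, "sizeof(long long)=%zu != MPI_Type_size(MPI_DOUBLE)=%d\n",
            sizeof(long long), dsize);
    MPI_Abort(MPI_COMM_WORLD, 1);
}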
int main(int argc, char** argv)
{
    int irank, nrank;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MCW, &nrank);
    MPI_Comm_rank(MCW, &irank);

    double t1, t2;
    if (irank == 0) t1 = MPI_Wtime();

    int nx, ny;
    int px, py;

    /* reusable decomposition boilerplate */
    // (1) init dims
    int dims[2] = {0, 0};
    MPI_Dims_create(nrank, 2, dims);
    ny = (NY-1)/dims[0];
    nx = (NX-1)/dims[1];

    // (2) init cart
    int periods[2] = {0, 0};  // non-periodic boundaries
    MPI_Comm cart;
    MPI_Cart_create(MCW, 2, dims, periods, 0, &cart);

    int c[2];  /* Cartesian coordinates */
    MPI_Cart_coords(cart, irank, 2, c);
    py = c[0];  // coordinates are ordered row-major, so y is c[0]
    px = c[1];

    double h = 1.0/NX;
    double dt = 0.1*h*h;
    double dth2 = dt/h/h;

    int i, j, k;
    int height = ny+2, width = nx+2;

    /* Shift each base pointer by one row and one column so the ghost layer
       can be addressed as u[-1][..] and u[..][-1]. Note the original malloc
       pointers are overwritten, so this memory is never freed. */
    double (*u)[width];
    u = (double(*)[width]) malloc(height*width*sizeof(double));
    u = (double(*)[width]) (&u[1][1]);
    double (*un)[width];
    un = (double(*)[width]) malloc(height*width*sizeof(double));
    un = (double(*)[width]) (&un[1][1]);

    for (j=-1; j<ny+1; j++)
        for (i=-1; i<nx+1; i++)
            u[j][i] = 0.0;

    // boundary condition at y=0
    if (py == 0)
        for (i=-1; i<nx+1; i++) u[-1][i] = 1.0;
    // boundary condition at x=0
    if (px == 0)
        for (j=0; j<ny+1; j++) u[j][-1] = 0.5;

    MPI_Datatype vedge;
    MPI_Type_vector(ny, 1, nx+2, MPI_DOUBLE, &vedge);
    MPI_Type_commit(&vedge);

    int north, south, east, west;
    MPI_Cart_shift(cart, 0, 1, &south, &north);
    MPI_Cart_shift(cart, 1, 1, &west, &east);

    /* main loop */
    for (k=0; k<2000; k++) {
        for (j=0; j<ny; j++)
            for (i=0; i<nx; i++)
                un[j][i] = u[j][i] + (-4*u[j][i] + u[j][i+1] + u[j][i-1]
                                      + u[j+1][i] + u[j-1][i])*dth2;
        for (j=0; j<ny; j++)
            for (i=0; i<nx; i++)
                u[j][i] = un[j][i];

        /* halo exchange: rows north/south, then columns east/west */
        MPI_Sendrecv(&u[ny-1][0], nx, MPI_DOUBLE, north, 0,
                     &u[-1][0], nx, MPI_DOUBLE, south, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&u[0][0], nx, MPI_DOUBLE, south, 0,
                     &u[ny][0], nx, MPI_DOUBLE, north, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&u[0][nx-1], 1, vedge, east, 0,
                     &u[0][-1], 1, vedge, west, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        MPI_Sendrecv(&u[0][0], 1, vedge, west, 0,
                     &u[0][nx], 1, vedge, east, 0,
                     MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    } // end loop (k)

    /* set the file view */
    MPI_File udata;
    MPI_File_open(cart, "u.data", MPI_MODE_WRONLY | MPI_MODE_CREATE,
                  MPI_INFO_NULL, &udata);
    MPI_File_set_size(udata, 0);

    int size[2] = {NY+1, LW*(NX+1)+1}, subsize[2], start[2];
    // py, px are the Cartesian coordinates
    subsize[0] = ny;
    subsize[1] = LW*nx;
    start[0] = py*ny + 1;
    start[1] = LW*(px*nx + 1);
    if (py == 0)         { subsize[0]++; start[0] = 0; }      /* south edge */
    if (py == dims[0]-1) { subsize[0]++; }                    /* north edge */
    if (px == 0)         { subsize[1] += LW; start[1] = 0; }  /* west edge */
    if (px == dims[1]-1) { subsize[1] += LW+1; }              /* east edge */

    MPI_Datatype ftype;
    MPI_Type_create_subarray(2, size, subsize, start, MPI_ORDER_C, MPI_CHAR, &ftype);
    MPI_Type_commit(&ftype);
    MPI_File_set_view(udata, 0, MPI_CHAR, ftype, "native", MPI_INFO_NULL);

    /* output */
    MPI_Status st;
    char *wbuf = (char*) malloc((LW*(nx+2)+2)*sizeof(char));
    int jstart=0, istart=0, jend=ny, iend=nx;
    if (py == 0)         jstart = -1;
    if (py == dims[0]-1) jend = ny+1;
    if (px == 0)         istart = -1;
    if (px == dims[1]-1) iend = nx+1;

    for (j=jstart; j<jend; j++) {
        for (i=istart, k=0; i<iend; i++, k+=LW) {
            sprintf(wbuf+k, " %.15E %.15E %21.15E\n",
                    (i+1 + px*nx)*h, (j+1 + py*ny)*h, u[j][i]);
        }
        if (px == dims[1]-1)  // east edge
            sprintf(wbuf+(k++), "\n");
        MPI_File_write(udata, wbuf, k, MPI_CHAR, &st);
    }
    MPI_File_close(&udata);

    if (irank == 0) {
        t2 = MPI_Wtime();
        printf("%g\n", t2-t1);
    }
    MPI_Finalize();
    return 0;
}
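The pointer cast-and-shift used above is worth seeing in isolation: shifting the array pointer by one row and one column lets the interior be indexed from 0 while the ghost layer sits at index -1 and ny/nx. A self-contained sketch of just that trick, with made-up sizes, keeping the base pointer so the memory can still be freed:

/* Sketch: ghost-layer indexing via a shifted array pointer (C99 VLA types). */
#include <stdlib.h>

int main(void)
{
    int nx = 4, ny = 3;
    int width = nx + 2, height = ny + 2;

    /* keep the base pointer for free(); the program above overwrites it */
    double (*base)[width] = malloc(height * width * sizeof(double));
    double (*u)[width] = (double(*)[width]) &base[1][1];

    u[-1][-1] = 0.0;  /* first allocated element: base[0][0] */
    u[ny][nx] = 0.0;  /* last allocated element: base[height-1][width-1] */

    free(base);
    return 0;
}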
/*----< main() >------------------------------------------------------------*/
int main(int argc, char **argv)
{
    int i, j, err, rank, np, num_io;
    char *buf, *filename;
    int rank_dim[2], array_of_sizes[2];
    int array_of_subsizes[2];
    int count, *blocklengths, global_array_size;
    MPI_Count ftype_size;
    MPI_Aint *displacements;
    MPI_File fh;
    MPI_Datatype ftype;
    MPI_Request *request;
    MPI_Status *statuses;
    MPI_Status status;
    MPI_Offset offset = 0;
    int nr_errors = 0;
#ifdef VERBOSE
    int k;
#endif

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &np);

    if (np != 4) {
        if (!rank)
            printf("Please run with 4 processes. Exiting ...\n\n");
        MPI_Finalize();
        return 1;
    }

    filename = (argc > 1) ? argv[1] : "testfile";

    num_io = 2;
    request = (MPI_Request *) malloc(num_io * sizeof(MPI_Request));
    statuses = (MPI_Status *) malloc(num_io * sizeof(MPI_Status));

    /*-----------------------------------------------------------------------*/
    /* process rank in each dimension */
    rank_dim[0] = rank / 2;
    rank_dim[1] = rank % 2;

    /* global 2D array size */
    array_of_sizes[0] = YLEN * 2;
    array_of_sizes[1] = XLEN * 2;
    global_array_size = array_of_sizes[0] * array_of_sizes[1];

    array_of_subsizes[0] = YLEN / 2;
    array_of_subsizes[1] = XLEN * SUB_XLEN / 5;

    offset = rank_dim[0] * YLEN * array_of_sizes[1] + rank_dim[1] * XLEN;

    /* define data type for file view */
    count = array_of_subsizes[0] * 2;   /* 2 is the no. of blocks along X */
    blocklengths = (int *) malloc(count * sizeof(int));
    displacements = (MPI_Aint *) malloc(count * sizeof(MPI_Aint));
    for (i = 0; i < count; i++)
        blocklengths[i] = array_of_subsizes[1] / 2;
    for (i = 0; i < array_of_subsizes[0]; i++)
        for (j = 0; j < 2; j++)
            displacements[i * 2 + j] = offset + i * 2 * array_of_sizes[1] + j * XLEN / 2;
    MPI_Type_create_hindexed(count, blocklengths, displacements, MPI_CHAR, &ftype);
    MPI_Type_commit(&ftype);
    MPI_Type_size_x(ftype, &ftype_size);

    /* subarray's layout in the global array

       P0's layout                                    P1's layout
       [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]  |  [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
    [ 0] 0 1 2 3 4 5                                |    D E F G H I
    [ 1]                                            |
    [ 2] 6 7 8 9 : ;                                |    J K L M N O
    [ 3]                                            |
    [ 4]                                            |
    [ 5]                                            |
    [ 6]                                            |
    [ 7]                                            |
    [ 8]                                            |
    [ 9]                                            |

       P2's layout                                    P3's layout
       [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]  |  [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
    [ 0]                                            |
    [ 1]                                            |
    [ 2]                                            |
    [ 3]                                            |
    [ 4]                                            |
    [ 5] X Y Z [ \ ]                                |    l m n o p q
    [ 6]                                            |
    [ 7] ^ _ ` a b c                                |    r s t u v w
    [ 8]                                            |
    [ 9]                                            |
     */

    /* initialize the write buffer */
    buf = (char *) malloc(array_of_subsizes[0] * array_of_subsizes[1]);
    for (i = 0; i < array_of_subsizes[0] * array_of_subsizes[1]; i++)
        buf[i] = '0' + rank * 20 + i % 79;

    /* zero file contents --------------------------------------------------- */
    if (rank == 0) {
        char *wr_buf = (char *) calloc(num_io * global_array_size, 1);
        MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY,
                      MPI_INFO_NULL, &fh);
        MPI_File_write(fh, wr_buf, num_io * global_array_size, MPI_CHAR, &status);
        MPI_File_close(&fh);
        free(wr_buf);
    }

    /* open the file -------------------------------------------------------- */
    err = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY,
                        MPI_INFO_NULL, &fh);
    if (err != MPI_SUCCESS) {
        printf("Error: MPI_File_open() filename %s\n", filename);
        MPI_Abort(MPI_COMM_WORLD, -1);
        exit(1);
    }

    /* MPI nonblocking collective write (MPI 3.1) */
    for (i = 0; i < num_io; i++) {
        offset = i * global_array_size;
        /* set the file view */
        MPI_File_set_view(fh, offset, MPI_BYTE, ftype, "native", MPI_INFO_NULL);
        MPI_File_iwrite_all(fh, buf, ftype_size, MPI_CHAR, &request[i]);
    }
    MPI_Waitall(num_io, request, statuses);
    MPI_File_close(&fh);

    /* read and print file contents ----------------------------------------- */
    if (rank == 0) {
        char *ptr;
        char *rd_buf = (char *) calloc(num_io * global_array_size, 1);
        MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh);
        MPI_File_read(fh, rd_buf, num_io * global_array_size, MPI_CHAR, &status);
        MPI_File_close(&fh);

#ifdef VERBOSE
        printf("-------------------------------------------------------\n");
        printf("    [");
        for (i = 0; i < 2; i++) {
            for (j = 0; j < XLEN; j++)
                printf(" %d", j);
            printf(" ");
        }
        printf("]\n\n");

        ptr = rd_buf;
        for (k = 0; k < num_io; k++) {
            for (i = 0; i < 2 * YLEN; i++) {
                printf("[%2d]", k * 2 * YLEN + i);
                for (j = 0; j < 2 * XLEN; j++) {
                    if (j > 0 && j % XLEN == 0)
                        printf(" ");
                    if (*ptr != 0)
                        printf(" %c", *ptr);
                    else
                        printf("  ");
                    ptr++;
                }
                printf("\n");
            }
            printf("\n");
        }
#endif

        ptr = rd_buf;
        for (i = 0; i < 2 * YLEN * num_io; i++) {
            for (j = 0; j < 2 * XLEN; j++) {
                if (*ptr != compare_buf[i][j]) {
                    fprintf(stderr, "expected %d got %d at [%d][%d]\n",
                            compare_buf[i][j], *ptr, i, j);
                    nr_errors++;
                }
                ptr++;
            }
        }
        free(rd_buf);

        if (nr_errors == 0)
            fprintf(stdout, " No Errors\n");
        else
            fprintf(stderr, "Found %d errors\n", nr_errors);
    }

    free(blocklengths);
    free(displacements);
    free(buf);
    free(request);
    free(statuses);
    MPI_Type_free(&ftype);
    MPI_Finalize();
    return 0;
}
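A point that trips people up in the test above: MPI_Type_create_hindexed takes byte displacements, which is why displacements[] is computed from the flattened 2D offset rather than from element counts, and why the file view pairs an MPI_BYTE etype with the hindexed filetype. A standalone sketch of the same pattern with made-up sizes (filename and block layout are invented for illustration):

/* Sketch: a filetype of two 3-byte blocks at byte offsets 0 and 10. Writing
   6 chars through this view touches file bytes 0-2 and 10-12; bytes 3-9 are
   a hole and remain untouched. */
#include <mpi.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int blocklens[2] = {3, 3};
    MPI_Aint disps[2] = {0, 10};   /* displacements in bytes */
    MPI_Datatype ftype;
    MPI_Type_create_hindexed(2, blocklens, disps, MPI_CHAR, &ftype);
    MPI_Type_commit(&ftype);

    MPI_File fh;
    MPI_File_open(MPI_COMM_SELF, "hindexed_sketch.out",
                  MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);
    MPI_File_set_view(fh, 0, MPI_BYTE, ftype, "native", MPI_INFO_NULL);
    MPI_File_write(fh, "ABCDEF", 6, MPI_CHAR, MPI_STATUS_IGNORE);
    MPI_File_close(&fh);

    MPI_Type_free(&ftype);
    MPI_Finalize();
    return 0;
}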
//------------------------------------------------------------------------------
// Function to output non-magnetic atomic positions to disk
//------------------------------------------------------------------------------
void atoms_non_magnetic(){

   //------------------------------------------------------------
   // Determine non-magnetic atoms to be output to coord list
   //------------------------------------------------------------

   // array of atom numbers to be output
   std::vector<uint64_t> atom_list(0);

   // get output bounds
   const double minB[3] = {atoms_output_min[0] * cs::system_dimensions[0],
                           atoms_output_min[1] * cs::system_dimensions[1],
                           atoms_output_min[2] * cs::system_dimensions[2]};
   const double maxB[3] = {atoms_output_max[0] * cs::system_dimensions[0],
                           atoms_output_max[1] * cs::system_dimensions[1],
                           atoms_output_max[2] * cs::system_dimensions[2]};

   // loop over all non-magnetic atoms, keeping those within the output bounds
   for(uint64_t atom = 0; atom < cs::non_magnetic_atoms_array.size(); atom++){
      const double cc[3] = {cs::non_magnetic_atoms_array[atom].x,
                            cs::non_magnetic_atoms_array[atom].y,
                            cs::non_magnetic_atoms_array[atom].z};
      if( (cc[0] >= minB[0]) && (cc[0] <= maxB[0]) &&
          (cc[1] >= minB[1]) && (cc[1] <= maxB[1]) &&
          (cc[2] >= minB[2]) && (cc[2] <= maxB[2]) ){
         atom_list.push_back(atom);
      }
   }

   //------------------------------------------------
   // Create temporary buffers for atom information
   //------------------------------------------------
   uint64_t num_local_atoms = atom_list.size();
   uint64_t num_total_atoms = 0; // number of atoms across all processors

#ifdef MPICF
   // calculate number of atoms to be output on all processors
   MPI_Allreduce(&num_local_atoms, &num_total_atoms, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
#else
   num_total_atoms = num_local_atoms;
#endif

   std::vector<int> atom_type_buffer(num_local_atoms);
   for(unsigned int atom = 0; atom < num_local_atoms; atom++)
      atom_type_buffer[atom] = cs::non_magnetic_atoms_array[ atom_list[atom] ].mat;

   std::vector<int> atom_category_buffer(num_local_atoms);
   for(unsigned int atom = 0; atom < num_local_atoms; atom++)
      atom_category_buffer[atom] = cs::non_magnetic_atoms_array[ atom_list[atom] ].cat;

   std::vector<double> atom_coord_buffer(3*num_local_atoms);
   for(unsigned int atom = 0; atom < num_local_atoms; atom++){
      const uint64_t atom_id = atom_list[atom]; // get atom array index
      atom_coord_buffer[3*atom + 0] = cs::non_magnetic_atoms_array[atom_id].x;
      atom_coord_buffer[3*atom + 1] = cs::non_magnetic_atoms_array[atom_id].y;
      atom_coord_buffer[3*atom + 2] = cs::non_magnetic_atoms_array[atom_id].z;
   }

   //------------------------------------------
   // Output metadata from root process
   //------------------------------------------
   // set number of files
   // const int files = config::internal::num_io_groups; // unused variable
   if(config::internal::mode != legacy && vmpi::my_rank == 0){
      config::internal::write_non_magnetic_meta(num_total_atoms);
   }

   //------------------------------------------
   // Output coordinate data
   //------------------------------------------
   // Determine output filename
   std::stringstream file_sstr;
   // set simple file name for single file output
   if(config::internal::num_io_groups == 1) file_sstr << "non-magnetic-atoms.data";
   // otherwise set indexed files
   else file_sstr << "non-magnetic-atoms-" << std::setfill('0') << std::setw(6)
                  << config::internal::io_group_id << ".data";
   // convert string stream to string
   std::string filename = file_sstr.str();

   // Calculate number of gigabytes to be written to disk:
   // 3 doubles (coordinates) plus 2 ints (type, category) per atom
   const double data_size = double(num_total_atoms) * 1.0e-9 *
                            (3.0*double(sizeof(double)) + 2.0*double(sizeof(int)));

   // Output informative message of actual data size to be written to disk (binary mode)
   zlog << zTs() << "Total non-magnetic data filesize: " << 1000.0 * data_size << " MB" << std::endl;

   // Output informative message to log file on root process
   zlog << zTs() << "Outputting non-magnetic atomic coordinate file to disk ";

   // Variable for calculating output bandwidth
   double io_time = 1.0e-12;

   //-----------------------------------------------------
   // Parallel mode output
   //-----------------------------------------------------
#ifdef MPICF
   // Determine io mode and call appropriate function for data
   switch(config::internal::mode){

      // legacy mode does not output non-magnetic atoms
      case config::internal::legacy:
         break;

      case config::internal::mpi_io:{
         vutil::vtimer_t timer; // instantiate timer
         MPI_File fh;           // MPI file handle
         MPI_Status status;     // MPI io status
         // convert filename to character string for output
         char *cfilename = (char*)filename.c_str();
         // Open file on all processors
         MPI_File_open(MPI_COMM_WORLD, cfilename, MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &fh);
         // write number of atoms on root process
         if(vmpi::my_rank == 0) MPI_File_write(fh, &num_total_atoms, 1, MPI_UINT64_T, &status);

         // Calculate explicit byte offsets for each data block: the write_at
         // routines take absolute offsets rather than advancing a file pointer
         MPI_Offset type_offset     = config::internal::linear_offset + sizeof(uint64_t);
         MPI_Offset category_offset = config::internal::linear_offset + num_total_atoms * sizeof(int) + sizeof(uint64_t);
         MPI_Offset data_offset     = config::internal::buffer_offset + 2 * num_total_atoms * sizeof(int) + sizeof(uint64_t);

         timer.start(); // start timer

         // Write data to disk
         MPI_File_write_at_all(fh, type_offset, &atom_type_buffer[0], atom_type_buffer.size(), MPI_INT, &status);
         MPI_File_write_at_all(fh, category_offset, &atom_category_buffer[0], atom_category_buffer.size(), MPI_INT, &status);
         MPI_File_write_at_all(fh, data_offset, &atom_coord_buffer[0], atom_coord_buffer.size(), MPI_DOUBLE, &status);

         timer.stop(); // stop timer

         // Calculate elapsed time
         io_time = timer.elapsed_time();

         // Close file
         MPI_File_close(&fh);
         break;
      }

      case config::internal::fpprocess:
         io_time = write_coord_data(filename, atom_coord_buffer, atom_type_buffer, atom_category_buffer);
         break;

      case config::internal::fpnode:{
         // Gather data from all processors in io group
         std::vector<int> collated_atom_type_buffer(0);
         collate_int_data(atom_type_buffer, collated_atom_type_buffer);
         std::vector<int> collated_atom_category_buffer(0);
         collate_int_data(atom_category_buffer, collated_atom_category_buffer);
         std::vector<double> collated_atom_coord_buffer(0);
         collate_double_data(atom_coord_buffer, collated_atom_coord_buffer);

         // output data on master io processes
         if(config::internal::io_group_master)
            io_time = write_coord_data(filename, collated_atom_coord_buffer,
                                       collated_atom_type_buffer, collated_atom_category_buffer);

         // find the longest io time across all io nodes and report it on root
         double max_io_time = 0.0;
         MPI_Reduce(&io_time, &max_io_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
         io_time = max_io_time;
         break;
      }
   }
#else
   //-----------------------------------------------------
   // Serial mode output (ignores most io directives)
   //-----------------------------------------------------
   // if new output (not legacy) then output non-magnetic atoms
   if(config::internal::mode != config::internal::legacy)
      io_time = write_coord_data(filename, atom_coord_buffer, atom_type_buffer, atom_category_buffer);
#endif

   // Output bandwidth to log file
   zlog << data_size/io_time << " GB/s in " << io_time << " s" << std::endl;

   return;
}
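In the mpi_io branch above, linear_offset and buffer_offset are computed elsewhere in the library; they encode each rank's starting element within the shared file. A sketch of how such per-rank offsets are typically derived with an exclusive prefix sum, and how they map onto the file layout implied by the three write_at_all calls (header uint64_t, then N type ints, then N category ints, then 3N coordinate doubles). The variable names here are hypothetical, not vampire's actual API:

/* Sketch (hypothetical names): per-rank byte offsets for the layout above. */
uint64_t num_local_atoms = /* this rank's atom count */ 0;
uint64_t num_total_atoms = /* global atom count from MPI_Allreduce */ 0;
int rank = 0;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

/* exclusive prefix sum: number of atoms owned by lower-ranked processes */
uint64_t n_offset = 0;
MPI_Exscan(&num_local_atoms, &n_offset, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
if (rank == 0) n_offset = 0;  /* recv buffer is undefined on rank 0 */

MPI_Offset type_off  = sizeof(uint64_t) + n_offset * sizeof(int);
MPI_Offset cat_off   = sizeof(uint64_t) + (num_total_atoms + n_offset) * sizeof(int);
MPI_Offset coord_off = sizeof(uint64_t) + 2 * num_total_atoms * sizeof(int)
                     + 3 * n_offset * sizeof(double);

With offsets built this way, each rank's three writes interleave into the shared file without overlap, which is what lets all three MPI_File_write_at_all calls proceed collectively with no coordination beyond the prefix sum.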