int main(int argc, char **argv) { int *buf, i, mynod, nprocs, len, b[3]; int errs=0, toterrs; MPI_Aint d[3]; MPI_File fh; MPI_Status status; char *filename; MPI_Datatype typevec, newtype, t[3]; MPI_Info info; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &mynod); if (nprocs != 2) { fprintf(stderr, "Run this program on two processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } /* process 0 takes the file name as a command-line argument and broadcasts it to other processes (length first, then string) */ if (!mynod) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { fprintf(stderr, "\n*# Usage: noncontig -fname filename\n\n"); MPI_Abort(MPI_COMM_WORLD, 1); } argv++; len = strlen(*argv); filename = (char *) malloc(len+1); strcpy(filename, *argv); MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } else { MPI_Bcast(&len, 1, MPI_INT, 0, MPI_COMM_WORLD); filename = (char *) malloc(len+1); MPI_Bcast(filename, len+1, MPI_CHAR, 0, MPI_COMM_WORLD); } buf = (int *) malloc(SIZE*sizeof(int)); MPI_Type_vector(SIZE/2, 1, 2, MPI_INT, &typevec); /* create a struct type with explicitly set LB and UB; displacements * of typevec are such that the types for the two processes won't * overlap. */ b[0] = b[1] = b[2] = 1; d[0] = 0; d[1] = mynod*sizeof(int); d[2] = SIZE*sizeof(int); t[0] = MPI_LB; t[1] = typevec; t[2] = MPI_UB; /* keep the struct, ditch the vector */ MPI_Type_struct(3, b, d, t, &newtype); MPI_Type_commit(&newtype); MPI_Type_free(&typevec); MPI_Info_create(&info); /* I am setting these info values for testing purposes only. It is better to use the default values in practice. */ MPI_Info_set(info, "ind_rd_buffer_size", "1209"); MPI_Info_set(info, "ind_wr_buffer_size", "1107"); if (!mynod) { #if VERBOSE fprintf(stderr, "\ntesting noncontiguous in memory, noncontiguous in file using independent I/O\n"); #endif MPI_File_delete(filename, MPI_INFO_NULL); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); /* set the file view for each process -- now writes go into the non- * overlapping but interleaved region defined by the struct type up above */ MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); /* fill our buffer with a pattern and write, using our type again */ for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE; MPI_File_write(fh, buf, 1, newtype, &status); MPI_Barrier(MPI_COMM_WORLD); /* fill the entire buffer with -1's. read back with type. * note that the result of this read should be that every other value * in the buffer is still -1, as defined by our type. */ for (i=0; i<SIZE; i++) buf[i] = -1; MPI_File_read_at(fh, 0, buf, 1, newtype, &status); /* check that all the values read are correct and also that we didn't * overwrite any of the -1 values that we shouldn't have. 
*/ for (i=0; i<SIZE; i++) { if (!mynod) { if ((i%2) && (buf[i] != -1)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]); } if (!(i%2) && (buf[i] != i)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i); } } else { if ((i%2) && (buf[i] != i + mynod*SIZE)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i + mynod*SIZE); } if (!(i%2) && (buf[i] != -1)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]); } } } MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); if (!mynod) { #if VERBOSE fprintf(stderr, "\ntesting noncontiguous in memory, contiguous in file using independent I/O\n"); #endif MPI_File_delete(filename, MPI_INFO_NULL); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); /* in this case we write to either the first half or the second half * of the file space, so the regions are not interleaved. this is done * by leaving the file view at its default. */ for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE; MPI_File_write_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &status); MPI_Barrier(MPI_COMM_WORLD); /* same as before; fill buffer with -1's and then read; every other * value should still be -1 after the read */ for (i=0; i<SIZE; i++) buf[i] = -1; MPI_File_read_at(fh, mynod*(SIZE/2)*sizeof(int), buf, 1, newtype, &status); /* verify that the buffer looks like it should */ for (i=0; i<SIZE; i++) { if (!mynod) { if ((i%2) && (buf[i] != -1)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]); } if (!(i%2) && (buf[i] != i)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i); } } else { if ((i%2) && (buf[i] != i + mynod*SIZE)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i + mynod*SIZE); } if (!(i%2) && (buf[i] != -1)) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be -1\n", mynod, i, buf[i]); } } } MPI_File_close(&fh); MPI_Barrier(MPI_COMM_WORLD); if (!mynod) { #if VERBOSE fprintf(stderr, "\ntesting contiguous in memory, noncontiguous in file using independent I/O\n"); #endif MPI_File_delete(filename, MPI_INFO_NULL); } MPI_Barrier(MPI_COMM_WORLD); MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_RDWR, info, &fh); /* set the file view so that we have interleaved access again */ MPI_File_set_view(fh, 0, MPI_INT, newtype, "native", info); /* this time write a contiguous buffer */ for (i=0; i<SIZE; i++) buf[i] = i + mynod*SIZE; MPI_File_write(fh, buf, SIZE, MPI_INT, &status); MPI_Barrier(MPI_COMM_WORLD); /* fill buffer with -1's; this time they will all be overwritten */ for (i=0; i<SIZE; i++) buf[i] = -1; MPI_File_read_at(fh, 0, buf, SIZE, MPI_INT, &status); for (i=0; i<SIZE; i++) { if (!mynod) { if (buf[i] != i) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i); } } else { if (buf[i] != i + mynod*SIZE) { errs++; fprintf(stderr, "Process %d: buf %d is %d, should be %d\n", mynod, i, buf[i], i + mynod*SIZE); } } } MPI_File_close(&fh); MPI_Allreduce( &errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (mynod == 0) { if( toterrs > 0) { fprintf( stderr, "Found %d errors\n", toterrs ); } else { fprintf( stdout, " No Errors\n" ); } } MPI_Type_free(&newtype); MPI_Info_free(&info); free(buf); free(filename); MPI_Finalize(); return 0; }
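/* Hedged sketch, not part of the test above: MPI_LB/MPI_UB and MPI_Type_struct
 * were removed in MPI-3.  The same "shift the vector by mynod ints and pad the
 * extent to SIZE ints" layout can be built with MPI_Type_create_struct plus
 * MPI_Type_create_resized (assumes the SIZE macro, mynod, and the typevec type
 * created above, before it is freed). */
static void build_interleaved_type_mpi3(int mynod, MPI_Datatype typevec, MPI_Datatype *newtype)
{
    int blk = 1;
    MPI_Aint shift = (MPI_Aint)(mynod * sizeof(int));
    MPI_Datatype shifted;
    /* place the vector's data mynod ints past the buffer start */
    MPI_Type_create_struct(1, &blk, &shift, &typevec, &shifted);
    /* set lb = 0 and extent = SIZE ints, as the LB/UB markers did */
    MPI_Type_create_resized(shifted, 0, (MPI_Aint)(SIZE * sizeof(int)), newtype);
    MPI_Type_commit(newtype);
    MPI_Type_free(&shifted);
}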
int main (int argc, char *argv[])
{
    int proc_num, my_rank, len;
    int i, j;
    double start_time, elapsed_time, all_time;
    double all_time_max, all_time_avg, all_time_min;
    struct timespec ts;
    MPI_Status status;
    MPI_File fh;
    MPI_Datatype contig_type;

    MPI_Init(&argc, &argv);
    // get the number of procs and rank in the comm
    MPI_Comm_size(MPI_COMM_WORLD, &proc_num);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    if (argc != 5) {
        printf("Wrong argument number!\n");
        printf("Use %s filename request_size repeat_times read_time\n", argv[0]);
        MPI_Finalize();
        return 0;
    }
    int req_size = atoi(argv[2]);
    int repeat_time = atoi(argv[3]);
    double ht_read_time = atof(argv[4]);
    ht_read_time *= 2.0;
    //if(my_rank == 0)
    //    printf("Sleep time: %lf\n", ht_read_time);
    ts.tv_sec  = (int)ht_read_time;
    ts.tv_nsec = (ht_read_time - ts.tv_sec) * 1000000000;

    MPI_Offset stride  = proc_num * req_size;
    MPI_Offset tmp_pos = my_rank * req_size;
    char *read_data = (char*)malloc(req_size);

    MPI_Type_contiguous(req_size, MPI_CHAR, &contig_type);
    MPI_Type_commit(&contig_type);

    start_time = MPI_Wtime();

    //MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    /* MPI_File_open reports failure through its return code (files default to
       the MPI_ERRORS_RETURN error handler); testing fh against NULL is not a
       reliable check. */
    int rc = MPI_File_open(MPI_COMM_WORLD, argv[1], MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    if (rc != MPI_SUCCESS) {
        printf("File does not exist\n");
        MPI_Abort(MPI_COMM_WORLD, -1);
    }

    for (i = 0; i < repeat_time; i++) {
        // MPI_Barrier(MPI_COMM_WORLD);
        MPI_File_read_at(fh, tmp_pos, read_data, 1, contig_type, &status);
        tmp_pos += stride;
        nanosleep(&ts, NULL);
    }
    MPI_File_close(&fh);
    elapsed_time = MPI_Wtime() - start_time;

    MPI_Reduce(&elapsed_time, &all_time,     1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&elapsed_time, &all_time_avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    all_time_avg /= proc_num;
    MPI_Barrier(MPI_COMM_WORLD);

    double data_in_mb = (proc_num * (double)req_size * repeat_time) / (1024.0*1024.0);
    if (my_rank == 0)
        printf("Total time: %lf Min time: %lf Avg time: %lf Total data: %dM Agg Bandwidth: %lf\n",
               all_time, all_time_min, all_time_avg, (int)data_in_mb, data_in_mb/all_time);
    // printf("%d: %lf\n", my_rank, elapsed_time);

    free(read_data);
    MPI_Type_free(&contig_type);
    MPI_Finalize();
    return 0;
}
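/* Hedged alternative, not in the benchmark above: the strided pattern (each
 * rank reads req_size bytes every proc_num*req_size bytes) can also be set up
 * once as a file view, after which every iteration is a plain MPI_File_read at
 * the implicit file pointer.  Assumes the same variable names as above. */
static void set_strided_view(MPI_File fh, int my_rank, int proc_num,
                             int req_size, int repeat_time)
{
    MPI_Datatype filetype;
    MPI_Type_vector(repeat_time, req_size, proc_num * req_size, MPI_CHAR, &filetype);
    MPI_Type_commit(&filetype);
    MPI_File_set_view(fh, (MPI_Offset)my_rank * req_size, MPI_CHAR, filetype,
                      "native", MPI_INFO_NULL);
    MPI_Type_free(&filetype);
    /* the read loop then becomes
     *   MPI_File_read(fh, read_data, req_size, MPI_CHAR, &status);
     * with no per-iteration offset arithmetic. */
}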
int main (int argc, char ** argv) { int taskid, ntasks; int xsize, ysize, colmax; pixel src[MAX_PIXELS]; double w[MAX_RAD]; struct timespec stime, etime; struct timespec tstime, tetime; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); MPI_Comm_size(MPI_COMM_WORLD, &ntasks); // Create a custom MPI datatype for pixel pixel item; MPI_Datatype pixel_mpi; MPI_Datatype type[3] = { MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR }; int blocklen[] = { 1, 1, 1 }; MPI_Aint start, disp[3]; MPI_Address( &item, &start ); MPI_Address( &item.r, &disp[0] ); MPI_Address( &item.g, &disp[1] ); MPI_Address( &item.b, &disp[2] ); disp[0] -= start; disp[1] -= start; disp[2] -= start; MPI_Type_struct(3, blocklen, disp, type, &pixel_mpi); MPI_Type_commit(&pixel_mpi); int buffsize, radius, startY, endY; /* Take care of the arguments */ if (argc != 4) { fprintf(stderr, "Usage: %s radius infile outfile\n", argv[0]); exit(1); } radius = atoi(argv[1]); if((radius > MAX_RAD) || (radius < 1)) { fprintf(stderr, "Radius (%d) must be greater than zero and less then %d\n", radius, MAX_RAD); exit(1); } if (taskid == ROOT) { /* read file */ if(read_ppm (argv[2], &xsize, &ysize, &colmax, (char *) src) != 0) exit(1); if (colmax > 255) { fprintf(stderr, "Too large maximum color-component value\n"); exit(1); } /* filter */ printf("Has read the image, generating coefficients\n"); get_gauss_weights(radius, w); } // Broadcast the gaussian weight vector MPI_Bcast(w, MAX_RAD, MPI_DOUBLE, ROOT, MPI_COMM_WORLD); // Broadcast image dimensions MPI_Bcast(&xsize, 1, MPI_INT, ROOT, MPI_COMM_WORLD); MPI_Bcast(&ysize, 1, MPI_INT, ROOT, MPI_COMM_WORLD); // Calculate chunk size buffsize = ceil((float)ysize / (float)ntasks) * xsize; pixel recvbuff[MAX_PIXELS]; int sendcnts[ntasks], displs[ntasks], result_write_starts[ntasks], recievecounts[ntasks]; int i; // Generate sendcount and displacement vectors for Scatterv for (i = 0; i < ntasks; i++) { // Send enought neighbors to make it possible to also calculate // blur in the edges of the chunk sendcnts[i] = buffsize + 2 * radius * xsize; displs[i] = max(0, i * buffsize); } clock_gettime(CLOCK_REALTIME, &tstime); // Send the image in chunks to all nodes MPI_Scatterv(src, sendcnts, displs, pixel_mpi, recvbuff, buffsize + 2 * radius * xsize, pixel_mpi, ROOT, MPI_COMM_WORLD); clock_gettime(CLOCK_REALTIME, &stime); // Run the filter on the recieved chunk blurfilter(xsize, (ysize / ntasks) + 2 * radius, recvbuff, radius, w, taskid); clock_gettime(CLOCK_REALTIME, &etime); printf("Filtering at %i took: %g secs\n", taskid, (etime.tv_sec - stime.tv_sec) + 1e-9*(etime.tv_nsec - stime.tv_nsec)); // Generate sendcount and displacement vectors for Scatterv for (i = 0; i < ntasks; i++) { result_write_starts[i] = i * buffsize + xsize * radius; // Only send as much of the chunk that is really useful data recievecounts[i] = buffsize; } // Start writing from the beginning of the buffer if root result_write_starts[0] = 0; // Since the root node has no overlap in the beginning, we need to // send a little bit more from that node than from the rest. 
recievecounts[0] = buffsize + xsize * radius; pixel* result_read_start; if(taskid==ROOT) { // Root-node has no duplicated data in the beginning result_read_start = recvbuff; } else { // Jump over the duplicated data in the beginning of each chunk result_read_start = recvbuff + xsize * radius; } MPI_Gatherv(result_read_start, recievecounts[taskid], pixel_mpi, src, recievecounts, result_write_starts, pixel_mpi, ROOT, MPI_COMM_WORLD); clock_gettime(CLOCK_REALTIME, &tetime); MPI_Finalize(); /* write result */ if (taskid == ROOT) { printf("Everything took: %g secs\n", (tetime.tv_sec - tstime.tv_sec) + 1e-9*(tetime.tv_nsec - tstime.tv_nsec)); printf("Writing output file\n"); if(write_ppm (argv[3], xsize, ysize, (char *)src) != 0) exit(1); } return(0); }
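/* Hedged sketch, not in the program above: MPI_Address and MPI_Type_struct are
 * the removed MPI-1 names; the same pixel datatype can be built with the MPI-3
 * interface (assumes the pixel struct with the r, g, b members used above). */
static MPI_Datatype make_pixel_type(void)
{
    pixel item;
    MPI_Datatype types[3] = { MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR, MPI_UNSIGNED_CHAR };
    int blocklen[3] = { 1, 1, 1 };
    MPI_Aint base, disp[3];
    MPI_Datatype pixel_mpi;
    MPI_Get_address(&item, &base);
    MPI_Get_address(&item.r, &disp[0]);
    MPI_Get_address(&item.g, &disp[1]);
    MPI_Get_address(&item.b, &disp[2]);
    for (int k = 0; k < 3; k++)
        disp[k] = MPI_Aint_diff(disp[k], base);   /* MPI-3.1 helper */
    MPI_Type_create_struct(3, blocklen, disp, types, &pixel_mpi);
    MPI_Type_commit(&pixel_mpi);
    return pixel_mpi;
}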
int main(int argc, char **argv) { int i, j, rank, nranks, peer, bufsize, errors; double *win_buf, *src_buf, *dst_buf; MPI_Win buf_win; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); bufsize = XDIM * YDIM * sizeof(double); MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf); MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &src_buf); MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &dst_buf); if (rank == 0) if (verbose) printf("MPI RMA Strided Put Test:\n"); for (i = 0; i < XDIM*YDIM; i++) { *(win_buf + i) = 1.0 + rank; *(src_buf + i) = 1.0 + rank; } MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win); peer = (rank+1) % nranks; /* Perform ITERATIONS strided put operations */ for (i = 0; i < ITERATIONS; i++) { MPI_Aint idx_loc[SUB_YDIM]; int idx_rem[SUB_YDIM]; int blk_len[SUB_YDIM]; MPI_Datatype src_type, dst_type; if (rank == 0) if (verbose) printf(" + iteration %d\n", i); for (j = 0; j < SUB_YDIM; j++) { MPI_Get_address(&src_buf[j*XDIM], &idx_loc[j]); idx_rem[j] = j*XDIM*sizeof(double); blk_len[j] = SUB_XDIM*sizeof(double); } MPI_Type_create_hindexed(SUB_YDIM, blk_len, idx_loc, MPI_BYTE, &src_type); MPI_Type_create_indexed_block(SUB_YDIM, SUB_XDIM*sizeof(double), idx_rem, MPI_BYTE, &dst_type); MPI_Type_commit(&src_type); MPI_Type_commit(&dst_type); MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win); MPI_Put(MPI_BOTTOM, 1, src_type, peer, 0, 1, dst_type, buf_win); MPI_Win_unlock(peer, buf_win); MPI_Type_free(&src_type); MPI_Type_free(&dst_type); } MPI_Barrier(MPI_COMM_WORLD); /* Verify that the results are correct */ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, rank, 0, buf_win); errors = 0; for (i = 0; i < SUB_XDIM; i++) { for (j = 0; j < SUB_YDIM; j++) { const double actual = *(win_buf + i + j*XDIM); const double expected = (1.0 + ((rank+nranks-1)%nranks)); if (actual - expected > 1e-10) { printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n", rank, j, i, expected, actual); errors++; fflush(stdout); } } } for (i = SUB_XDIM; i < XDIM; i++) { for (j = 0; j < SUB_YDIM; j++) { const double actual = *(win_buf + i + j*XDIM); const double expected = 1.0 + rank; if (actual - expected > 1e-10) { printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n", rank, j, i, expected, actual); errors++; fflush(stdout); } } } for (i = 0; i < XDIM; i++) { for (j = SUB_YDIM; j < YDIM; j++) { const double actual = *(win_buf + i + j*XDIM); const double expected = 1.0 + rank; if (actual - expected > 1e-10) { printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n", rank, j, i, expected, actual); errors++; fflush(stdout); } } } MPI_Win_unlock(rank, buf_win); MPI_Win_free(&buf_win); MPI_Free_mem(win_buf); MPI_Free_mem(src_buf); MPI_Free_mem(dst_buf); MPI_Finalize(); if (errors == 0) { if (rank == 0) printf(" No Errors\n"); return 0; } else { printf("%d: Fail\n", rank); return 1; } }
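/* Hedged sketch, not in the test above: the SUB_XDIM x SUB_YDIM patch that the
 * hindexed/indexed_block byte types describe can also be expressed as a single
 * MPI_Type_create_subarray over the XDIM x YDIM row-major array of doubles;
 * the same type then serves as both origin and target type in MPI_Put. */
static MPI_Datatype make_patch_type(void)
{
    int full[2]  = { YDIM, XDIM };        /* row-major: rows first */
    int sub[2]   = { SUB_YDIM, SUB_XDIM };
    int start[2] = { 0, 0 };
    MPI_Datatype patch;
    MPI_Type_create_subarray(2, full, sub, start, MPI_ORDER_C, MPI_DOUBLE, &patch);
    MPI_Type_commit(&patch);
    return patch;   /* e.g. MPI_Put(src_buf, 1, patch, peer, 0, 1, patch, buf_win) */
}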
void parallel_blas3_product(double *A, double *B, double *C, int m, int k, int n, int id, int np) { if (k % np != 0) { if (id == 0) fprintf(stderr, "k is not divisible by np.\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Status status; int l_k = k / np; double *l_A = allocate_double_vector(l_k * m); double *l_B = allocate_double_vector(l_k * k); MPI_Datatype block_col_t; MPI_Datatype block_row_t; // for blocks in B = k x n MPI_Type_vector( n, // count = number of blocks, i.e. length of column * l_k(num rows) l_k, // blocklen = number of things in each block k, // stride = difference between start of blocks MPI_DOUBLE, // old datatype &block_row_t // new datatype ); MPI_Type_commit(&block_row_t); // for column of A= m x k MPI_Type_contiguous( m * l_k, // count = number of items MPI_DOUBLE, // old_type = type of items &block_col_t // new_mpi_type = the new datatype ); MPI_Type_commit(&block_col_t); if (id == 0) { // copy correct elements from A to l_A memcpy(l_A, A, sizeof(double) * l_k * m); for (int i = 1; i < np; ++i) { MPI_Send(&(A[0 + m*(i*l_k)]), 1, block_col_t, i, 0, MPI_COMM_WORLD); } } else { MPI_Recv(l_A, (m*l_k), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status); } if (id == 0) { // copy numbers from B to l_B for (int col = 0; col < n; ++col) { for (int row = 0; row < l_k; ++row) { l_B[row + l_k*col] = B[row + k*col]; } } for (int i = 1; i < np; ++i) { MPI_Send(&(B[i*l_k]), 1, block_row_t, i, 0, MPI_COMM_WORLD); } } else { MPI_Recv(l_B, (l_k*n), MPI_DOUBLE, 0, 0, MPI_COMM_WORLD, &status); } /* //debugging only for (int i = 0; i < l_k*n; ++i) { printf("[%i]: Row l_B[%i]=%f\n", id, i, l_B[i]); } for (int i = 0; i < l_k*m; ++i) { printf("[%i]: Col l_A[%i]=%f\n", id, i, l_A[i]); } */ // C only matters on process 0 and should be allocated outside this function double *local_C = allocate_double_vector(m*n); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, l_k, 1, l_A, m, l_B, l_k, 0, local_C, m); MPI_Reduce(local_C, C, m*n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); free(local_C); free(l_A); free(l_B); }
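/* Hedged sketch, not in the function above: because block_col_t is contiguous
 * (m*l_k doubles), the send/recv loop that distributes A can be collapsed into
 * one collective.  A is only significant at rank 0; the analogous call for B
 * would additionally need the strided block_row_t resized to an l_k-column
 * extent before it tiles correctly in a scatter. */
static void distribute_A(const double *A, double *l_A, int m, int l_k,
                         MPI_Datatype block_col_t)
{
    MPI_Scatter(A, 1, block_col_t, l_A, m * l_k, MPI_DOUBLE, 0, MPI_COMM_WORLD);
}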
/* test uses a struct type that describes data that is contiguous, * but processed in a noncontiguous way. */ int struct_negdisp_test(void) { int err, errs = 0; int sendbuf[6] = { 1, 2, 3, 4, 5, 6 }; int recvbuf[6] = { -1, -2, -3, -4, -5, -6 }; MPI_Datatype mystruct; MPI_Request request; MPI_Status status; MPI_Aint disps[2] = { 0, -1*((int) sizeof(int)) }; int blks[2] = { 1, 1, }; MPI_Datatype types[2] = { MPI_INT, MPI_INT }; err = MPI_Type_struct(2, blks, disps, types, &mystruct); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Type_struct returned error\n"); } } MPI_Type_commit(&mystruct); err = MPI_Irecv(recvbuf+1, 4, MPI_INT, 0, 0, MPI_COMM_SELF, &request); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Irecv returned error\n"); } } err = MPI_Send(sendbuf+2, 2, mystruct, 0, 0, MPI_COMM_SELF); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Send returned error\n"); } } err = MPI_Wait(&request, &status); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Wait returned error\n"); } } /* verify data */ if (recvbuf[0] != -1) { errs++; if (verbose) { fprintf(stderr, "recvbuf[0] = %d; should be %d\n", recvbuf[0], -1); } } if (recvbuf[1] != 3) { errs++; if (verbose) { fprintf(stderr, "recvbuf[1] = %d; should be %d\n", recvbuf[1], 3); } } if (recvbuf[2] != 2) { errs++; if (verbose) { fprintf(stderr, "recvbuf[2] = %d; should be %d\n", recvbuf[2], 2); } } if (recvbuf[3] != 5) { errs++; if (verbose) { fprintf(stderr, "recvbuf[3] = %d; should be %d\n", recvbuf[3], 5); } } if (recvbuf[4] != 4) { errs++; if (verbose) { fprintf(stderr, "recvbuf[4] = %d; should be %d\n", recvbuf[4], 4); } } if (recvbuf[5] != -6) { errs++; if (verbose) { fprintf(stderr, "recvbuf[5] = %d; should be %d\n", recvbuf[5], -6); } } MPI_Type_free(&mystruct); return errs; }
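/* Added worked example for the type above (comment only, not in the original,
 * assuming a 4-byte int): mystruct has typemap {(int, 0), (int, -4)} and extent
 * 8 bytes, so sending two elements from sendbuf+2 transmits sendbuf[2],
 * sendbuf[1], sendbuf[4], sendbuf[3] = 3, 2, 5, 4; received as 4 contiguous
 * ints at recvbuf+1 this gives recvbuf[1..4] = 3, 2, 5, 4, which is exactly
 * what the checks above verify. */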
/** Optimized implementation of the ARMCI IOV operation that uses an MPI * datatype to achieve a one-sided gather/scatter. Does not use MPI_BOTTOM. */ int ARMCII_Iov_op_datatype_no_bottom(enum ARMCII_Op_e op, void **src, void **dst, int count, int elem_count, MPI_Datatype type, int proc) { gmr_t *mreg; MPI_Datatype type_loc, type_rem; MPI_Aint disp_loc[count]; int disp_rem[count]; int block_len[count]; void *dst_win_base; int dst_win_size, i, type_size; void **buf_rem, **buf_loc; MPI_Aint base_rem; MPI_Aint base_loc; void *base_loc_ptr; switch(op) { case ARMCII_OP_ACC: case ARMCII_OP_PUT: buf_rem = dst; buf_loc = src; break; case ARMCII_OP_GET: buf_rem = src; buf_loc = dst; break; default: ARMCII_Error("unknown operation (%d)", op); return 1; } MPI_Type_size(type, &type_size); mreg = gmr_lookup(buf_rem[0], proc); ARMCII_Assert_msg(mreg != NULL, "Invalid remote pointer"); dst_win_base = mreg->slices[proc].base; dst_win_size = mreg->slices[proc].size; MPI_Get_address(dst_win_base, &base_rem); /* Pick a base address for the start of the origin's datatype */ base_loc_ptr = buf_loc[0]; MPI_Get_address(base_loc_ptr, &base_loc); for (i = 0; i < count; i++) { MPI_Aint target_rem, target_loc; MPI_Get_address(buf_loc[i], &target_loc); MPI_Get_address(buf_rem[i], &target_rem); disp_loc[i] = target_loc - base_loc; disp_rem[i] = (target_rem - base_rem)/type_size; block_len[i] = elem_count; ARMCII_Assert_msg((target_rem - base_rem) % type_size == 0, "Transfer size is not a multiple of type size"); ARMCII_Assert_msg(disp_rem[i] >= 0 && disp_rem[i] < dst_win_size, "Invalid remote pointer"); ARMCII_Assert_msg(((uint8_t*)buf_rem[i]) + block_len[i] <= ((uint8_t*)dst_win_base) + dst_win_size, "Transfer exceeds buffer length"); } MPI_Type_create_hindexed(count, block_len, disp_loc, type, &type_loc); MPI_Type_create_indexed_block(count, elem_count, disp_rem, type, &type_rem); //MPI_Type_indexed(count, block_len, disp_rem, type, &type_rem); MPI_Type_commit(&type_loc); MPI_Type_commit(&type_rem); gmr_lock(mreg, proc); switch(op) { case ARMCII_OP_ACC: gmr_accumulate_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc); break; case ARMCII_OP_PUT: gmr_put_typed(mreg, base_loc_ptr, 1, type_loc, MPI_BOTTOM, 1, type_rem, proc); break; case ARMCII_OP_GET: gmr_get_typed(mreg, MPI_BOTTOM, 1, type_rem, base_loc_ptr, 1, type_loc, proc); break; default: ARMCII_Error("unknown operation (%d)", op); return 1; } gmr_unlock(mreg, proc); MPI_Type_free(&type_loc); MPI_Type_free(&type_rem); return 0; }
static int two_phase_exchage_data(mca_io_ompio_file_t *fh, void *buf, char *write_buf, struct iovec *offset_length, int *send_size,int *start_pos, int *recv_size, OMPI_MPI_OFFSET_TYPE off, OMPI_MPI_OFFSET_TYPE size, int *count, int *partial_recv, int *sent_to_proc, int contig_access_count, OMPI_MPI_OFFSET_TYPE min_st_offset, OMPI_MPI_OFFSET_TYPE fd_size, OMPI_MPI_OFFSET_TYPE *fd_start, OMPI_MPI_OFFSET_TYPE *fd_end, Flatlist_node *flat_buf, mca_io_ompio_access_array_t *others_req, int *send_buf_idx, int *curr_to_proc, int *done_to_proc, int iter, int *buf_idx,MPI_Aint buftype_extent, int striping_unit, int *aggregator_list, int *hole){ int *tmp_len=NULL, sum, *srt_len=NULL, nprocs_recv, nprocs_send, k,i,j; int ret=OMPI_SUCCESS; MPI_Request *requests=NULL, *send_req=NULL; MPI_Datatype *recv_types=NULL; OMPI_MPI_OFFSET_TYPE *srt_off=NULL; char **send_buf = NULL; ret = fh->f_comm->c_coll.coll_alltoall (recv_size, 1, MPI_INT, send_size, 1, MPI_INT, fh->f_comm, fh->f_comm->c_coll.coll_alltoall_module); if ( OMPI_SUCCESS != ret ){ return ret; } nprocs_recv = 0; for (i=0;i<fh->f_size;i++){ if (recv_size[i]){ nprocs_recv++; } } recv_types = (MPI_Datatype *) malloc (( nprocs_recv + 1 ) * sizeof(MPI_Datatype *)); if ( NULL == recv_types ){ return OMPI_ERR_OUT_OF_RESOURCE; } tmp_len = (int *) malloc(fh->f_size*sizeof(int)); if ( NULL == tmp_len ) { return OMPI_ERR_OUT_OF_RESOURCE; } j = 0; for (i=0;i<fh->f_size;i++){ if (recv_size[i]) { if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; tmp_len[i] = others_req[i].lens[k]; others_req[i].lens[k] = partial_recv[i]; } MPI_Type_hindexed(count[i], &(others_req[i].lens[start_pos[i]]), &(others_req[i].mem_ptrs[start_pos[i]]), MPI_BYTE, recv_types+j); MPI_Type_commit(recv_types+j); j++; } } sum = 0; for (i=0;i<fh->f_size;i++) sum += count[i]; srt_off = (OMPI_MPI_OFFSET_TYPE *) malloc((sum+1)*sizeof(OMPI_MPI_OFFSET_TYPE)); if ( NULL == srt_off ){ return OMPI_ERR_OUT_OF_RESOURCE; } srt_len = (int *) malloc((sum+1)*sizeof(int)); if ( NULL == srt_len ) { return OMPI_ERR_OUT_OF_RESOURCE; } two_phase_heap_merge(others_req, count, srt_off, srt_len, start_pos, fh->f_size,fh->f_rank, nprocs_recv, sum); for (i=0; i<fh->f_size; i++) if (partial_recv[i]) { k = start_pos[i] + count[i] - 1; others_req[i].lens[k] = tmp_len[i]; } if ( NULL != tmp_len ){ free(tmp_len); } *hole = 0; if (off != srt_off[0]){ *hole = 1; } else{ for (i=1;i<sum;i++){ if (srt_off[i] <= srt_off[0] + srt_len[0]){ int new_len = srt_off[i] + srt_len[i] - srt_off[0]; if(new_len > srt_len[0]) srt_len[0] = new_len; } else break; } if (i < sum || size != srt_len[0]) *hole = 1; } if ( NULL != srt_off ){ free(srt_off); } if ( NULL != srt_len ){ free(srt_len); } if (nprocs_recv){ if (*hole){ if (off > 0){ fh->f_io_array = (mca_io_ompio_io_array_t *)malloc (sizeof(mca_io_ompio_io_array_t)); if (NULL == fh->f_io_array) { opal_output(1, "OUT OF MEMORY\n"); return OMPI_ERR_OUT_OF_RESOURCE; } fh->f_io_array[0].offset =(IOVBASE_TYPE *)(intptr_t)off; fh->f_num_of_io_entries = 1; fh->f_io_array[0].length = size; fh->f_io_array[0].memory_address = write_buf; if (fh->f_num_of_io_entries){ if (OMPI_SUCCESS != fh->f_fbtl->fbtl_preadv (fh, NULL)) { opal_output(1, "READ FAILED\n"); return OMPI_ERROR; } } } fh->f_num_of_io_entries = 0; if (NULL != fh->f_io_array) { free (fh->f_io_array); fh->f_io_array = NULL; } } } nprocs_send = 0; for (i=0; i <fh->f_size; i++) if (send_size[i]) nprocs_send++; #if DEBUG_ON printf("%d : nprocs_send : %d\n", fh->f_rank,nprocs_send); #endif requests = (MPI_Request *) 
malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request)); if ( NULL == requests ){ return OMPI_ERR_OUT_OF_RESOURCE; } j = 0; for (i=0; i<fh->f_size; i++) { if (recv_size[i]) { ret = MCA_PML_CALL(irecv(MPI_BOTTOM, 1, recv_types[j], i, fh->f_rank+i+100*iter, fh->f_comm, requests+j)); if ( OMPI_SUCCESS != ret ){ return ret; } j++; } } send_req = requests + nprocs_recv; if (fh->f_flags & OMPIO_CONTIGUOUS_MEMORY) { j = 0; for (i=0; i <fh->f_size; i++) if (send_size[i]) { ret = MCA_PML_CALL(isend(((char *) buf) + buf_idx[i], send_size[i], MPI_BYTE, i, fh->f_rank+i+100*iter, MCA_PML_BASE_SEND_STANDARD, fh->f_comm, send_req+j)); if ( OMPI_SUCCESS != ret ){ return ret; } j++; buf_idx[i] += send_size[i]; } } else if(nprocs_send && (!(fh->f_flags & OMPIO_CONTIGUOUS_MEMORY))){ send_buf = (char **) malloc(fh->f_size*sizeof(char*)); if ( NULL == send_buf ){ return OMPI_ERR_OUT_OF_RESOURCE; } for (i=0; i < fh->f_size; i++){ if (send_size[i]) { send_buf[i] = (char *) malloc(send_size[i]); if ( NULL == send_buf[i] ){ return OMPI_ERR_OUT_OF_RESOURCE; } } } ret = two_phase_fill_send_buffer(fh, buf,flat_buf, send_buf, offset_length, send_size, send_req,sent_to_proc, contig_access_count, min_st_offset, fd_size, fd_start, fd_end, send_buf_idx, curr_to_proc, done_to_proc, iter, buftype_extent, striping_unit, aggregator_list); if ( OMPI_SUCCESS != ret ){ return ret; } } for (i=0; i<nprocs_recv; i++) MPI_Type_free(recv_types+i); free(recv_types); ret = ompi_request_wait_all (nprocs_send+nprocs_recv, requests, MPI_STATUS_IGNORE); if ( NULL != requests ){ free(requests); } return ret; }
int main( int argc, char *argv[]) { struct a { int i; char c; } s[10], s1[10]; int j; int errs = 0; int rank, size, tsize; MPI_Aint text; int blens[2]; MPI_Aint disps[2]; MPI_Datatype bases[2]; MPI_Datatype str, con; MPI_Status status; MTest_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); for( j = 0; j < 10; j ++ ) { s[j].i = j + rank; s[j].c = j + rank + 'a'; } blens[0] = blens[1] = 1; disps[0] = 0; disps[1] = sizeof(int); bases[0] = MPI_INT; bases[1] = MPI_CHAR; MPI_Type_struct( 2, blens, disps, bases, &str ); MPI_Type_commit( &str ); MPI_Type_contiguous( 10, str, &con ); MPI_Type_commit( &con ); MPI_Type_size( con, &tsize ); MPI_Type_extent( con, &text ); MTestPrintfMsg( 0, "Size of MPI array is %d, extent is %d\n", tsize, text ); /* The following block of code is only for verbose-level output */ { void * p1, *p2; p1 = s; p2 = &(s[10].i); /* This statement may fail on some systems */ MTestPrintfMsg( 0, "C array starts at %p and ends at %p for a length of %d\n", s, &(s[9].c), (char *)p2-(char *)p1 ); } MPI_Type_extent( str, &text ); MPI_Type_size( str, &tsize ); MTestPrintfMsg( 0, "Size of MPI struct is %d, extent is %d\n", tsize, (int)text ); MTestPrintfMsg( 0, "Size of C struct is %d\n", sizeof(struct a) ); if (text != sizeof(struct a)) { fprintf( stderr, "Extent of struct a (%d) does not match sizeof (%d)\n", (int)text, (int)sizeof(struct a) ); errs++; } MPI_Send( s, 1, con, rank ^ 1, 0, MPI_COMM_WORLD ); MPI_Recv( s1, 1, con, rank ^ 1, 0, MPI_COMM_WORLD, &status ); for( j = 0; j < 10; j++ ) { MTestPrintfMsg( 0, "%d Sent: %d %c, Got: %d %c\n", rank, s[j].i, s[j].c, s1[j].i, s1[j].c ); if ( s1[j].i != j + status.MPI_SOURCE ) { errs++; fprintf( stderr, "Got s[%d].i = %d; expected %d\n", j, s1[j].i, j + status.MPI_SOURCE ); } if ( s1[j].c != 'a' + j + status.MPI_SOURCE ) { errs++; /* If the character is not a printing character, this can generate a file that diff, for example, believes is a binary file */ if (isprint( (int)(s1[j].c) )) { fprintf( stderr, "Got s[%d].c = %c; expected %c\n", j, s1[j].c, j + status.MPI_SOURCE + 'a'); } else { fprintf( stderr, "Got s[%d].c = %x; expected %c\n", j, (int)s1[j].c, j + status.MPI_SOURCE + 'a'); } } } MPI_Type_free( &str ); MPI_Type_free( &con ); MTest_Finalize( errs ); MPI_Finalize(); return 0; }
static int test_vard(int ncid, int *varid) { int rank, nprocs, err, nerrs=0, i, buf[NY+4][NX+4]; int array_of_sizes[2], array_of_subsizes[2], array_of_starts[2]; MPI_Offset start[2], count[2]; MPI_Datatype buftype, rec_filetype, fix_filetype; MPI_Comm_size(MPI_COMM_WORLD, &nprocs); MPI_Comm_rank(MPI_COMM_WORLD, &rank); start[0] = 0; start[1] = NX*rank; count[0] = 2; count[1] = NX; /* create a buftype with ghost cells on each side */ array_of_sizes[0] = count[0]+4; array_of_sizes[1] = count[1]+4; array_of_subsizes[0] = count[0]; array_of_subsizes[1] = count[1]; array_of_starts[0] = 2; array_of_starts[1] = 2; MPI_Type_create_subarray(2, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_INT, &buftype); MPI_Type_commit(&buftype); /* create a file type for the fixed-size variable */ array_of_sizes[0] = 2; array_of_sizes[1] = NX*nprocs; array_of_subsizes[0] = count[0]; array_of_subsizes[1] = count[1]; array_of_starts[0] = start[0]; array_of_starts[1] = start[1]; MPI_Type_create_subarray(2, array_of_sizes, array_of_subsizes, array_of_starts, MPI_ORDER_C, MPI_INT, &fix_filetype); MPI_Type_commit(&fix_filetype); /* create a file type for the record variable */ int *array_of_blocklengths=(int*) malloc(count[0]*sizeof(int)); MPI_Aint *array_of_displacements=(MPI_Aint*) malloc(count[0]*sizeof(MPI_Aint)); MPI_Offset recsize; err = ncmpi_inq_recsize(ncid, &recsize); for (i=0; i<count[0]; i++) { array_of_blocklengths[i] = count[1]; array_of_displacements[i] = start[1]*sizeof(int) + recsize * i; } MPI_Type_create_hindexed(2, array_of_blocklengths, array_of_displacements, MPI_INT, &rec_filetype); MPI_Type_commit(&rec_filetype); free(array_of_blocklengths); free(array_of_displacements); TRC(ncmpi_put_vard_all)(ncid, varid[0], rec_filetype, &buf[0][0], 1, buftype); CHECK_ERR TRC(ncmpi_rename_var)(ncid, varid[0], "rec_VAR"); CHECK_ERR TRC(ncmpi_put_vard_all)(ncid, varid[1], fix_filetype, &buf[0][0], 1, buftype); CHECK_ERR TRC(ncmpi_rename_var)(ncid, varid[0], "rec_var"); CHECK_ERR TRC(ncmpi_begin_indep_data)(ncid); CHECK_ERR TRC(ncmpi_put_vard)(ncid, varid[0], rec_filetype, &buf[0][0], 1, buftype); CHECK_ERR TRC(ncmpi_rename_var)(ncid, varid[0], "rec_VAR"); CHECK_ERR TRC(ncmpi_put_vard)(ncid, varid[1], fix_filetype, &buf[0][0], 1, buftype); CHECK_ERR TRC(ncmpi_rename_var)(ncid, varid[0], "rec_var"); CHECK_ERR TRC(ncmpi_end_indep_data)(ncid); CHECK_ERR MPI_Type_free(&rec_filetype); MPI_Type_free(&fix_filetype); MPI_Type_free(&buftype); return nerrs; }
int main( int argc, char *argv[] ) { int myrank, size; int *sendbuf,*recvbuf; int i; int count; int block[3]; MPI_Aint disp[3]; MPI_Status *stat; MPI_Request *request; MPI_Datatype newtype; MPI_Datatype dtype[3]; MPI_Init( 0, 0 ); MPI_Comm_size( MPI_COMM_WORLD, &size ); MPI_Comm_rank( MPI_COMM_WORLD, &myrank ); sendbuf = (int*)malloc(MAXSIZE); recvbuf = (int*)malloc(MAXSIZE); stat = (MPI_Status*)malloc(sizeof(MPI_Status)*size); request = (MPI_Request*)malloc(sizeof(MPI_Request)*size); for(i=0;i<MAXSIZE/4;i++){ sendbuf[i] = 1; recvbuf[i] = 0; } count = 2; dtype[0] = MPI_LB; dtype[1] = MPI_INT; dtype[2] = MPI_UB; block[0] = 1; block[1] = (MAXSIZE/count)/size/sizeof(int); block[2] = 1; disp[0] = 0; disp[1] = (MAXSIZE/count)/size*myrank; disp[2] = MAXSIZE/count; MPI_Type_struct( 3, block, disp, dtype, &newtype); MPI_Type_commit(&newtype); if(myrank == 0){ MPI_Send( sendbuf, count, newtype, 1, 0, MPI_COMM_WORLD); } if(myrank == 1){ MPI_Recv( recvbuf, count, newtype, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } if(myrank == 1){ for(i=0;i<block[1];i++){ if((0 != recvbuf[i])){ printf("MYRANK %d failed 1 recvbuf[%d] %d\n",myrank,i,recvbuf[i]); MPI_Finalize(); exit(0); } } for(i = block[1] ; i<block[1]*2;i++){ if(1 != recvbuf[i]){ printf("MYRANK %d failed 2 recvbuf[%d] %d\n",myrank,i,recvbuf[i]); MPI_Finalize(); exit(0); } } for(i = block[1]*2 ; i<block[1]*3;i++){ if(0 != recvbuf[i]){ printf("MYRANK %d failed 3 recvbuf[%d] %d\n",myrank,i,recvbuf[i]); MPI_Finalize(); exit(0); } } for(i = block[1]*3 ; i<block[1]*4;i++){ if(1 != recvbuf[i]){ printf("MYRANK %d failed 4 recvbuf[%d] %d\n",myrank,i,recvbuf[i]); MPI_Finalize(); exit(0); } } } MPI_Finalize(); return 0; }
int main( int argc, char **argv ) { num_particles = read_int( argc, argv, "-n", 1000 ); char *savename = read_string( argc, argv, "-o", NULL ); MPI_Init( &argc, &argv ); MPI_Comm_size( MPI_COMM_WORLD, &num_procs ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); fsave = savename && rank == 0 ? fopen( savename, "w" ) : NULL; particles = (particle_t*) malloc( num_particles * sizeof(particle_t) ); MPI_Type_contiguous( 7, MPI_DOUBLE, &PARTICLE ); MPI_Type_commit( &PARTICLE ); set_size( num_particles ); init_vars(); init_partitions(); if( rank == 0 ) { init_particles( num_particles, particles ); } MPI_Bcast(particles, num_particles, PARTICLE, 0, MPI_COMM_WORLD); partition_particles(); init_grid(); double simulation_time = read_timer(); for( int step = 0; step < NSTEPS; step++ ) { if (rank == 0) { right_fringe(); MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req); MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req); MPI_Wait(&send_right_req, &send_right_stat); MPI_Wait(&recv_right_req, &recv_right_stat); MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count); num_augmented_particles = num_my_particles + recvd_right_count; memcpy(my_particles + num_my_particles, right_receiving_buffer, recvd_right_count * sizeof(particle_t)); } else if (rank == (num_procs-1)) { left_fringe(); MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req); MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req); MPI_Wait(&send_left_req, &send_left_stat); MPI_Wait(&recv_left_req, &recv_left_stat); MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count); num_augmented_particles = num_my_particles + recvd_left_count; memcpy(my_particles + num_my_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t)); } else { left_fringe(); right_fringe(); MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req); MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req); MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req); MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req); MPI_Wait(&send_left_req, &send_left_stat); MPI_Wait(&send_right_req, &send_right_stat); MPI_Wait(&recv_left_req, &recv_left_stat); MPI_Wait(&recv_right_req, &recv_right_stat); MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count); MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count); num_augmented_particles = num_my_particles + recvd_left_count + recvd_right_count; memcpy(my_particles + num_my_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t)); memcpy(my_particles + num_my_particles + recvd_left_count, right_receiving_buffer, recvd_right_count * sizeof(particle_t)); } populate_grid(); time_step(); num_sending_left = 0; num_sending_right = 0; int num_remaining_particles = 0; for ( int i = 0; i < num_my_particles; i++ ) { if (rank != 0 && my_particles[i].x <= partition_offsets[rank]) { left_sending_buffer[num_sending_left++] = my_particles[i]; } else if (rank != (num_procs-1) && my_particles[i].x > partition_offsets[rank+1]) { right_sending_buffer[num_sending_right++] = my_particles[i]; } else { remaining_particles[num_remaining_particles++] = my_particles[i]; } } if (rank == 0) { 
MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req); MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req); MPI_Wait(&send_right_req, &send_right_stat); MPI_Wait(&recv_right_req, &recv_right_stat); MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count); num_augmented_particles = num_remaining_particles + recvd_right_count; memcpy(my_particles, remaining_particles, num_remaining_particles * sizeof(particle_t)); memcpy(my_particles + num_remaining_particles, right_receiving_buffer, recvd_right_count * sizeof(particle_t)); num_my_particles = num_augmented_particles; } else if (rank == (num_procs-1)) { MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req); MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req); MPI_Wait(&send_left_req, &send_left_stat); MPI_Wait(&recv_left_req, &recv_left_stat); MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count); num_augmented_particles = num_remaining_particles + recvd_left_count; memcpy(my_particles, remaining_particles, num_remaining_particles * sizeof(particle_t)); memcpy(my_particles + num_remaining_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t)); num_my_particles = num_augmented_particles; } else { MPI_Isend(right_sending_buffer, num_sending_right, PARTICLE, rank+1, 0, MPI_COMM_WORLD, &send_right_req); MPI_Irecv(right_receiving_buffer, num_my_particles, PARTICLE, rank+1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_right_req); MPI_Isend(left_sending_buffer, num_sending_left, PARTICLE, rank-1, 0, MPI_COMM_WORLD, &send_left_req); MPI_Irecv(left_receiving_buffer, num_my_particles, PARTICLE, rank-1, MPI_ANY_TAG, MPI_COMM_WORLD, &recv_left_req); MPI_Wait(&send_right_req, &send_right_stat); MPI_Wait(&recv_right_req, &recv_right_stat); MPI_Wait(&send_left_req, &send_left_stat); MPI_Wait(&recv_left_req, &recv_left_stat); MPI_Get_count(&recv_left_stat, PARTICLE, &recvd_left_count); MPI_Get_count(&recv_right_stat, PARTICLE, &recvd_right_count); num_augmented_particles = num_remaining_particles + recvd_left_count + recvd_right_count; memcpy(my_particles, remaining_particles, num_remaining_particles * sizeof(particle_t)); memcpy(my_particles + num_remaining_particles, left_receiving_buffer, recvd_left_count * sizeof(particle_t)); memcpy(my_particles + num_remaining_particles + recvd_left_count, right_receiving_buffer, recvd_right_count * sizeof(particle_t)); num_my_particles = num_augmented_particles; } } simulation_time = read_timer() - simulation_time; if( rank == 0 ) { printf( "num_particles = %d, num_procs = %d, simulation time = %g s\n", num_particles, num_procs, simulation_time ); } if (savename) { if (rank == 0) { final_partition_sizes = (int*) malloc( num_procs * sizeof(int) ); } MPI_Gather(&num_my_particles, 1, MPI_INT, final_partition_sizes, 1, MPI_INT, 0, MPI_COMM_WORLD); if (rank == 0) { final_partition_offsets = (int*) malloc( num_procs * sizeof(int) ); final_partition_offsets[0] = 0; for (int i = 1; i < num_procs; i++) { final_partition_offsets[i] = final_partition_offsets[i-1] + final_partition_sizes[i-1]; } } MPI_Gatherv(my_particles, num_my_particles, PARTICLE, particles, final_partition_sizes, final_partition_offsets, PARTICLE, 0, MPI_COMM_WORLD); if (rank == 0) { save( fsave, num_particles, particles ); free(final_partition_sizes); free(final_partition_offsets); } } free( partition_offsets ); free( 
partition_sizes );
    free( my_particles );
    free( remaining_particles );
    free( left_sending_buffer );
    free( right_sending_buffer );
    free( left_receiving_buffer );
    free( right_receiving_buffer );
    if( fsave )
        fclose( fsave );
    MPI_Finalize();
    return 0;
}
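/* Hedged guard, not in the original: PARTICLE is declared as 7 contiguous
 * MPI_DOUBLEs and particles are copied with memcpy/sizeof(particle_t), so the
 * code assumes particle_t is a padding-free struct of 7 doubles.  A C11
 * compile-time check of that assumption: */
_Static_assert(sizeof(particle_t) == 7 * sizeof(double),
               "PARTICLE datatype must match the particle_t layout");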
int main(int argc, char **argv) { MPI_Datatype vec; double *vecin, *vecout; MPI_Comm comm; int count, minsize = 2; int root, i, n, stride, errs = 0; int rank, size; MTest_Init(&argc, &argv); while (MTestGetIntracommGeneral(&comm, minsize, 1)) { if (comm == MPI_COMM_NULL) continue; /* Determine the sender and receiver */ MPI_Comm_rank(comm, &rank); MPI_Comm_size(comm, &size); for (root = 0; root < size; root++) { for (count = 1; count < 65000; count = count * 2) { n = 12; stride = 10; vecin = (double *) malloc(n * stride * size * sizeof(double)); vecout = (double *) malloc(size * n * sizeof(double)); MPI_Type_vector(n, 1, stride, MPI_DOUBLE, &vec); MPI_Type_commit(&vec); for (i = 0; i < n * stride; i++) vecin[i] = -2; for (i = 0; i < n; i++) vecin[i * stride] = rank * n + i; if (rank == root) { for (i = 0; i < n; i++) { vecout[rank * n + i] = rank * n + i; } MPI_Gather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, vecout, n, MPI_DOUBLE, root, comm); } else { MPI_Gather(vecin, 1, vec, NULL, -1, MPI_DATATYPE_NULL, root, comm); } if (rank == root) { for (i = 0; i < n * size; i++) { if (vecout[i] != i) { errs++; if (errs < 10) { fprintf(stderr, "vecout[%d]=%d\n", i, (int) vecout[i]); } } } } MPI_Type_free(&vec); free(vecin); free(vecout); } } MTestFreeComm(&comm); } /* do a zero length gather */ MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (rank == 0) { MPI_Gather(MPI_IN_PLACE, -1, MPI_DATATYPE_NULL, NULL, 0, MPI_BYTE, 0, MPI_COMM_WORLD); } else { MPI_Gather(NULL, 0, MPI_BYTE, NULL, 0, MPI_BYTE, 0, MPI_COMM_WORLD); } MTest_Finalize(errs); MPI_Finalize(); return 0; }
int main(int argc, char **argv) { int i, j, rank, nranks, peer, bufsize, errors; double *win_buf, *loc_buf; MPI_Win buf_win; int idx_rem[SUB_YDIM]; int blk_len[SUB_YDIM]; MPI_Datatype loc_type, rem_type; MTest_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &nranks); bufsize = XDIM * YDIM * sizeof(double); MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &win_buf); MPI_Alloc_mem(bufsize, MPI_INFO_NULL, &loc_buf); for (i = 0; i < XDIM*YDIM; i++) { *(win_buf + i) = 1.0 + rank; *(loc_buf + i) = -1.0; } MPI_Win_create(win_buf, bufsize, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &buf_win); peer = (rank+1) % nranks; /* Build the datatype */ for (i = 0; i < SUB_YDIM; i++) { idx_rem[i] = i*XDIM; blk_len[i] = SUB_XDIM; } MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &loc_type); MPI_Type_indexed(SUB_YDIM, blk_len, idx_rem, MPI_DOUBLE, &rem_type); MPI_Type_commit(&loc_type); MPI_Type_commit(&rem_type); /* Perform get operation */ MPI_Win_lock(MPI_LOCK_EXCLUSIVE, peer, 0, buf_win); MPI_Get(loc_buf, 1, loc_type, peer, 0, 1, rem_type, buf_win); /* Use the datatype only on the remote side (must have SUB_XDIM == XDIM) */ /* MPI_Get(loc_buf, SUB_XDIM*SUB_YDIM, MPI_DOUBLE, peer, 0, 1, rem_type, buf_win); */ MPI_Win_unlock(peer, buf_win); MPI_Type_free(&loc_type); MPI_Type_free(&rem_type); MPI_Barrier(MPI_COMM_WORLD); /* Verify that the results are correct */ errors = 0; for (i = 0; i < SUB_XDIM; i++) { for (j = 0; j < SUB_YDIM; j++) { const double actual = *(loc_buf + i + j*XDIM); const double expected = (1.0 + peer); if (fabs(actual - expected) > 1.0e-10) { SQUELCH( printf("%d: Data validation failed at [%d, %d] expected=%f actual=%f\n", rank, j, i, expected, actual); ); errors++; fflush(stdout); } }
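/* The listing above appears to be truncated inside the verification loop.  An
 * assumed close-out, modelled on the other RMA test in this collection (the
 * original may also verify that the untouched parts of loc_buf stayed -1.0): */
    }
    MPI_Win_free(&buf_win);
    MPI_Free_mem(win_buf);
    MPI_Free_mem(loc_buf);
    MTest_Finalize(errors);
    MPI_Finalize();
    return 0;
}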
/* test case from tt#1030 ported to C * * Thanks to Matthias Lieber for reporting the bug and providing a good test * program. */ int struct_struct_test(void) { int err, errs = 0; int i, j, dt_size = 0; MPI_Request req[2]; #define COUNT (2) MPI_Aint displ[COUNT]; int blens[COUNT]; MPI_Datatype types[COUNT]; MPI_Datatype datatype; /* A slight difference from the F90 test: F90 arrays are column-major, C * arrays are row-major. So we invert the order of dimensions. */ #define N (2) #define M (4) int array[N][M] = { {-1, -1, -1, -1}, {-1, -1, -1, -1} }; int expected[N][M] = { {-1, 1, 2, 5}, {-1, 3, 4, 6} }; int seq_array[N*M]; MPI_Aint astart, aend; MPI_Aint size_exp = 0; /* 1st section selects elements 1 and 2 out of 2nd dimension, complete 1st dim. * should receive the values 1, 2, 3, 4 */ astart = 1; aend = 2; err = build_array_section_type(M, astart, aend, &types[0]); if (err) { errs++; if (verbose) fprintf(stderr, "build_array_section_type failed\n"); return errs; } blens[0] = N; displ[0] = 0; size_exp = size_exp + N * (aend-astart+1) * sizeof(int); /* 2nd section selects last element of 2nd dimension, complete 1st dim. * should receive the values 5, 6 */ astart = 3; aend = 3; err = build_array_section_type(M, astart, aend, &types[1]); if (err) { errs++; if (verbose) fprintf(stderr, "build_array_section_type failed\n"); return errs; } blens[1] = N; displ[1] = 0; size_exp = size_exp + N * (aend-astart+1) * sizeof(int); /* create type */ err = MPI_Type_create_struct(COUNT, blens, displ, types, &datatype); check_err(MPI_Type_create_struct); err = MPI_Type_commit(&datatype); check_err(MPI_Type_commit); err = MPI_Type_size(datatype, &dt_size); check_err(MPI_Type_size); if (dt_size != size_exp) { errs++; if (verbose) fprintf(stderr, "unexpected type size\n"); } /* send the type to ourselves to make sure that the type describes data correctly */ for (i = 0; i < (N*M) ; ++i) seq_array[i] = i + 1; /* source values 1..(N*M) */ err = MPI_Isend(&seq_array[0], dt_size/sizeof(int), MPI_INT, 0, 42, MPI_COMM_SELF, &req[0]); check_err(MPI_Isend); err = MPI_Irecv(&array[0][0], 1, datatype, 0, 42, MPI_COMM_SELF, &req[1]); check_err(MPI_Irecv); err = MPI_Waitall(2, req, MPI_STATUSES_IGNORE); check_err(MPI_Waitall); /* check against expected */ for (i = 0; i < N; ++i) { for (j = 0; j < M; ++j) { if (array[i][j] != expected[i][j]) { errs++; if (verbose) fprintf(stderr, "array[%d][%d]=%d, should be %d\n", i, j, array[i][j], expected[i][j]); } } } err = MPI_Type_free(&datatype); check_err(MPI_Type_free); err = MPI_Type_free(&types[0]); check_err(MPI_Type_free); err = MPI_Type_free(&types[1]); check_err(MPI_Type_free); return errs; #undef M #undef N #undef COUNT }
SEXP matrixApply(SEXP result, SEXP data, SEXP margin, SEXP function, int worldRank, int worldSize) { SEXP ans, data_size; MPI_Datatype row_type, column_type; MPI_Status status; int my_start, my_end, N, function_nlines, nvectors, offset; int local_check = 0, global_check = 0; int dimensions[2]; if (worldRank == MASTER_PROCESS) { data_size = GET_DIM(data); dimensions[0] = INTEGER_POINTER(data_size)[0]; dimensions[1] = INTEGER_POINTER(data_size)[1]; /* function SEXP object is a vector of strings, each element contains a single line of the function definition */ function_nlines = length(function); } MPI_Bcast(dimensions, 2, MPI_INT, 0, MPI_COMM_WORLD); MPI_Bcast(&function_nlines, 1, MPI_INT, 0, MPI_COMM_WORLD); /* margin provides the subscripts which the function will be applied over. "1" indicates rows, "2" indicates columns, c(1,2)" indicates rows and columns */ if(worldRank != MASTER_PROCESS) PROTECT(margin = allocVector(INTSXP, 1)); MPI_Bcast(INTEGER(margin), 1, MPI_INT, 0, MPI_COMM_WORLD); /* Matrix dimensions in R are interpreted differen than in C. We will refer to R rows and columns ordering, so rows are not alligned in memory */ if (INTEGER(margin)[0] == 1) { N = dimensions[0]; /* define vector type type to handle R rows exchange (count, blocklength, stride)*/ MPI_Type_vector (dimensions[1], 1, dimensions[0], MPI_DOUBLE, &row_type); MPI_Type_commit (&row_type); } else if (INTEGER(margin)[0] == 2) { N = dimensions[1]; /* define contiguous type to handle R columns exchange */ MPI_Type_contiguous(dimensions[0], MPI_DOUBLE, &column_type); MPI_Type_commit(&column_type); } else if (INTEGER(margin)[0] == 3) { // TODO DEBUG("Margin number 3 not yet implemented\n"); return R_NilValue; } else { DEBUG("Don't know how to deal with margin number %d\n", INTEGER(margin)[0]); return R_NilValue; } if(worldRank != MASTER_PROCESS) { /* Allocate memory for SEXP objects on worker nodes. alloc... functions do their own error-checking and return if the allocation process will fail. 
*/ loopDistribute(worldRank, worldSize, N, &my_start, &my_end); if (INTEGER(margin)[0] == 1) PROTECT(data = allocMatrix(REALSXP, my_end-my_start, dimensions[1])); if (INTEGER(margin)[0] == 2) PROTECT(data = allocMatrix(REALSXP, dimensions[0], my_end-my_start)); PROTECT(function = allocVector(STRSXP, function_nlines)); } if ( (data == NULL) || (function == NULL) ) { local_check = 1; } else { local_check = 0; } /* Check whether memory was successfully allocated on all worker nodes */ MPI_Allreduce(&local_check, &global_check, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); /* No need to free memory if allocation fails on one of the workers R_alloc will release it after .Call returns to R */ if ( global_check != 0 ) { /* Remove all references from the stack, I'm not sure if this is necessary */ if(worldRank != MASTER_PROCESS) UNPROTECT(3); return ScalarInteger(-1); } /* Distribute data between processes */ for (int worker_id=1; worker_id<worldSize; worker_id++) { if (worldRank == MASTER_PROCESS) { /* Calculate expected message length for each worker */ loopDistribute(worker_id, worldSize, N, &my_start, &my_end); nvectors = my_end - my_start; /* If we applying over rows, as defined in R, we need to use the MPI vector type sending each row as a separate message */ if (INTEGER(margin)[0] == 1) { for(int k=0; k<nvectors; k++) { offset = my_start+k; MPI_Send(&REAL(data)[offset], 1, row_type, worker_id, 0, MPI_COMM_WORLD); } } /* R defined columns are alligned in memory, single message of build from contiguous column_type elemensts is send */ else if (INTEGER(margin)[0] == 2) { offset = my_start*dimensions[0]; MPI_Send(&REAL(data)[offset], nvectors, column_type, worker_id, 0, MPI_COMM_WORLD); } } else if (worldRank == worker_id) { nvectors = my_end - my_start; if (INTEGER(margin)[0] == 1) { for(int k=0; k<nvectors; k++) { offset = k*dimensions[1]; MPI_Recv(&REAL(data)[offset], dimensions[1], MPI_DOUBLE, MASTER_PROCESS, 0, MPI_COMM_WORLD, &status); } } else if (INTEGER(margin)[0] == 2) { MPI_Recv(REAL(data), nvectors, column_type, MASTER_PROCESS, 0, MPI_COMM_WORLD, &status); } } } /* Redo loop distribution for the Master process */ if (worldRank == MASTER_PROCESS) { loopDistribute(worldRank, worldSize, N, &my_start, &my_end); } /* Bcast function name or definition, cover case when definition is split into several lines and stored as a SEXP string vector */ bcastRFunction(function, function_nlines, worldRank); /* Response container, Vector of SEXPs, margin determines vector length */ PROTECT(ans = allocVector(VECSXP, N)); do_matrixApply(ans, data, margin, function, my_start, my_end, dimensions, worldRank); gatherData(result, ans, N, my_start, my_end, worldRank); if(worldRank != MASTER_PROCESS) { UNPROTECT(4); } else { UNPROTECT(1); } return result; }
/* regression for tt#1030, checks for bad offset math in the * blockindexed and indexed dataloop flattening code */ int flatten_test(void) { int err, errs = 0; #define ARR_SIZE (9) /* real indices 0 1 2 3 4 5 6 7 8 * indices w/ &array[3] -3 -2 -1 0 1 2 3 4 5 */ int array[ARR_SIZE] = {-1,-1,-1,-1,-1,-1,-1,-1,-1}; int expected[ARR_SIZE] = {-1, 0, 1,-1, 2,-1, 3,-1, 4}; MPI_Datatype idx_type = MPI_DATATYPE_NULL; MPI_Datatype blkidx_type = MPI_DATATYPE_NULL; MPI_Datatype combo = MPI_DATATYPE_NULL; #define COUNT (2) int displ[COUNT]; MPI_Aint adispl[COUNT]; int blens[COUNT]; MPI_Datatype types[COUNT]; /* indexed type layout: * XX_X * 2101 <-- pos (left of 0 is neg) * * different blens to prevent optimization into a blockindexed */ blens[0] = 2; displ[0] = -2; /* elements, puts byte after block end at 0 */ blens[1] = 1; displ[1] = 1; /*elements*/ err = MPI_Type_indexed(COUNT, blens, displ, MPI_INT, &idx_type); check_err(MPI_Type_indexed); err = MPI_Type_commit(&idx_type); check_err(MPI_Type_commit); /* indexed type layout: * _X_X * 2101 <-- pos (left of 0 is neg) */ displ[0] = -1; displ[1] = 1; err = MPI_Type_create_indexed_block(COUNT, 1, displ, MPI_INT, &blkidx_type); check_err(MPI_Type_indexed_block); err = MPI_Type_commit(&blkidx_type); check_err(MPI_Type_commit); /* struct type layout: * II_I_B_B (I=idx_type, B=blkidx_type) * 21012345 <-- pos (left of 0 is neg) */ blens[0] = 1; adispl[0] = 0; /*bytes*/ types[0] = idx_type; blens[1] = 1; adispl[1] = 4 * sizeof(int); /* bytes */ types[1] = blkidx_type; /* must be a struct in order to trigger flattening code */ err = MPI_Type_create_struct(COUNT, blens, adispl, types, &combo); check_err(MPI_Type_indexed); err = MPI_Type_commit(&combo); check_err(MPI_Type_commit); /* pack/unpack with &array[3] */ errs += pack_and_check_expected(combo, "combo", 3, ARR_SIZE, array, expected); MPI_Type_free(&combo); MPI_Type_free(&idx_type); MPI_Type_free(&blkidx_type); return errs; #undef COUNT }
int main(int argc, char **argv) {
    size_t dimensions;
    size_t i, j;
    scanf("%zu", &dimensions);
    struct complex *matrix = calloc(dimensions * dimensions, sizeof(struct complex));
    struct complex temp;
    for (i = 0; i < dimensions; ++i) {
        for (j = 0; j < dimensions; ++j) {
            scanf("%lf", &temp.re);
            scanf("%lf", &temp.im);
            temp.x = (int) i;
            temp.y = (int) j;
            matrix[i * dimensions + j] = temp;
        }
    }
    int counter, size;
    double begin, end;
    begin = omp_get_wtime();
    MPI_Init(&argc, &argv);
    MPI_Datatype complex_t;
    MPI_Datatype type[4] = {MPI_DOUBLE, MPI_DOUBLE, MPI_INT, MPI_INT};
    int blocklen[4] = {1, 1, 1, 1};
    /* because readability is our main concern: the displacements must be filled
       in before building the struct type; offsetof (from <stddef.h>) gives the
       byte offset of each member of struct complex. */
    MPI_Aint disp[4];
    disp[0] = offsetof(struct complex, re);
    disp[1] = offsetof(struct complex, im);
    disp[2] = offsetof(struct complex, x);
    disp[3] = offsetof(struct complex, y);
    MPI_Type_create_struct(4, blocklen, disp, type, &complex_t);
    MPI_Type_commit(&complex_t);
    MPI_Comm_rank(MPI_COMM_WORLD, &counter);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    printf("%d %d", counter, size);
    struct complex thread_min = matrix[0];
    thread_min.x = counter;
    thread_min.y = 0;
    struct complex thread_max = matrix[0];
    thread_max.x = counter;
    thread_max.y = 0;
    for (i = (size_t) counter; i < dimensions; i += size) {
        for (j = 0; j < dimensions; ++j) {
            if (length(matrix[i * dimensions + j]) < length(thread_min)) {
                thread_min = matrix[i * dimensions + j];
            }
            if (length(matrix[i * dimensions + j]) > length(thread_max)) {
                thread_max = matrix[i * dimensions + j];
            }
        }
    }
    if (counter != 0) {
        MPI_Send(&thread_min, 1, complex_t, 0, 0, MPI_COMM_WORLD);
        MPI_Send(&thread_max, 1, complex_t, 0, 0, MPI_COMM_WORLD);
    }
    if (counter == 0) {
        struct complex min = thread_min;
        struct complex max = thread_max;
        for (i = 1; i < (size_t) size; ++i) {
            /* receive from rank i (the sender), not from rank 0 */
            MPI_Recv(&thread_min, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            MPI_Recv(&thread_max, 1, complex_t, (int) i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
            printf("%.2f+i*%.2f", thread_min.re, thread_min.im);
            printf("%.2f+i*%.2f", thread_max.re, thread_max.im);
            if (length(thread_min) < length(min)) {
                min = thread_min;
            }
            if (length(thread_max) > length(max)) {
                max = thread_max;
            }
        }
        printf("max complex number %.2f+i*%.2f position x:%d y:%d \n", max.re, max.im, max.x, max.y);
        printf("min complex number %.2f+i*%.2f position x:%d, y:%d \n", min.re, min.im, min.x, min.y);
    }
    MPI_Finalize();
    end = omp_get_wtime();
    printf("execution time: %f\n", end - begin);
    free(matrix);
    return 0;
}
int main(int argc, char **argv) { int rank, size; int n, energy, niters, px, py; int north, south, west, east; int bx, by, offx, offy; /* three heat sources */ const int nsources = 3; int sources[nsources][2]; int locnsources; /* number of sources in my area */ int locsources[nsources][2]; /* sources local to my rank */ double t1, t2; int iter, i; double *aold, *anew, *tmp; double heat, rheat; int final_flag; /* initialize MPI envrionment */ MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); /* argument checking and setting */ setup(rank, size, argc, argv, &n, &energy, &niters, &px, &py, &final_flag); if (final_flag == 1) { MPI_Finalize(); exit(0); } /* Create a communicator with a topology */ MPI_Comm cart_comm; int dims[2] = {0,0}, periods[2] = {0,0}, coords[2]; MPI_Dims_create(size, 2, dims); MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart_comm); MPI_Cart_coords(cart_comm, rank, 2, coords); /* determine my four neighbors */ MPI_Cart_shift(cart_comm, 0, 1, &west, &east); MPI_Cart_shift(cart_comm, 1, 1, &north, &south); /* decompose the domain */ bx = n / px; /* block size in x */ by = n / py; /* block size in y */ offx = coords[0] * bx; /* offset in x */ offy = coords[1] * by; /* offset in y */ /* printf("%i (%i,%i) - w: %i, e: %i, n: %i, s: %i\n", rank, ry,rx,west,east,north,south); */ /* allocate working arrays & communication buffers */ MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &aold); /* 1-wide halo zones! */ MPI_Alloc_mem((bx+2)*(by+2)*sizeof(double), MPI_INFO_NULL, &anew); /* 1-wide halo zones! */ /* initialize three heat sources */ init_sources(bx, by, offx, offy, n, nsources, sources, &locnsources, locsources); /* create north-south datatype */ MPI_Datatype north_south_type; MPI_Type_contiguous(bx, MPI_DOUBLE, &north_south_type); MPI_Type_commit(&north_south_type); /* create east-west type */ MPI_Datatype east_west_type; MPI_Type_vector(by,1,bx+2,MPI_DOUBLE, &east_west_type); MPI_Type_commit(&east_west_type); t1 = MPI_Wtime(); /* take time */ for (iter = 0; iter < niters; ++iter) { /* refresh heat sources */ for (i = 0; i < locnsources; ++i) { aold[ind(locsources[i][0],locsources[i][1])] += energy; /* heat source */ } /* exchange data with neighbors */ MPI_Request reqs[8]; MPI_Isend(&aold[ind(1,1)] /* north */, 1, north_south_type, north, 9, cart_comm, &reqs[0]); MPI_Isend(&aold[ind(1,by)] /* south */, 1, north_south_type, south, 9, cart_comm, &reqs[1]); MPI_Isend(&aold[ind(bx,1)] /* east */, 1, east_west_type, east, 9, cart_comm, &reqs[2]); MPI_Isend(&aold[ind(1,1)] /* west */, 1, east_west_type, west, 9, cart_comm, &reqs[3]); MPI_Irecv(&aold[ind(1,0)] /* north */, 1, north_south_type, north, 9, cart_comm, &reqs[4]); MPI_Irecv(&aold[ind(1,by+1)] /* south */, 1, north_south_type, south, 9, cart_comm, &reqs[5]); MPI_Irecv(&aold[ind(bx+1,1)] /* west */, 1, east_west_type, east, 9, cart_comm, &reqs[6]); MPI_Irecv(&aold[ind(0,1)] /* east */, 1, east_west_type, west, 9, cart_comm, &reqs[7]); MPI_Waitall(8, reqs, MPI_STATUS_IGNORE); /* update grid points */ update_grid(bx, by, aold, anew, &heat); /* swap working arrays */ tmp = anew; anew = aold; aold = tmp; /* optional - print image */ if (iter == niters-1) printarr_par(iter, anew, n, px, py, coords[0], coords[1], bx, by, offx, offy, MPI_COMM_WORLD); } t2 = MPI_Wtime(); /* free working arrays and communication buffers */ MPI_Free_mem(aold); MPI_Free_mem(anew); MPI_Type_free(&east_west_type); MPI_Type_free(&north_south_type); /* get final heat in the system */ 
MPI_Allreduce(&heat, &rheat, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (!rank) printf("[%i] last heat: %f time: %f\n", rank, rheat, t2-t1); MPI_Finalize(); return 0; }
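/* A note on the halo datatypes used above: north/south rows are contiguous
 * (MPI_Type_contiguous over bx doubles), while east/west columns are strided
 * through the padded rows, hence MPI_Type_vector(by, 1, bx+2, MPI_DOUBLE).
 * The following is a minimal, single-process sketch that shows which elements
 * such a column type selects; the 3x4 interior, the local ind() macro, and
 * the self-Sendrecv are illustrative assumptions, not part of the code above. */
#include <mpi.h>
#include <stdio.h>

#define BX 3
#define BY 4
#define ind(i,j) ((j)*(BX+2)+(i))   /* row-major layout with 1-wide halos */

int main(int argc, char **argv)
{
    double a[(BX+2)*(BY+2)], col[BY];
    MPI_Datatype east_west_type;
    int i, j;

    MPI_Init(&argc, &argv);

    for (j = 0; j < BY+2; j++)
        for (i = 0; i < BX+2; i++)
            a[ind(i,j)] = 100*j + i;                /* readable pattern */

    /* one element per interior row, stride = padded row length */
    MPI_Type_vector(BY, 1, BX+2, MPI_DOUBLE, &east_west_type);
    MPI_Type_commit(&east_west_type);

    /* copy the east interior column (i = BX) into a contiguous buffer */
    MPI_Sendrecv(&a[ind(BX,1)], 1, east_west_type, 0, 0,
                 col, BY, MPI_DOUBLE, 0, 0,
                 MPI_COMM_SELF, MPI_STATUS_IGNORE);

    for (j = 0; j < BY; j++)
        printf("col[%d] = %g\n", j, col[j]);        /* expect 103 203 303 403 */

    MPI_Type_free(&east_west_type);
    MPI_Finalize();
    return 0;
}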
/* subarray_4d_fortran_test1() * * Returns the number of errors encountered. */ int subarray_4d_fortran_test1(void) { MPI_Datatype subarray; int array[] = { -1111, -1112, -1113, -1114, -1115, -1116, -1121, -1122, -1123, -1124, -1125, -1126, -1131, -1132, -1133, -1134, -1135, -1136, -1211, -1212, -1213, -1214, -1215, -1216, -1221, -1222, -1223, -1224, -1225, -1226, -1231, -1232, -1233, -1234, -1235, -1236, -2111, -2112, -2113, -2114, 1, -2116, -2121, -2122, -2123, -2124, 2, -2126, -2131, -2132, -2133, -2134, 3, -2136, -2211, -2212, -2213, -2214, 4, -2216, -2221, -2222, -2223, -2224, 5, -2226, -2231, -2232, -2233, -2234, 6, -2236 }; int array_size[4] = { 6, 3, 2, 2 }; int array_subsize[4] = { 1, 3, 2, 1 }; int array_start[4] = { 4, 0, 0, 1 }; int i, err, errs = 0, sizeoftype; /* set up type */ err = MPI_Type_create_subarray(4, /* dims */ array_size, array_subsize, array_start, MPI_ORDER_FORTRAN, MPI_INT, &subarray); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "error in MPI_Type_create_subarray call; aborting after %d errors\n", errs); } return errs; } MPI_Type_commit(&subarray); MPI_Type_size(subarray, &sizeoftype); if (sizeoftype != 6 * sizeof(int)) { errs++; if (verbose) fprintf(stderr, "size of type = %d; should be %d\n", sizeoftype, (int) (6 * sizeof(int))); return errs; } err = pack_and_unpack((char *) array, 1, subarray, 72 * sizeof(int)); for (i = 0; i < 72; i++) { int goodval; switch (i) { case 40: goodval = 1; break; case 46: goodval = 2; break; case 52: goodval = 3; break; case 58: goodval = 4; break; case 64: goodval = 5; break; case 70: goodval = 6; break; default: goodval = 0; break; } if (array[i] != goodval) { errs++; if (verbose) fprintf(stderr, "array[%d] = %d; should be %d\n", i, array[i], goodval); } } MPI_Type_free(&subarray); return errs; }
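/* The subarray tests in this collection call a pack_and_unpack() helper that
 * is not included in this excerpt. The sketch below is only a guess at the
 * usual shape of such a helper (pack the typed data, zero the caller's
 * buffer, unpack on top of it, so gaps in the type stay zero); the real
 * helper used by these tests may differ in details and error handling. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* hypothetical stand-in for the tests' pack_and_unpack(); returns #errors */
static int pack_and_unpack_sketch(char *buf, int count, MPI_Datatype type,
                                  int bytes)
{
    char *packbuf;
    int packsize, position = 0;

    MPI_Pack_size(count, type, MPI_COMM_SELF, &packsize);
    packbuf = malloc((size_t) packsize);
    if (!packbuf) {
        fprintf(stderr, "unable to allocate %d pack bytes\n", packsize);
        return 1;
    }
    MPI_Pack(buf, count, type, packbuf, packsize, &position, MPI_COMM_SELF);
    memset(buf, 0, (size_t) bytes);               /* zero before unpacking */
    position = 0;
    MPI_Unpack(packbuf, packsize, &position, buf, count, type, MPI_COMM_SELF);
    free(packbuf);
    return 0;
}

int main(int argc, char **argv)
{
    int a[4] = { 1, 2, 3, 4 }, i, errs;

    MPI_Init(&argc, &argv);
    errs = pack_and_unpack_sketch((char *) a, 4, MPI_INT, (int) sizeof(a));
    for (i = 0; i < 4; i++)
        printf("a[%d] = %d\n", i, a[i]);          /* contiguous type: 1 2 3 4 */
    MPI_Finalize();
    return errs;
}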
int main(int argc, char ** argv){ int my_id, root, ierr, num_procs; MPI_Status status; ierr = MPI_Init(&argc, &argv); /* initialize MPI */ ierr = MPI_Comm_rank(MPI_COMM_WORLD, &my_id); ierr = MPI_Comm_size(MPI_COMM_WORLD, &num_procs); /* Make an MPI datatype for Vars */ const int nitems=5; int blocklengths[5] = {1, 1, 1, 1, 1}; MPI_Datatype types[5] = { MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE, MPI_DOUBLE}; MPI_Datatype mpi_Vars; MPI_Aint offsets[5]; offsets[0] = offsetof(Vars, mass); offsets[1] = offsetof(Vars, xvelocity); offsets[2] = offsetof(Vars, yvelocity); offsets[3] = offsetof(Vars, energy); offsets[4] = offsetof(Vars, press); MPI_Type_create_struct(nitems, blocklengths, offsets, types, &mpi_Vars); MPI_Type_commit(&mpi_Vars); /* start the program */ int N, type; N = num_procs*100; type = 1; int zones_to_do = N/num_procs; double dt; int count = 0; char str[80]; FILE *fid, *finit; double dx = 1./(double)N; double t, T; t = 0.; T = .2; int num = 30; Vars * U = malloc((N+4)*(N+4)*sizeof(Vars)); init_sys(N+4, N+4, U, dx, dx, 1); if(my_id == 0){ /* I am root */ finit = fopen("2Dinit.dat","w"); Write_Cons(N+4, N+4, U, dx, dx, finit); fclose(finit); } while(t<T){ //printf("before\n"); dt = advance_system(N+4, N+4, U, dx, dx, my_id, zones_to_do, num_procs, mpi_Vars); t+=dt; //break; //printf("what time is it = %f\n", dt); /* Broadcast U */ ierr = MPI_Bcast(U, (N+4)*(N+4), mpi_Vars, 0, MPI_COMM_WORLD); /* if(my_id == 0){ if( count % 1 == 0){ sprintf(str, "T_%d.dat", count); fid = fopen(str, "w"); Write_Cons(N+4, N+4, U, dx, dx, fid); fclose(fid); //printf("T=%f\n", t); } count += 1; }*/ } if(my_id == 0){ /* I am root */ printf("%d\n", count); fid = fopen("22data.dat","w"); Write_Cons(N+4, N+4, U, dx, dx, fid); fclose(fid); } MPI_Type_free(&mpi_Vars); free(U); MPI_Finalize(); return 0; }
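/* The struct datatype above maps five consecutive doubles, so its extent
 * happens to match sizeof(Vars) (assuming Vars holds only those fields).
 * When a struct carries trailing padding or extra non-communicated members,
 * the committed type's extent can differ from sizeof(struct) and arrays of
 * structs get strided incorrectly. A common safeguard, sketched below with a
 * hypothetical cell_t struct (not the Vars type above), is to resize the
 * type to the C extent with MPI_Type_create_resized. */
#include <mpi.h>
#include <stddef.h>
#include <stdio.h>

typedef struct { double mass; int flag; } cell_t;  /* padded on most ABIs */

int main(int argc, char **argv)
{
    MPI_Datatype tmp, cell_type;
    int blocklens[2] = { 1, 1 };
    MPI_Aint offsets[2] = { offsetof(cell_t, mass), offsetof(cell_t, flag) };
    MPI_Datatype types[2] = { MPI_DOUBLE, MPI_INT };
    MPI_Aint lb, extent;

    MPI_Init(&argc, &argv);

    MPI_Type_create_struct(2, blocklens, offsets, types, &tmp);
    /* force the extent to the C struct size so arrays are walked correctly */
    MPI_Type_create_resized(tmp, 0, (MPI_Aint) sizeof(cell_t), &cell_type);
    MPI_Type_commit(&cell_type);
    MPI_Type_free(&tmp);

    MPI_Type_get_extent(cell_type, &lb, &extent);
    printf("sizeof(cell_t) = %d, extent = %ld\n",
           (int) sizeof(cell_t), (long) extent);

    MPI_Type_free(&cell_type);
    MPI_Finalize();
    return 0;
}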
/* subarray_2d_fortran_test1() * * Returns the number of errors encountered. */ int subarray_2d_fortran_test1(void) { MPI_Datatype subarray; int array[12] = { -1, -2, -3, -4, 1, 2, -5, -6, -7, -8, -9, -10 }; int array_size[2] = { 6, 2 }; int array_subsize[2] = { 2, 1 }; int array_start[2] = { 4, 0 }; int i, err, errs = 0, sizeoftype; /* set up type */ err = MPI_Type_create_subarray(2, /* dims */ array_size, array_subsize, array_start, MPI_ORDER_FORTRAN, MPI_INT, &subarray); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "error in MPI_Type_create_subarray call; aborting after %d errors\n", errs); } return errs; } MPI_Type_commit(&subarray); MPI_Type_size(subarray, &sizeoftype); if (sizeoftype != 2 * sizeof(int)) { errs++; if (verbose) fprintf(stderr, "size of type = %d; should be %d\n", sizeoftype, (int) (2 * sizeof(int))); return errs; } err = pack_and_unpack((char *) array, 1, subarray, 12 * sizeof(int)); for (i = 0; i < 12; i++) { int goodval; switch (i) { case 4: goodval = 1; break; case 5: goodval = 2; break; default: goodval = 0; break; } if (array[i] != goodval) { errs++; if (verbose) fprintf(stderr, "array[%d] = %d; should be %d\n", i, array[i], goodval); } } MPI_Type_free(&subarray); return errs; }
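/* Why positions 4 and 5 are the interesting ones above: with
 * MPI_ORDER_FORTRAN the first dimension varies fastest, so for array_size
 * {6, 2} the linear offset of element (i0, i1) is i0 + 6*i1. With
 * array_start {4, 0} and array_subsize {2, 1} the subarray therefore covers
 * offsets 4 and 5. The tiny sketch below just enumerates that mapping; it is
 * plain index arithmetic, not part of the test. */
#include <stdio.h>

int main(void)
{
    int size[2] = { 6, 2 }, subsize[2] = { 2, 1 }, start[2] = { 4, 0 };
    int i0, i1;

    for (i1 = 0; i1 < subsize[1]; i1++)
        for (i0 = 0; i0 < subsize[0]; i0++)
            printf("linear offset %d\n",
                   (start[0] + i0) + size[0] * (start[1] + i1));  /* 4, 5 */
    return 0;
}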
void MPI_New_Datatype() { /* Create new MPI datatype: ElemPack type definition in struct.h so structures of ElemPack and NeighborPack can be sent and received */ /* MPI_Datatype ELEMTYPE; MPI_Datatype NEIGHTYPE; MPI_Datatype REFINED_INFO; MPI_Datatype ENRICHED_INFO; MPI_Datatype NSOLTYPE; MPI_Datatype LB_VERT_TYPE;*/ int blockcounts[3]={58, 25*KEYLENGTH, 102}; MPI_Datatype types[3]; MPI_Aint displs[3]; int d; ElemPack* elem=new ElemPack; MPI_Address(&(elem->myprocess), &displs[0]); MPI_Address(&(elem->key[0]), &displs[1]); MPI_Address(&(elem->elevation), &displs[2]); types[0]=MPI_INT; types[1]=MPI_UNSIGNED; types[2]=MPI_DOUBLE; for(d=2; d>=0; d--) displs[d]-=displs[0]; MPI_Type_struct(3, blockcounts, displs, types, &ELEMTYPE); MPI_Type_commit(&ELEMTYPE); //create the 2nd new d_type int blockcounts2[2]={2, 2*KEYLENGTH}; MPI_Datatype types2[2]; MPI_Aint displs2[2]; NeighborPack* neigh=new NeighborPack; MPI_Address(&(neigh->target_proc), &displs2[0]); MPI_Address(&(neigh->elkey), &displs2[1]); types2[0]=MPI_INT; types2[1]=MPI_UNSIGNED; for(d=1; d>=0; d--) displs2[d]-=displs2[0]; MPI_Type_struct(2, blockcounts2, displs2, types2, &NEIGHTYPE); MPI_Type_commit(&NEIGHTYPE); //create the 3rd new d_type int blockcounts3[2]={1, 4*KEYLENGTH}; MPI_Datatype types3[2]={MPI_INT, MPI_UNSIGNED}; MPI_Aint displs3[2]={0,0}; refined_neighbor_pack* fine=new refined_neighbor_pack; MPI_Address(&(fine->orig_gen), &displs3[0]); MPI_Address(&(fine->target_element), &displs3[1]); for(d=1; d>=0; d--) displs3[d]-=displs3[0]; MPI_Type_struct(2, blockcounts3, displs3, types3, &REFINED_INFO); MPI_Type_commit(&REFINED_INFO); //create the 4th new d_type // for getting the neighbor solution in the new error estimator, when the neighbor is in diff subdomain int blockcounts5[3]={6,KEYLENGTH,260}; MPI_Datatype types5[3]; MPI_Aint displs5[3]; Neigh_Sol_Pack* neigh_sol = new Neigh_Sol_Pack; MPI_Address(&(neigh_sol->nside), &displs5[0]); MPI_Address((neigh_sol->key), &displs5[1]); MPI_Address((neigh_sol->solu), &displs5[2]); types5[0] = MPI_INT; types5[1] = MPI_UNSIGNED; types5[2] = MPI_DOUBLE; for(d=2; d>=0; d--) displs5[d]-=displs5[0]; MPI_Type_struct(3, blockcounts5, displs5, types5, &NSOLTYPE); MPI_Type_commit(&NSOLTYPE); //delete neigh_sol; //added by acbauer 4/3/02 -- may be a bug????? int blockcounts6[3] = {3,KEYLENGTH+1,1}; MPI_Datatype types6[3] = {MPI_INT, MPI_UNSIGNED, MPI_FLOAT}; MPI_Aint displs6[3]; BSFC_VERTEX* sfc_vert_ptr = new BSFC_VERTEX; MPI_Address(&(sfc_vert_ptr->destination_proc), &displs6[0]); MPI_Address(&(sfc_vert_ptr->sfc_key[0]), &displs6[1]); MPI_Address(&(sfc_vert_ptr->lb_weight), &displs6[2]); for(d=2; d>=0; d--) displs6[d]-=displs6[0]; MPI_Type_struct(3, blockcounts6, displs6, types6, &LB_VERT_TYPE); MPI_Type_commit(&LB_VERT_TYPE); //New data types are created at this point }
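/* MPI_Address and MPI_Type_struct, used throughout MPI_New_Datatype() above,
 * were deprecated in MPI-2 and removed in MPI-3; the replacements are
 * MPI_Get_address, MPI_Aint_diff, and MPI_Type_create_struct. Below is a
 * sketch of the same "displacements relative to the first member" pattern
 * for a hypothetical three-field pack struct; it is not the ElemPack layout,
 * whose block counts depend on definitions in struct.h. */
#include <mpi.h>
#include <stdio.h>

typedef struct { int iproc; unsigned key[2]; double elevation; } pack_t;

int main(int argc, char **argv)
{
    pack_t p;
    int blockcounts[3] = { 1, 2, 1 };
    MPI_Datatype types[3] = { MPI_INT, MPI_UNSIGNED, MPI_DOUBLE };
    MPI_Aint displs[3], base;
    MPI_Datatype pack_type;
    int d;

    MPI_Init(&argc, &argv);

    MPI_Get_address(&p.iproc, &displs[0]);
    MPI_Get_address(&p.key[0], &displs[1]);
    MPI_Get_address(&p.elevation, &displs[2]);
    base = displs[0];
    for (d = 0; d < 3; d++)
        displs[d] = MPI_Aint_diff(displs[d], base);  /* relative to member 0 */

    MPI_Type_create_struct(3, blockcounts, displs, types, &pack_type);
    MPI_Type_commit(&pack_type);

    printf("displacements: %ld %ld %ld\n",
           (long) displs[0], (long) displs[1], (long) displs[2]);

    MPI_Type_free(&pack_type);
    MPI_Finalize();
    return 0;
}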
/* subarray_1d_c_test1() * * Returns the number of errors encountered. */ int subarray_1d_c_test1(void) { MPI_Datatype subarray; int array[9] = { -1, 1, 2, 3, -2, -3, -4, -5, -6 }; int array_size[] = { 9 }; int array_subsize[] = { 3 }; int array_start[] = { 1 }; int i, err, errs = 0, sizeoftype; /* set up type */ err = MPI_Type_create_subarray(1, /* dims */ array_size, array_subsize, array_start, MPI_ORDER_C, MPI_INT, &subarray); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "error in MPI_Type_create_subarray call; aborting after %d errors\n", errs); } return errs; } MPI_Type_commit(&subarray); MPI_Type_size(subarray, &sizeoftype); if (sizeoftype != 3 * sizeof(int)) { errs++; if (verbose) fprintf(stderr, "size of type = %d; should be %d\n", sizeoftype, (int) (3 * sizeof(int))); return errs; } err = pack_and_unpack((char *) array, 1, subarray, 9 * sizeof(int)); for (i = 0; i < 9; i++) { int goodval; switch (i) { case 1: goodval = 1; break; case 2: goodval = 2; break; case 3: goodval = 3; break; default: goodval = 0; /* pack_and_unpack() zeros before unpacking */ break; } if (array[i] != goodval) { errs++; if (verbose) fprintf(stderr, "array[%d] = %d; should be %d\n", i, array[i], goodval); } } MPI_Type_free(&subarray); return errs; }
int main( int argc, char *argv[] ) { int errs = 0; MPI_Win win; int *rmabuffer=0, *getbuf=0; MPI_Aint bufsize=0, getbufsize=0; int master, partner, next, wrank, wsize, i; int ntest = LAST_TEST; int *srcbuf; MTest_Init( &argc, &argv ); /* Determine who is responsible for each part of the test */ MPI_Comm_rank( MPI_COMM_WORLD, &wrank ); MPI_Comm_size( MPI_COMM_WORLD, &wsize ); if (wsize < 3) { fprintf( stderr, "This test requires at least 3 processes\n" ); MPI_Abort( MPI_COMM_WORLD, 1 ); } master = 0; partner = 1; next = wrank + 1; if (next == partner) next++; if (next >= wsize) { next = 0; if (next == partner) next++; } /* Determine the last test to run (by default, run them all) */ for (i=1; i<argc; i++) { if (strcmp( "-ntest", argv[i] ) == 0) { i++; if (i < argc) { ntest = atoi( argv[i] ); } else { fprintf( stderr, "Missing value for -ntest\n" ); MPI_Abort( MPI_COMM_WORLD, 1 ); } } } MPI_Type_vector( veccount, 1, stride, MPI_INT, &vectype ); MPI_Type_commit( &vectype ); /* Create the RMA window */ bufsize = 0; if (wrank == master) { bufsize = RMA_SIZE; MPI_Alloc_mem( bufsize*sizeof(int), MPI_INFO_NULL, &rmabuffer ); } else if (wrank == partner) { getbufsize = RMA_SIZE; getbuf = (int *)malloc( getbufsize*sizeof(int) ); if (!getbuf) { fprintf( stderr, "Unable to allocate %d bytes for getbuf\n", (int)getbufsize ); MPI_Abort( MPI_COMM_WORLD, 1 ); } } srcbuf = malloc(RMA_SIZE*sizeof(*srcbuf)); assert(srcbuf); MPI_Win_create( rmabuffer, bufsize, sizeof(int), MPI_INFO_NULL, MPI_COMM_WORLD, &win ); /* Run a sequence of tests */ for (i=0; i<=ntest; i++) { if (wrank == master) { MTestPrintfMsg( 0, "Test %d\n", i ); /* Because this lock is local, it must return only when the lock is acquired */ MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, master, win ); RMATestInit( i, rmabuffer, bufsize ); MPI_Send( MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD ); MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD ); MPI_Recv( MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); MPI_Win_unlock( master, win ); MPI_Recv( MPI_BOTTOM, 0, MPI_INT, partner, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); errs += RMACheck( i, rmabuffer, bufsize ); } else if (wrank == partner) { MPI_Recv( MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); MPI_Win_lock( MPI_LOCK_EXCLUSIVE, 0, master, win ); RMATest( i, win, master, srcbuf, RMA_SIZE, getbuf, getbufsize ); MPI_Win_unlock( master, win ); errs += RMACheckGet( i, win, getbuf, getbufsize ); MPI_Send( MPI_BOTTOM, 0, MPI_INT, master, i, MPI_COMM_WORLD ); } else { MPI_Recv( MPI_BOTTOM, 0, MPI_INT, MPI_ANY_SOURCE, i, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); MPI_Send( MPI_BOTTOM, 0, MPI_INT, next, i, MPI_COMM_WORLD ); } } if (rmabuffer) { MPI_Free_mem( rmabuffer ); } if (getbuf) { free( getbuf ); } free( srcbuf ); MPI_Win_free( &win ); MPI_Type_free( &vectype ); MTest_Finalize( errs ); MPI_Finalize(); return MTestReturnValue( errs ); }
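/* The driver above exercises passive-target RMA: the master locks its own
 * window while installing and checking data, and the partner locks the
 * master's window remotely to run the per-test RMA calls (RMATest and
 * friends are not shown in this excerpt). The sketch below strips that
 * pattern down to a single MPI_Put under a lock/unlock epoch; it assumes
 * at least two processes and is independent of the MTest harness. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, size, value = 0, payload = 42;
    MPI_Win win;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size < 2) {
        fprintf(stderr, "This sketch needs at least 2 processes\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    /* rank 0 exposes one int; other ranks expose an empty window */
    MPI_Win_create(&value, (rank == 0) ? sizeof(int) : 0, sizeof(int),
                   MPI_INFO_NULL, MPI_COMM_WORLD, &win);

    if (rank == 1) {
        MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, 0, win);
        MPI_Put(&payload, 1, MPI_INT, 0, 0, 1, MPI_INT, win);
        MPI_Win_unlock(0, win);          /* put is complete at the target */
    }

    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 0) {
        /* lock our own window before reading so the private copy is
         * synchronized (needed under the separate memory model) */
        MPI_Win_lock(MPI_LOCK_SHARED, 0, 0, win);
        printf("value = %d (expect 42)\n", value);
        MPI_Win_unlock(0, win);
    }

    MPI_Win_free(&win);
    MPI_Finalize();
    return 0;
}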
/** * accumulates pieces of the spinor field on nodes with index 0 in the dimensions given in which * the collected data is returned */ void spinor_fft_reduce_2d(spinor *localSpinorField,int *collectionRank,spinor*** field_collection,spinor **membuff){ /* this implementation is intended for four dimensional parallelisation */ #if (defined PARALLELXYZT && defined MPI && defined HAVE_FFTW) int sendRecvCoord[4]; int i; int dims[]={g_nproc_t,g_nproc_x,g_nproc_y,g_nproc_z}; /* logfile variables */ char *logFilePrefix="Process"; char logFileName[512]; FILE *logFile; const int MSG_LOCALDATA = 457; MPI_Status ierr; MPI_Datatype mpi_local_spinor; const int which[]={0,1}; (*field_collection)=NULL; (*membuff)=NULL; /* int result; */ sprintf(logFileName,"./%s_%02d.log",logFilePrefix,g_cart_id); logFile=fopen(logFileName,"a"); MPI_Type_contiguous(VOLUME, field_point, &mpi_local_spinor); MPI_Type_commit(&mpi_local_spinor); for(i=0;i<4;i++) sendRecvCoord[i]=g_proc_coords[i]; if( g_proc_coords[which[0]] == 0 && g_proc_coords[which[1]] == 0 ){ /* i am one of the nodes where data is accumulated */ spinor **accu_field; spinor **fft_field; spinor *memory_buffer_accu_field; spinor *memory_buffer_fft_field; int REDUCTIONVOLUME=1; int recvRank; MPI_Request *requests; MPI_Status *status; int request_count=0; int num_requests; fftw_plan local_2d_fft_forward; *collectionRank=TRUE; /* calculate the number of reduced 2d volume accumulated in this node */ /* number of spinor fields in local units */ REDUCTIONVOLUME*=dims[which[0]]*dims[which[1]]; /* number of receive messages */ num_requests=REDUCTIONVOLUME-1; /* reserve space for receive messages */ requests=(MPI_Request*)malloc(sizeof(MPI_Request)*num_requests); status=(MPI_Status*)malloc(sizeof(MPI_Status)*num_requests); fprintf(logFile,"reduction volume = %d\n",REDUCTIONVOLUME); /* allocate space for spinor field collection */ allocate_spinor_field_array(&accu_field,&memory_buffer_accu_field,VOLUME,REDUCTIONVOLUME); allocate_spinor_field_array(&fft_field,&memory_buffer_fft_field,VOLUME,REDUCTIONVOLUME); /* receive from certain nodes pieces of the spinor field */ for(sendRecvCoord[which[0]] = 0 ; sendRecvCoord[which[0]]< dims[which[0]] ; sendRecvCoord[which[0]]++){ for(sendRecvCoord[which[1]] = 0 ; sendRecvCoord[which[1]]< dims[which[1]] ; sendRecvCoord[which[1]]++){ if( sendRecvCoord[which[0]] != 0 || sendRecvCoord[which[1]] != 0){ MPI_Cart_rank(g_cart_grid,sendRecvCoord,&recvRank); MPI_Irecv(accu_field[sendRecvCoord[which[0]]*dims[which[1]]+sendRecvCoord[which[1]] ] /* buffer */, 1, /* how may */ mpi_local_spinor, /* mpi data type */ recvRank, /* from whom i get it */ MSG_LOCALDATA, /* msg id */ g_cart_grid, /* communicator , status */ requests+request_count); ++request_count; } } } /* wait until all request finished */ MPI_Waitall(num_requests, requests, status); assign(accu_field[0],localSpinorField,VOLUME); /* transpose in xp-t space */ spinor_fft_transpose_xp_t(fft_field[0],accu_field[0],dims[0],dims[1],TRUE,1.); /* create fftw plan */ local_2d_fft_forward=spinor_fftw_plan2d(fft_field[0],accu_field[0],T*dims[0],LX*dims[1],LY*LZ,1,FFTW_ESTIMATE); fftw_execute(local_2d_fft_forward); fftw_destroy_plan(local_2d_fft_forward); /* assign(accu_field[0],fft_field[0],VOLUME*REDUCTIONVOLUME); */ free_spinor_field_array(&memory_buffer_fft_field); memory_buffer_fft_field=NULL; /* free_spinor_field_array(&memory_buffer_accu_field); memory_buffer_accu_field=NULL; */ (*field_collection)=accu_field; (*membuff)=memory_buffer_accu_field; free(requests); requests = NULL; 
free(status); status=NULL; } else { int sendRank; MPI_Request request; MPI_Status status; *collectionRank=FALSE; /* coordinates of the "root" */ sendRecvCoord[which[0]]=0; sendRecvCoord[which[1]]=0; MPI_Cart_rank(g_cart_grid,sendRecvCoord,&sendRank); MPI_Isend(localSpinorField,1,mpi_local_spinor,sendRank,MSG_LOCALDATA,g_cart_grid,&request); MPI_Wait(&request,&status); } MPI_Type_free(&mpi_local_spinor); fclose(logFile); #else if(g_proc_id==0) fprintf(stderr,"Error: Please choose FOUR dimensional parallelization!!!\n"); #endif }
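/* The collection step above posts one MPI_Irecv per contributing rank into
 * a preallocated array of fields and completes them with MPI_Waitall, with
 * MPI_Type_contiguous describing one full local spinor field. The sketch
 * below reduces that pattern to plain doubles on MPI_COMM_WORLD (no lattice
 * geometry); the block size and the tag 457 are illustrative. A real code
 * could also just call MPI_Gather, but the explicit requests mirror the
 * structure above. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define BLOCK 4

int main(int argc, char **argv)
{
    int rank, size, i;
    double local[BLOCK];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    for (i = 0; i < BLOCK; i++)
        local[i] = rank + 0.1 * i;

    if (rank == 0) {
        double *accu = malloc((size_t) size * BLOCK * sizeof(double));
        MPI_Request *reqs = malloc((size_t) size * sizeof(MPI_Request));
        int nreq = 0, src;

        for (src = 1; src < size; src++)      /* one receive per sender */
            MPI_Irecv(accu + src * BLOCK, BLOCK, MPI_DOUBLE, src, 457,
                      MPI_COMM_WORLD, &reqs[nreq++]);
        for (i = 0; i < BLOCK; i++)
            accu[i] = local[i];               /* root's own contribution */
        MPI_Waitall(nreq, reqs, MPI_STATUSES_IGNORE);

        printf("collected %d blocks of %d doubles\n", size, BLOCK);
        free(reqs);
        free(accu);
    } else {
        MPI_Send(local, BLOCK, MPI_DOUBLE, 0, 457, MPI_COMM_WORLD);
    }

    MPI_Finalize();
    return 0;
}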
int main(int argc, char *argv[]) { int my_id, nprocs; int mpi_dims[4]; int period[4] = {0, 0, 0, 0}; int coords[4]; int dimsf[4] = {nbands, gpts, gpts, gpts}; int count[4]; int offset[4]; int ndims = 4; double t0, t1; #ifdef PAPI PAPI_dmem_info_t dmem; double mem1, mem2, mem1_max, mem2_max, mem1_ave, mem2_ave; int papi_err; #endif double *my_data; MPI_Comm cart_comm; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); assert(argc == 5); for (int i=1; i < argc; i++) mpi_dims[i-1] = atoi(argv[i]); assert(mpi_dims[0] * mpi_dims[1] * mpi_dims[2] * mpi_dims[3] == nprocs); MPI_Cart_create(MPI_COMM_WORLD, 4, mpi_dims, period, 0, &cart_comm); MPI_Comm_rank(cart_comm, &my_id); MPI_Cart_coords(cart_comm, my_id, 4, coords); assert(nbands % mpi_dims[0] == 0); for (int i=1; i < 4; i++) assert(gpts % mpi_dims[i] == 0); int total_size = nbands*gpts*gpts*gpts; count[0] = nbands / mpi_dims[0]; offset[0] = coords[0] * count[0]; int data_size = count[0]; for (int i=1; i < 4; i++) { count[i] = gpts/mpi_dims[i]; offset[i] = coords[i] * count[i]; data_size *= count[i]; } my_data = (double *) malloc(data_size * sizeof(double)); for (int i=0; i < data_size; i++) my_data[i] = my_id; MPI_Info info; MPI_File fp; MPI_Datatype filetype; // MPI_Info_set(info, "cb_nodes", "64"); MPI_Barrier(MPI_COMM_WORLD); #ifdef PAPI papi_err = PAPI_get_dmem_info(&dmem); if (papi_err != PAPI_OK) printf("PAPI_ERR\n"); mem1 = (double)dmem.size / 1024.0; MPI_Reduce(&mem1, &mem1_max, 1, MPI_DOUBLE, MPI_MAX, 0, cart_comm); MPI_Reduce(&mem1, &mem1_ave, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm); mem1_ave /= nprocs; #endif t0 = MPI_Wtime(); MPI_File_open(MPI_COMM_WORLD, "test.dat", MPI_MODE_CREATE|MPI_MODE_WRONLY, MPI_INFO_NULL, &fp); MPI_Type_create_subarray(ndims, dimsf, count, offset, MPI_ORDER_C, MPI_DOUBLE, &filetype); MPI_Type_commit(&filetype); MPI_File_set_view(fp, 0, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL); MPI_File_write_all(fp, my_data, data_size, MPI_DOUBLE, MPI_STATUS_IGNORE); MPI_Type_free(&filetype); MPI_File_close(&fp); MPI_Barrier(MPI_COMM_WORLD); t1 = MPI_Wtime(); #ifdef PAPI papi_err = PAPI_get_dmem_info(&dmem); if (papi_err != PAPI_OK) printf("PAPI_ERR\n"); mem2 = (double)dmem.size/ 1024.0; MPI_Reduce(&mem2, &mem2_max, 1, MPI_DOUBLE, MPI_MAX, 0, cart_comm); MPI_Reduce(&mem2, &mem2_ave, 1, MPI_DOUBLE, MPI_SUM, 0, cart_comm); mem2_ave /= nprocs; #endif if (my_id == 0) { printf("IO time %f (%f) MB %f s\n", total_size * 8/(1024.0*1024.0), data_size * 8/(1024.0*1024.0), t1-t0); #ifdef PAPI printf("Memory usage max (ave): %f (%f) %f (%f) \n", mem1_max, mem1_ave, mem2_max, mem2_ave); #endif } MPI_Finalize(); }
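/* The benchmark above writes the decomposed 4D array through a subarray
 * file view with MPI_File_write_all but never reads it back. The companion
 * sketch below does a write followed by a collective read through the same
 * view, shrunk to a 2D GPTS x GPTS array so it stays short; the file name
 * "test2d.dat", the value of GPTS, and the even-divisibility requirement
 * are assumptions of this sketch, not of the code above. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define GPTS 8

int main(int argc, char **argv)
{
    int rank, size, dims[2] = { 0, 0 }, periods[2] = { 0, 0 }, coords[2];
    int sizes[2] = { GPTS, GPTS }, subsizes[2], starts[2];
    MPI_Comm cart;
    MPI_Datatype filetype;
    MPI_File fp;
    double *data;
    int i, n;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    MPI_Dims_create(size, 2, dims);
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 0, &cart);
    MPI_Cart_coords(cart, rank, 2, coords);

    for (i = 0; i < 2; i++) {
        if (GPTS % dims[i]) {
            if (rank == 0)
                fprintf(stderr, "process grid must divide GPTS evenly\n");
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        subsizes[i] = GPTS / dims[i];
        starts[i] = coords[i] * subsizes[i];
    }
    n = subsizes[0] * subsizes[1];
    data = malloc((size_t) n * sizeof(double));
    for (i = 0; i < n; i++)
        data[i] = rank;

    MPI_Type_create_subarray(2, sizes, subsizes, starts, MPI_ORDER_C,
                             MPI_DOUBLE, &filetype);
    MPI_Type_commit(&filetype);

    MPI_File_open(cart, "test2d.dat", MPI_MODE_CREATE | MPI_MODE_RDWR,
                  MPI_INFO_NULL, &fp);
    MPI_File_set_view(fp, 0, MPI_DOUBLE, filetype, "native", MPI_INFO_NULL);
    MPI_File_write_all(fp, data, n, MPI_DOUBLE, MPI_STATUS_IGNORE);

    for (i = 0; i < n; i++)
        data[i] = -1.0;
    MPI_File_seek(fp, 0, MPI_SEEK_SET);          /* rewind within the view */
    MPI_File_read_all(fp, data, n, MPI_DOUBLE, MPI_STATUS_IGNORE);
    MPI_File_close(&fp);

    if (rank == 0)
        printf("data[0] = %g (expect %d)\n", data[0], rank);

    MPI_Type_free(&filetype);
    MPI_Comm_free(&cart);
    free(data);
    MPI_Finalize();
    return 0;
}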
/* test uses a vector type that describes data that is contiguous, * but processed in a noncontiguous way. this is effectively the * same type as in the struct_negdisp_test above. */ int vector_negstride_test(void) { int err, errs = 0; int sendbuf[6] = { 1, 2, 3, 4, 5, 6 }; int recvbuf[6] = { -1, -2, -3, -4, -5, -6 }; MPI_Datatype myvector; MPI_Request request; MPI_Status status; err = MPI_Type_vector(2, 1, -1, MPI_INT, &myvector); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Type_vector returned error\n"); } } MPI_Type_commit(&myvector); err = MPI_Irecv(recvbuf+1, 4, MPI_INT, 0, 0, MPI_COMM_SELF, &request); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Irecv returned error\n"); } } err = MPI_Send(sendbuf+2, 2, myvector, 0, 0, MPI_COMM_SELF); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Send returned error\n"); } } err = MPI_Wait(&request, &status); if (err != MPI_SUCCESS) { errs++; if (verbose) { fprintf(stderr, "MPI_Wait returned error\n"); } } /* verify data */ if (recvbuf[0] != -1) { errs++; if (verbose) { fprintf(stderr, "recvbuf[0] = %d; should be %d\n", recvbuf[0], -1); } } if (recvbuf[1] != 3) { errs++; if (verbose) { fprintf(stderr, "recvbuf[1] = %d; should be %d\n", recvbuf[1], 3); } } if (recvbuf[2] != 2) { errs++; if (verbose) { fprintf(stderr, "recvbuf[2] = %d; should be %d\n", recvbuf[2], 2); } } if (recvbuf[3] != 5) { errs++; if (verbose) { fprintf(stderr, "recvbuf[3] = %d; should be %d\n", recvbuf[3], 5); } } if (recvbuf[4] != 4) { errs++; if (verbose) { fprintf(stderr, "recvbuf[4] = %d; should be %d\n", recvbuf[4], 4); } } if (recvbuf[5] != -6) { errs++; if (verbose) { fprintf(stderr, "recvbuf[5] = %d; should be %d\n", recvbuf[5], -6); } } MPI_Type_free(&myvector); return errs; }
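/* Why the expected pattern above is {-1, 3, 2, 5, 4, -6}: the typemap of
 * MPI_Type_vector(2, 1, -1, MPI_INT) has byte displacements {0, -4} and an
 * extent of 8 bytes, so sending 2 elements starting at sendbuf+2 serializes
 * the values in the order 3, 2, 5, 4. The sketch below packs the same type
 * and unpacks it as four contiguous ints to show that order directly; it is
 * a companion check, not part of the test above. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv)
{
    int sendbuf[6] = { 1, 2, 3, 4, 5, 6 };
    int unpacked[4], i;
    int packsize, packed_bytes, position = 0;
    char *packbuf;
    MPI_Datatype myvector;

    MPI_Init(&argc, &argv);

    MPI_Type_vector(2, 1, -1, MPI_INT, &myvector);
    MPI_Type_commit(&myvector);

    MPI_Pack_size(2, myvector, MPI_COMM_SELF, &packsize);
    packbuf = malloc((size_t) packsize);

    /* same source and count as the MPI_Send in the test above */
    MPI_Pack(sendbuf + 2, 2, myvector, packbuf, packsize, &position,
             MPI_COMM_SELF);
    packed_bytes = position;
    position = 0;
    MPI_Unpack(packbuf, packed_bytes, &position, unpacked, 4, MPI_INT,
               MPI_COMM_SELF);

    for (i = 0; i < 4; i++)
        printf("unpacked[%d] = %d\n", i, unpacked[i]);   /* expect 3 2 5 4 */

    free(packbuf);
    MPI_Type_free(&myvector);
    MPI_Finalize();
    return 0;
}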
/* ADIOI_PVFS2_Open: * one process opens (or creates) the file, then broadcasts the result to the * remaining processors. * * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before * that, MPI_MODE_EXCL) was set. Because PVFS2 handles file lookup and * creation more scalably than other file systems, ADIO_Open now skips any * special handling when CREATE is set. */ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code) { int rank, ret; PVFS_fs_id cur_fs; static char myname[] = "ADIOI_PVFS2_OPEN"; char pvfs_path[PVFS_NAME_MAX] = {0}; ADIOI_PVFS2_fs *pvfs2_fs; /* since one process is doing the open, that means one process is also * doing the error checking. define a struct for both the object reference * and the error code to broadcast to all the processors */ open_status o_status = {0, {0, 0}}; MPI_Datatype open_status_type; MPI_Datatype types[2] = {MPI_INT, MPI_BYTE}; int lens[2] = {1, sizeof(PVFS_object_ref)}; MPI_Aint offsets[2]; pvfs2_fs = (ADIOI_PVFS2_fs *) ADIOI_Malloc(sizeof(ADIOI_PVFS2_fs)); /* --BEGIN ERROR HANDLING-- */ if (pvfs2_fs == NULL) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_UNKNOWN, "Error allocating memory", 0); return; } /* --END ERROR HANDLING-- */ MPI_Comm_rank(fd->comm, &rank); ADIOI_PVFS2_Init(error_code); if (*error_code != MPI_SUCCESS) { /* ADIOI_PVFS2_INIT handles creating error codes on its own */ return; } /* currently everyone gets their own credentials */ ADIOI_PVFS2_makecredentials(&(pvfs2_fs->credentials)); /* one process resolves name and will later bcast to others */ if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) { /* given the filename, figure out which pvfs filesystem it is on */ ret = PVFS_util_resolve(fd->filename, &cur_fs, pvfs_path, PVFS_NAME_MAX); if (ret < 0 ) { PVFS_perror("PVFS_util_resolve", ret); /* TODO: pick a good error for this */ o_status.error = -1; } else { fake_an_open(cur_fs, pvfs_path, fd->access_mode, fd->hints->striping_factor, fd->hints->striping_unit, pvfs2_fs, &o_status); } /* store credentials and object reference in fd */ pvfs2_fs->object_ref = o_status.object_ref; fd->fs_ptr = pvfs2_fs; } /* broadcast status and (possibly valid) object reference */ MPI_Address(&o_status.error, &offsets[0]); MPI_Address(&o_status.object_ref, &offsets[1]); MPI_Type_struct(2, lens, offsets, types, &open_status_type); MPI_Type_commit(&open_status_type); /* Assertion: if we hit this Bcast, then all processes collectively * called this open. * * That's because deferred open never happens with PVFS2. */ MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0], fd->comm); MPI_Type_free(&open_status_type); /* --BEGIN ERROR HANDLING-- */ if (o_status.error != 0) { ADIOI_Free(pvfs2_fs); *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(o_status.error), "Unknown error", 0); /* TODO: FIX STRING */ return; } /* --END ERROR HANDLING-- */ pvfs2_fs->object_ref = o_status.object_ref; fd->fs_ptr = pvfs2_fs; *error_code = MPI_SUCCESS; return; }
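/* The broadcast above uses a seldom-seen idiom: the struct type is built
 * from the absolute addresses of o_status's members (MPI_Address here,
 * MPI_Get_address in current MPI) and the buffer argument is MPI_BOTTOM, so
 * each rank's displacements point into its own local variable and no staging
 * copy is needed. A distilled sketch of that idiom follows, using a
 * hypothetical two-int status struct in place of open_status and its
 * PVFS_object_ref member. */
#include <mpi.h>
#include <stdio.h>

struct status_s { int error; int handle; };

int main(int argc, char **argv)
{
    int rank;
    struct status_s st = { -1, -1 };
    int lens[2] = { 1, 1 };
    MPI_Datatype types[2] = { MPI_INT, MPI_INT };
    MPI_Aint disps[2];
    MPI_Datatype status_type;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (rank == 0) {                 /* rank 0 "resolves" the result */
        st.error = 0;
        st.handle = 1234;
    }

    /* absolute addresses act as displacements relative to MPI_BOTTOM */
    MPI_Get_address(&st.error, &disps[0]);
    MPI_Get_address(&st.handle, &disps[1]);
    MPI_Type_create_struct(2, lens, disps, types, &status_type);
    MPI_Type_commit(&status_type);

    MPI_Bcast(MPI_BOTTOM, 1, status_type, 0, MPI_COMM_WORLD);
    printf("rank %d: error=%d handle=%d\n", rank, st.error, st.handle);

    MPI_Type_free(&status_type);
    MPI_Finalize();
    return 0;
}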
int main(int argc, char *argv[]) { /* Variable declarations */ int a[100][100], b[100][100]; MPI_Datatype row, xpose; MPI_Aint sizeofint; int /* err, */ errs = 0; int bufsize, position = 0; void *buffer; int i, j; /* Initialize a to some known values. */ for(i = 0; i < 100; i++) { for(j = 0; j < 100; j++) { a[i][j] = i*1000+j; b[i][j] = -1; } } /* Initialize MPI */ MPI_Init(&argc, &argv); parse_args(argc, argv); MPI_Type_extent(MPI_INT, &sizeofint); /* Create datatypes. */ MPI_Type_vector(100, 1, 100, MPI_INT, &row); MPI_Type_hvector(100, 1, sizeofint, row, &xpose); MPI_Type_commit(&xpose); /* Pack it. */ MPI_Pack_size(1, xpose, MPI_COMM_WORLD, &bufsize); buffer = (char *) malloc((unsigned) bufsize); /* To improve reporting of problems about operations, we change the error handler to errors return */ MPI_Comm_set_errhandler( MPI_COMM_WORLD, MPI_ERRORS_RETURN ); /* err = */ MPI_Pack(a, 1, xpose, buffer, bufsize, &position, MPI_COMM_WORLD); /* Unpack the buffer into b. */ position = 0; /* err = */ MPI_Unpack(buffer, bufsize, &position, b, 100*100, MPI_INT, MPI_COMM_WORLD); for (i = 0; i < 100; i++) { for (j = 0; j < 100; j++) { if(b[i][j] != a[j][i]) { errs++; if (verbose) fprintf(stderr, "b[%d][%d] = %d, should be %d\n", i, j, b[i][j], a[j][i]); } } } MPI_Type_free(&xpose); MPI_Type_free(&row); /* print message and exit */ if (errs) { fprintf(stderr, "Found %d errors\n", errs); } else { printf(" No Errors\n"); } MPI_Finalize(); free(buffer); return 0; }
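/* MPI_Type_extent and MPI_Type_hvector, used in the transpose test above
 * (and MPI_Type_struct / MPI_Address elsewhere in this collection), were
 * removed in MPI-3; the replacements are MPI_Type_get_extent and
 * MPI_Type_create_hvector. The sketch below rebuilds the same row/xpose
 * construction with the current calls on a small 4x4 matrix; the size N and
 * the use of MPI_COMM_SELF are choices of this sketch only. */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

#define N 4

int main(int argc, char **argv)
{
    int a[N][N], b[N][N], i, j, errs = 0;
    MPI_Datatype row, xpose;
    MPI_Aint lb, sizeofint;
    int bufsize, position = 0;
    void *buffer;

    MPI_Init(&argc, &argv);

    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++) {
            a[i][j] = i * 10 + j;
            b[i][j] = -1;
        }

    MPI_Type_get_extent(MPI_INT, &lb, &sizeofint);          /* was MPI_Type_extent */
    MPI_Type_vector(N, 1, N, MPI_INT, &row);                /* one column of a */
    MPI_Type_create_hvector(N, 1, sizeofint, row, &xpose);  /* was MPI_Type_hvector */
    MPI_Type_commit(&xpose);

    MPI_Pack_size(1, xpose, MPI_COMM_SELF, &bufsize);
    buffer = malloc((size_t) bufsize);

    MPI_Pack(a, 1, xpose, buffer, bufsize, &position, MPI_COMM_SELF);
    position = 0;
    MPI_Unpack(buffer, bufsize, &position, b, N * N, MPI_INT, MPI_COMM_SELF);

    for (i = 0; i < N; i++)
        for (j = 0; j < N; j++)
            if (b[i][j] != a[j][i]) {
                errs++;
                printf("b[%d][%d] = %d, should be %d\n", i, j, b[i][j], a[j][i]);
            }
    printf("%d errors\n", errs);

    MPI_Type_free(&xpose);
    MPI_Type_free(&row);
    free(buffer);
    MPI_Finalize();
    return 0;
}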