/* ADIOI_GEN_IwriteContig
 *
 * Nonblocking contiguous write for builds in which ROMIO_HAVE_WORKING_AIO is
 * defined: the transfer is posted through ADIOI_GEN_aio().  Builds without
 * working AIO are expected to have ADIOI_Fns_struct point at the FAKE
 * version instead.
 *
 * fd            - open ADIO file
 * buf           - data to write
 * count         - number of datatype elements in buf
 * datatype      - element datatype
 * file_ptr_type - ADIO_INDIVIDUAL to use (and advance) fd->fp_ind,
 *                 otherwise 'offset' is used as given
 * offset        - explicit byte offset
 * request       - receives the request produced by ADIOI_GEN_aio()
 * error_code    - MPI_SUCCESS, or an error code built from the nonzero
 *                 value returned by ADIOI_GEN_aio()
 */
void ADIOI_GEN_IwriteContig(ADIO_File fd, const void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request *request,
                            int *error_code)
{
    static char myname[] = "ADIOI_GEN_IWRITECONTIG";
    const int use_ind_ptr = (file_ptr_type == ADIO_INDIVIDUAL);
    MPI_Count typesize, len;
    int aio_errno = 0;

    MPI_Type_size_x(datatype, &typesize);
    len = count * typesize;
    /* the byte count has to survive a round trip through int */
    ADIOI_Assert(len == (int)((ADIO_Offset)count * (ADIO_Offset)typesize));

    if (use_ind_ptr) {
        offset = fd->fp_ind;
    }

    /* ADIOI_GEN_aio serves both reads and writes, so it takes a non-const
     * buffer; drop the qualifier here. */
    aio_errno = ADIOI_GEN_aio(fd, (char *) buf, len, offset, 1, request);

    /* the individual pointer is advanced whether or not the post succeeded */
    if (use_ind_ptr) {
        fd->fp_ind += len;
    }
    fd->fp_sys_posn = -1;

    /* --BEGIN ERROR HANDLING-- */
    if (aio_errno != 0) {
        MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}
/* ADIOI_TESTFS_ReadContig
 *
 * Logs the read request to stdout and moves the file pointers as though
 * 'count' elements of 'datatype' had been read.  Nothing is stored in buf.
 *
 * For any file_ptr_type other than ADIO_EXPLICIT_OFFSET the read starts at
 * fd->fp_ind, which is then advanced; with an explicit offset only
 * fd->fp_sys_posn is updated.  *error_code is always MPI_SUCCESS.
 */
void ADIOI_TESTFS_ReadContig(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Status * status,
                             int *error_code)
{
    int myrank, nprocs;
    MPI_Count datatype_size, nbytes;

    *error_code = MPI_SUCCESS;

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);
    MPI_Type_size_x(datatype, &datatype_size);
    nbytes = datatype_size * count;

    FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_ReadContig called on %s\n",
            myrank, nprocs, fd->filename);

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        fd->fp_sys_posn = offset + nbytes;
    } else {
        offset = fd->fp_ind;
        fd->fp_ind += nbytes;
        fd->fp_sys_posn = fd->fp_ind;
    }

    FPRINTF(stdout, "[%d/%d] reading (buf = %p, loc = %lld, sz = %lld)\n",
            myrank, nprocs, buf, (long long) offset,
            (long long) datatype_size * count);

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, nbytes);
#endif
}
/* ADIOI_TESTFS_IreadContig()
 *
 * Implemented by immediately calling ReadContig() and then handing back an
 * already-completed request.
 *
 * fd, buf, count, datatype, file_ptr_type, offset - forwarded unchanged to
 *                 ADIOI_TESTFS_ReadContig()
 * request       - receives the completed request
 * error_code    - set by the blocking call and passed on to
 *                 MPIO_Completed_request_create()
 */
void ADIOI_TESTFS_IreadContig(ADIO_File fd, void *buf, int count,
                              MPI_Datatype datatype, int file_ptr_type,
                              ADIO_Offset offset, ADIO_Request *request,
                              int *error_code)
{
    ADIO_Status status;
    int myrank, nprocs;
    MPI_Count typesize, len;

    *error_code = MPI_SUCCESS;

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);
    MPI_Type_size_x(datatype, &typesize);

    FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IreadContig called on %s\n",
            myrank, nprocs, fd->filename);
    FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_ReadContig\n",
            myrank, nprocs);

    /* total bytes, kept in a 64-bit MPI_Count for the completed request */
    len = count * typesize;

    /* Pass the caller's count and datatype through rather than
     * (len, MPI_BYTE): ReadContig takes an int count, so a byte count larger
     * than INT_MAX would be truncated on the way in.  The byte total that
     * ReadContig derives (typesize * count) is the same. */
    ADIOI_TESTFS_ReadContig(fd, buf, count, datatype, file_ptr_type,
                            offset, &status, error_code);

    MPIO_Completed_request_create(&fd, len, error_code, request);
}
/* ADIOI_GEN_IreadContig
 *
 * Nonblocking contiguous read.  The operation is posted through
 * ADIOI_GEN_aio() (noted in the original source as living in
 * common/ad_iwrite.c), with 0 passed as its fifth argument.
 *
 * fd            - open ADIO file
 * buf           - destination buffer
 * count         - number of datatype elements
 * datatype      - element datatype
 * file_ptr_type - ADIO_INDIVIDUAL to use (and advance) fd->fp_ind,
 *                 otherwise 'offset' is used as given
 * offset        - explicit byte offset
 * request       - receives the request produced by ADIOI_GEN_aio()
 * error_code    - MPI_SUCCESS, or an error code built from the nonzero
 *                 value returned by ADIOI_GEN_aio()
 */
void ADIOI_GEN_IreadContig(ADIO_File fd, void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, MPI_Request *request,
                           int *error_code)
{
    static char myname[] = "ADIOI_GEN_IREADCONTIG";
    const int use_ind_ptr = (file_ptr_type == ADIO_INDIVIDUAL);
    MPI_Count typesize, len;
    int aio_errno = 0;

    MPI_Type_size_x(datatype, &typesize);
    /* the product must match the one computed from count viewed as unsigned */
    ADIOI_Assert((count * typesize) == ((ADIO_Offset)(unsigned)count * (ADIO_Offset)typesize));
    len = count * typesize;

    if (use_ind_ptr) {
        offset = fd->fp_ind;
    }

    aio_errno = ADIOI_GEN_aio(fd, buf, len, offset, 0, request);

    /* the individual pointer is advanced whether or not the post succeeded */
    if (use_ind_ptr) {
        fd->fp_ind += len;
    }
    fd->fp_sys_posn = -1;

    /* --BEGIN ERROR HANDLING-- */
    if (aio_errno != 0) {
        MPIO_ERR_CREATE_CODE_ERRNO(myname, aio_errno, error_code);
        return;
    }
    /* --END ERROR HANDLING-- */

    *error_code = MPI_SUCCESS;
}
/* testtype
 *
 * Checks that 'type' reports the expected size, a true lower bound of 0 and
 * a true extent equal to its size.  Each failed check prints a message.
 *
 * type     - datatype to inspect
 * expected - size in bytes the type should report
 *
 * Returns the number of failed checks.
 */
int testtype(MPI_Datatype type, MPI_Offset expected)
{
    MPI_Count size, lb, extent;
    int nerrors = 0;

    MPI_Type_size_x(type, &size);

    if (size < 0) {
        printf("ERROR: type size apparently overflowed integer\n");
        nerrors++;
    }

    /* MPI_Count and MPI_Offset are implementation-chosen integer types, so
     * convert explicitly to the type that %lld requires. */
    if (size != expected) {
        printf("reported type size %lld does not match expected %lld\n",
               (long long) size, (long long) expected);
        nerrors++;
    }

    MPI_Type_get_true_extent_x(type, &lb, &extent);

    if (lb != 0) {
        printf("ERROR: type should have lb of 0, reported %lld\n",
               (long long) lb);
        nerrors++;
    }

    if (extent != size) {
        printf("ERROR: extent should match size, not %lld\n",
               (long long) extent);
        nerrors++;
    }

    return nerrors;
}
/* ADIOI_TESTFS_IwriteStrided
 *
 * Logs the call, runs ADIOI_TESTFS_WriteStrided() straight away and then
 * returns an already-completed request sized count * (datatype size) bytes.
 * *error_code is whatever the blocking call and
 * MPIO_Completed_request_create() leave in it.
 */
void ADIOI_TESTFS_IwriteStrided(ADIO_File fd, const void *buf, int count,
                                MPI_Datatype datatype, int file_ptr_type,
                                ADIO_Offset offset, ADIO_Request *request,
                                int *error_code)
{
    ADIO_Status status;
    MPI_Count elem_bytes;
    int nprocs, myrank;

    *error_code = MPI_SUCCESS;

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);
    MPI_Type_size_x(datatype, &elem_bytes);

    FPRINTF(stdout, "[%d/%d] ADIOI_TESTFS_IwriteStrided called on %s\n",
            myrank, nprocs, fd->filename);
    FPRINTF(stdout, "[%d/%d] calling ADIOI_TESTFS_WriteStrided\n",
            myrank, nprocs);

    ADIOI_TESTFS_WriteStrided(fd, buf, count, datatype, file_ptr_type,
                              offset, &status, error_code);

    MPIO_Completed_request_create(&fd, count * elem_bytes, error_code,
                                  request);
}
/* ADIOI_NFS_WriteContig
 *
 * Blocking contiguous write.  The data is written with pwrite() in pieces of
 * at most INT_MAX bytes, and each piece is bracketed by a byte-range write
 * lock (ADIOI_WRITE_LOCK / ADIOI_UNLOCK) on exactly the region written.
 *
 * fd            - open ADIO file
 * buf           - data to write
 * count         - number of datatype elements in buf
 * datatype      - element datatype
 * file_ptr_type - ADIO_INDIVIDUAL to start at (and advance) fd->fp_ind,
 *                 otherwise 'offset' is used as given
 * offset        - explicit byte offset
 * status        - receives the byte count when HAVE_STATUS_SET_BYTES is set
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO-class code if pwrite fails
 *
 * On a pwrite failure fd->fp_sys_posn is set to -1 and fd->fp_ind is left
 * untouched.
 */
void ADIOI_NFS_WriteContig(ADIO_File fd, const void *buf, int count,
                           MPI_Datatype datatype, int file_ptr_type,
                           ADIO_Offset offset, ADIO_Status *status,
                           int *error_code)
{
    ssize_t err = -1;
    MPI_Count datatype_size, len;
    ADIO_Offset bytes_xfered = 0;
    size_t wr_count;
    static char myname[] = "ADIOI_NFS_WRITECONTIG";
    char *p;

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * (ADIO_Offset)count;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind;
    }

    p = (char *)buf;
    while (bytes_xfered < len) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
        wr_count = len - bytes_xfered;
        /* work around FreeBSD and OS X defects*/
        if (wr_count > INT_MAX)
            wr_count = INT_MAX;

        ADIOI_WRITE_LOCK(fd, offset+bytes_xfered, SEEK_SET, wr_count);
        err = pwrite(fd->fd_sys, p, wr_count, offset+bytes_xfered);
        /* --BEGIN ERROR HANDLING-- */
        if (err == -1) {
            /* Build the error code first, in case the unlock call below
             * modifies errno. */
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__, MPI_ERR_IO,
                                               "**io", "**io %s",
                                               strerror(errno));
            /* Release the lock taken for this piece; returning without
             * doing so would leave the region locked. */
            ADIOI_UNLOCK(fd, offset+bytes_xfered, SEEK_SET, wr_count);
            fd->fp_sys_posn = -1;
            return;
        }
        /* --END ERROR HANDLING-- */
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
        ADIOI_UNLOCK(fd, offset+bytes_xfered, SEEK_SET, wr_count);

        /* pwrite may write less than requested; continue from where it
         * stopped */
        bytes_xfered += err;
        p += err;
    }

    fd->fp_sys_posn = offset + bytes_xfered;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += bytes_xfered;
    }

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bytes_xfered);
#endif

    *error_code = MPI_SUCCESS;
}
/* ADIOI_HFS_ReadContig
 *
 * Blocking contiguous read.  With SPPUX defined the data is read with
 * pread64(); with HPUX defined it is read with lseek64() + read().
 *
 * fd            - open ADIO file
 * buf           - destination buffer
 * count         - number of datatype elements
 * datatype      - element datatype
 * file_ptr_type - ADIO_EXPLICIT_OFFSET to read at 'offset' without touching
 *                 fd->fp_ind; anything else reads at fd->fp_ind and advances it
 * offset        - explicit byte offset
 * status        - receives the byte count when HAVE_STATUS_SET_BYTES is set
 * error_code    - MPI_SUCCESS, or an error code if the read returned -1
 *
 * The file pointers are advanced only when the read succeeds.  After a
 * failed read() the system position is recorded as unknown (-1) and
 * fd->fp_ind is left unchanged.
 */
void ADIOI_HFS_ReadContig(ADIO_File fd, void *buf, int count,
                          MPI_Datatype datatype, int file_ptr_type,
                          ADIO_Offset offset, ADIO_Status *status,
                          int *error_code)
{
    MPI_Count err=-1, datatype_size, len;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_HFS_READCONTIG";
#endif

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * count;

#ifdef SPPUX
    fd->fp_sys_posn = -1; /* set it to null, since we are using pread */

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET)
        err = pread64(fd->fd_sys, buf, len, offset);
    else {
        /* read from curr. location of ind. file pointer */
        err = pread64(fd->fd_sys, buf, len, fd->fp_ind);
        /* adding the -1 error return would move the pointer backwards */
        if (err != -1)
            fd->fp_ind += err;
    }
#endif

#ifdef HPUX
    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        if (fd->fp_sys_posn != offset)
            lseek64(fd->fd_sys, offset, SEEK_SET);
        err = read(fd->fd_sys, buf, len);
        /* individual file pointer not updated */
        fd->fp_sys_posn = (err != -1) ? offset + err : -1;
    }
    else {
        /* read from curr. location of ind. file pointer */
        if (fd->fp_sys_posn != fd->fp_ind)
            lseek64(fd->fd_sys, fd->fp_ind, SEEK_SET);
        err = read(fd->fd_sys, buf, len);
        if (err != -1) {
            fd->fp_ind += err;
            fd->fp_sys_posn = fd->fp_ind;
        }
        else {
            /* position after a failed read is not known */
            fd->fp_sys_posn = -1;
        }
    }
#endif

#ifdef HAVE_STATUS_SET_BYTES
    if (err != -1)
        MPIR_Status_set_bytes(status, datatype, err);
#endif

    if (err == -1) {
#ifdef MPICH
        *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s",
                                           strerror(errno));
#elif defined(PRINT_ERR_MSG)
        *error_code = MPI_ERR_UNKNOWN;
#else /* MPICH-1 */
        *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname,
                                      "I/O Error", "%s", strerror(errno));
        ADIOI_Error(fd, *error_code, myname);
#endif
    }
    else
        *error_code = MPI_SUCCESS;
}
/* ompi_type_size_x_f
 *
 * Fortran-callable wrapper for MPI_Type_size_x: converts the Fortran
 * datatype handle, queries the size into *size, and reports the C return
 * code through *ierr when ierr is non-NULL.
 */
void ompi_type_size_x_f(MPI_Fint *type, MPI_Count *size, MPI_Fint *ierr)
{
    MPI_Datatype c_type = MPI_Type_f2c(*type);
    int c_ierr;
    OMPI_SINGLE_NAME_DECL(size);

    c_ierr = MPI_Type_size_x(c_type, size);

    if (NULL == ierr) {
        return;
    }
    *ierr = OMPI_INT_2_FINT(c_ierr);
}
/* verify_type
 *
 * Writes a per-rank canary integer through a file view built on 'type',
 * positioned one type-size past the start of the view, then re-reads it
 * through a plain MPI_INT view at expected_extent/sizeof(int)+rank and
 * compares the two.
 *
 * filename        - file to create and use
 * type            - filetype under test
 * expected_extent - byte distance at which the canary is expected to land
 * do_coll         - nonzero to use the collective _at_all routines
 *
 * Returns the number of ranks that saw a mismatch (summed over
 * MPI_COMM_WORLD).  The file is deleted by rank 0 only when there were no
 * mismatches.
 */
static int verify_type(char *filename, MPI_Datatype type,
                       int64_t expected_extent, int do_coll)
{
    MPI_File fh;
    MPI_Status status;
    MPI_Count tsize;
    int rank, canary;
    int compare = -1;
    int errs = 0, toterrs = 0;

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    CHECK( MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE|MPI_MODE_RDWR, MPI_INFO_NULL, &fh));
    CHECK( MPI_File_set_view(fh, rank*sizeof(int), MPI_BYTE, type, "native", MPI_INFO_NULL));

    MPI_Type_size_x(type, &tsize);

    canary = rank + 1000000;

    /* skip over first instance of type */
    if (do_coll) {
        CHECK( MPI_File_write_at_all(fh, tsize, &canary, 1, MPI_INT, &status));
    } else {
        CHECK( MPI_File_write_at(fh, tsize, &canary, 1, MPI_INT, &status));
    }

    CHECK( MPI_File_set_view(fh, 0, MPI_INT, MPI_INT, "native", MPI_INFO_NULL));

    if (do_coll) {
        CHECK( MPI_File_read_at_all(fh, expected_extent/sizeof(int)+rank, &compare, 1, MPI_INT, &status));
    } else {
        CHECK( MPI_File_read_at(fh, expected_extent/sizeof(int)+rank, &compare, 1, MPI_INT, &status));
    }

    errs = (compare != canary) ? 1 : 0;

    MPI_Allreduce(&errs, &toterrs, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    MPI_File_close(&fh);

    if (toterrs) {
        /* keep file if there's an error */
        printf("%d: got %d expected %d\n", rank, compare, canary);
    } else if (rank == 0) {
        MPI_File_delete(filename, MPI_INFO_NULL);
    }

    return (toterrs);
}
/* ADIOI_PANFS_ReadContig
 *
 * Blocking contiguous read: seeks when the cached system position differs
 * from the target offset, then reads through the AD_PANFS_RETRY wrapper.
 *
 * fd            - open ADIO file
 * buf           - destination buffer
 * count         - number of datatype elements
 * datatype      - element datatype
 * file_ptr_type - ADIO_INDIVIDUAL to start at (and advance) fd->fp_ind,
 *                 otherwise 'offset' is used as given
 * offset        - explicit byte offset
 * status        - receives the byte count when HAVE_STATUS_SET_BYTES is set
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO-class code if lseek or the
 *                 read reported -1 (fd->fp_sys_posn is then set to -1)
 */
void ADIOI_PANFS_ReadContig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Status *status,
                            int *error_code)
{
    static char myname[] = "ADIOI_PANFS_READCONTIG";
    const int use_ind_ptr = (file_ptr_type == ADIO_INDIVIDUAL);
    MPI_Count err = -1, datatype_size, len;

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * count;

    if (use_ind_ptr) {
        offset = fd->fp_ind;
    }

    if (fd->fp_sys_posn != offset) {
        err = lseek(fd->fd_sys, offset, SEEK_SET);
        /* --BEGIN ERROR HANDLING-- */
        if (err == -1) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__, MPI_ERR_IO,
                                               "**io", "**io %s",
                                               strerror(errno));
            fd->fp_sys_posn = -1;
            return;
        }
        /* --END ERROR HANDLING-- */
    }

    AD_PANFS_RETRY(read(fd->fd_sys, buf, len),err)
    /* --BEGIN ERROR HANDLING-- */
    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s",
                                           strerror(errno));
        fd->fp_sys_posn = -1;
        return;
    }
    /* --END ERROR HANDLING-- */

    /* from here on err holds the number of bytes read */
    fd->fp_sys_posn = offset + err;
    if (use_ind_ptr) {
        fd->fp_ind += err;
    }

#ifdef HAVE_STATUS_SET_BYTES
    /* err cannot be -1 here: that case returned above */
    MPIR_Status_set_bytes(status, datatype, err);
#endif

    *error_code = MPI_SUCCESS;
}
/* typestats
 *
 * Prints the lower bound, extent and size of 'type' on one line (no trailing
 * newline).
 */
static void typestats(MPI_Datatype type)
{
    MPI_Aint lb, extent;
    MPI_Count size;

    MPI_Type_get_extent(type, &lb, &extent);
    MPI_Type_size_x(type, &size);

    /* MPI_Count is an implementation-chosen integer type; convert it
     * explicitly so the argument matches its conversion specifier.
     * NOTE(review): 'type' is printed with %d, which assumes MPI_Datatype
     * is an int handle - confirm for the MPI implementation in use. */
    printf("dtype %d: lb = %ld extent = %ld size = %lld...",
           type, (long)lb, (long)extent, (long long)size);
}
/* ADIOI_PIOFS_WriteContig
 *
 * Blocking contiguous write using llseek() + write().
 *
 * fd            - open ADIO file
 * buf           - data to write
 * count         - number of datatype elements in buf
 * datatype      - element datatype
 * file_ptr_type - ADIO_EXPLICIT_OFFSET to write at 'offset' without touching
 *                 fd->fp_ind; anything else writes at fd->fp_ind and advances it
 * offset        - explicit byte offset
 * status        - receives the byte count when HAVE_STATUS_SET_BYTES is set
 * error_code    - MPI_SUCCESS, or an error code if write() returned -1
 *
 * The file pointers are advanced only when write() succeeds.  After a failed
 * write the system position is recorded as unknown (-1) and fd->fp_ind is
 * left unchanged.
 */
void ADIOI_PIOFS_WriteContig(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Status *status,
                             int *error_code)
{
    MPI_Count err=-1, datatype_size, len;
#ifndef PRINT_ERR_MSG
    static char myname[] = "ADIOI_PIOFS_WRITECONTIG";
#endif

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * count;

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        if (fd->fp_sys_posn != offset) {
            llseek(fd->fd_sys, offset, SEEK_SET);
        }
        err = write(fd->fd_sys, buf, len);
        /* individual file pointer not updated */
        fd->fp_sys_posn = (err != -1) ? offset + err : -1;
    }
    else {
        /* write from curr. location of ind. file pointer */
        if (fd->fp_sys_posn != fd->fp_ind) {
            llseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
        }
        err = write(fd->fd_sys, buf, len);
        if (err != -1) {
            fd->fp_ind += err;
            fd->fp_sys_posn = fd->fp_ind;
        }
        else {
            /* adding the -1 error return would move the pointers
             * backwards; mark the system position unknown instead */
            fd->fp_sys_posn = -1;
        }
    }

#ifdef HAVE_STATUS_SET_BYTES
    if (err != -1)
        MPIR_Status_set_bytes(status, datatype, err);
#endif

    if (err == -1) {
#ifdef MPICH
        *error_code = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s",
                                           strerror(errno));
#elif defined(PRINT_ERR_MSG)
        *error_code = MPI_ERR_UNKNOWN;
#else
        *error_code = MPIR_Err_setmsg(MPI_ERR_IO, MPIR_ADIO_ERROR, myname,
                                      "I/O Error", "%s", strerror(errno));
        ADIOI_Error(fd, *error_code, myname);
#endif
    }
    else
        *error_code = MPI_SUCCESS;
}
/* ADIOI_Get_position
 *
 * Converts the individual file pointer fd->fp_ind (a byte position) into a
 * count of etypes relative to the current view and stores it in *offset.
 *
 * For a contiguous filetype this is (fp_ind - disp) / etype_size.  Otherwise
 * the flattened filetype is tiled forward from fd->disp until a block whose
 * end is at or beyond fp_ind is found; the bytes of the view that lie before
 * fp_ind are then divided by etype_size.
 */
void ADIOI_Get_position(ADIO_File fd, ADIO_Offset *offset)
{
    ADIOI_Flatlist_node *flat_file;
    MPI_Count filetype_size, etype_size;
    MPI_Aint filetype_extent;
    ADIO_Offset disp, byte_offset, block_end, size_in_file;
    ADIO_Offset sum = 0, n_filetypes = -1;
    int filetype_is_contig;
    int i, found = 0;

    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    etype_size = fd->etype_size;

    if (filetype_is_contig) {
        *offset = (fd->fp_ind - fd->disp) / etype_size;
        return;
    }

    flat_file = ADIOI_Flatten_and_find(fd->filetype);
    MPI_Type_size_x(fd->filetype, &filetype_size);
    MPI_Type_extent(fd->filetype, &filetype_extent);

    disp = fd->disp;
    byte_offset = fd->fp_ind;

    /* walk one filetype instance at a time until fp_ind is covered */
    do {
        sum = 0;
        n_filetypes++;
        for (i = 0; i < flat_file->count; i++) {
            sum += flat_file->blocklens[i];
            block_end = disp + flat_file->indices[i] +
                n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent +
                flat_file->blocklens[i];
            if (block_end >= byte_offset) {
                /* drop the part of this block that lies past fp_ind */
                sum -= block_end - byte_offset;
                found = 1;
                break;
            }
        }
    } while (!found);

    size_in_file = n_filetypes * (ADIO_Offset)filetype_size + sum;
    *offset = size_in_file / etype_size;
}
/* Generic IreadStrided: performs the blocking ReadStrided right away and
 * returns an already-completed request.
 *
 * The request is sized count * (datatype size) bytes when the blocking call
 * succeeded and 0 bytes otherwise.  *error_code is whatever the blocking
 * call and MPIO_Completed_request_create() leave in it.
 */
void ADIOI_FAKE_IreadStrided(ADIO_File fd, void *buf, int count,
                             MPI_Datatype datatype, int file_ptr_type,
                             ADIO_Offset offset, ADIO_Request *request,
                             int *error_code)
{
    ADIO_Status status;
    MPI_Count typesize;
    MPI_Offset nbytes;

    /* The blocking function creates an error code itself if it needs to. */
    ADIO_ReadStrided(fd, buf, count, datatype, file_ptr_type,
                     offset, &status, error_code);

    if (*error_code != MPI_SUCCESS) {
        nbytes = 0;
    } else {
        MPI_Type_size_x(datatype, &typesize);
        nbytes = (MPI_Offset)count*(MPI_Offset)typesize;
    }

    MPIO_Completed_request_create(&fd, nbytes, error_code, request);
}
/* ADIOI_Get_byte_offset
 *
 * Converts 'offset', a count of etypes relative to the current view, into an
 * absolute byte position in the file and stores it in *disp.
 *
 * For a contiguous filetype this is fd->disp + etype_size * offset.
 * Otherwise the offset is split into a number of whole filetypes and a
 * remainder, and the remainder is located inside the flattened filetype.
 */
void ADIOI_Get_byte_offset(ADIO_File fd, ADIO_Offset offset, ADIO_Offset * disp)
{
    ADIOI_Flatlist_node *flat_file;
    MPI_Count n_etypes_in_filetype, filetype_size, etype_size;
    MPI_Aint filetype_extent;
    ADIO_Offset n_filetypes, etype_in_filetype, size_in_filetype;
    ADIO_Offset covered = 0, abs_off_in_filetype = 0;
    int filetype_is_contig;
    int i;

    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);
    etype_size = fd->etype_size;

    if (filetype_is_contig) {
        *disp = fd->disp + etype_size * offset;
        return;
    }

    flat_file = ADIOI_Flatten_and_find(fd->filetype);

    MPI_Type_size_x(fd->filetype, &filetype_size);
    n_etypes_in_filetype = filetype_size / etype_size;
    n_filetypes = offset / n_etypes_in_filetype;
    etype_in_filetype = offset % n_etypes_in_filetype;
    size_in_filetype = etype_in_filetype * etype_size;

    /* find the block that holds data byte number size_in_filetype */
    for (i = 0; i < flat_file->count; i++) {
        ADIO_Offset block_start = covered;

        covered += flat_file->blocklens[i];
        if (covered > size_in_filetype) {
            abs_off_in_filetype = flat_file->indices[i] +
                size_in_filetype - block_start;
            break;
        }
    }

    /* abs. offset in bytes in the file */
    MPI_Type_extent(fd->filetype, &filetype_extent);
    *disp = fd->disp + n_filetypes * ADIOI_AINT_CAST_TO_OFFSET filetype_extent +
        abs_off_in_filetype;
}
/* ADIOI_PFS_ReadContig
 *
 * Blocking contiguous read using lseek() + _cread().
 *
 * fd            - open ADIO file
 * buf           - destination buffer
 * count         - number of datatype elements
 * datatype      - element datatype
 * file_ptr_type - ADIO_EXPLICIT_OFFSET to read at 'offset' without touching
 *                 fd->fp_ind; anything else reads at fd->fp_ind and advances it
 * offset        - explicit byte offset
 * status        - receives the byte count when HAVE_STATUS_SET_BYTES is set
 * error_code    - MPI_SUCCESS, or an MPI_ERR_IO-class code if _cread()
 *                 returned -1
 *
 * The file pointers are advanced only when the read succeeds.  After a
 * failed read the system position is recorded as unknown (-1) and
 * fd->fp_ind is left unchanged.
 */
void ADIOI_PFS_ReadContig(ADIO_File fd, void *buf, int count,
                          MPI_Datatype datatype, int file_ptr_type,
                          ADIO_Offset offset, ADIO_Status *status,
                          int *error_code)
{
    MPI_Count err=-1, datatype_size, len;
    static char myname[] = "ADIOI_PFS_READCONTIG";

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * count;

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        if (fd->fp_sys_posn != offset) {
            lseek(fd->fd_sys, offset, SEEK_SET);
        }
        err = _cread(fd->fd_sys, buf, len);
        /* individual file pointer not updated */
        fd->fp_sys_posn = (err != -1) ? offset + err : -1;
    }
    else {
        /* read from curr. location of ind. file pointer */
        if (fd->fp_sys_posn != fd->fp_ind) {
            lseek(fd->fd_sys, fd->fp_ind, SEEK_SET);
        }
        err = _cread(fd->fd_sys, buf, len);
        if (err != -1) {
            fd->fp_ind += err;
            fd->fp_sys_posn = fd->fp_ind;
        }
        else {
            /* adding the -1 error return would move the pointers
             * backwards; mark the system position unknown instead */
            fd->fp_sys_posn = -1;
        }
    }

#ifdef HAVE_STATUS_SET_BYTES
    if (err != -1)
        MPIR_Status_set_bytes(status, datatype, err);
#endif

    if (err == -1) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__, MPI_ERR_IO,
                                           "**io", "**io %s",
                                           strerror(errno));
    }
    else
        *error_code = MPI_SUCCESS;
}
/* Since ADIOI_Flatten_datatype won't add a contig datatype to the
 * ADIOI_Flatlist, we can force it to do so with this function.
 *
 * contig_type - datatype to look up or add
 *
 * Returns the list node for contig_type.  If the type is already on the
 * list that node is returned; otherwise a node with a single block
 * (index 0, length = size of the type) is appended.  Returns NULL, leaving
 * the list unchanged, if an allocation fails.
 */
ADIOI_Flatlist_node * ADIOI_Add_contig_flattened(MPI_Datatype contig_type)
{
    MPI_Count contig_type_sz = -1;
    ADIOI_Flatlist_node *tail = ADIOI_Flatlist;
    ADIOI_Flatlist_node *node;

    /* Add contig type to the end of the list if it doesn't already
     * exist. */
    while (tail->next) {
        if (tail->type == contig_type)
            return tail;
        tail = tail->next;
    }
    if (tail->type == contig_type)
        return tail;

    MPI_Type_size_x(contig_type, &contig_type_sz);

    node = (ADIOI_Flatlist_node *) ADIOI_Malloc(sizeof(ADIOI_Flatlist_node));
    if (node == NULL) {
        fprintf(stderr, "ADIOI_Add_contig_flattened: malloc next failed\n");
        return NULL;
    }

    node->type = contig_type;

    node->blocklens = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
    if (node->blocklens == NULL) {
        fprintf(stderr, "ADIOI_Flatlist_node: malloc blocklens failed\n");
    }
    node->indices = (ADIO_Offset *) ADIOI_Malloc(sizeof(ADIO_Offset));
    if (node->indices == NULL) {
        fprintf(stderr, "ADIOI_Flatlist_node: malloc indices failed\n");
    }

    if (node->blocklens == NULL || node->indices == NULL) {
        /* Undo the partial construction instead of writing through a NULL
         * pointer below.  Frees are guarded in case ADIOI_Free does not
         * accept NULL. */
        if (node->blocklens != NULL)
            ADIOI_Free(node->blocklens);
        if (node->indices != NULL)
            ADIOI_Free(node->indices);
        ADIOI_Free(node);
        return NULL;
    }

    node->blocklens[0] = contig_type_sz;
    node->indices[0] = 0;
    node->count = 1;
    node->next = NULL;

    /* link the node in only once it is fully set up */
    tail->next = node;

    return node;
}
/* ADIOI_GEN_IreadStridedColl_fini
 *
 * Last step of the nonblocking collective strided read: records the byte
 * count on the request, frees the per-operation variables, completes the
 * generalized request and marks the state machine as complete.
 *
 * nbc_req    - nonblocking-collective request being finished
 * error_code - receives the return value of MPI_Grequest_complete()
 */
static void ADIOI_GEN_IreadStridedColl_fini(ADIOI_NBC_Request *nbc_req,
                                            int *error_code)
{
    ADIOI_GEN_IreadStridedColl_vars *vars = nbc_req->data.rd.rsc_vars;
    MPI_Count type_bytes;

    /* This is a temporary way of filling in status.  The right way is to
     * keep track of how much data was actually read and placed in buf
     * during collective I/O. */
    MPI_Type_size_x(vars->datatype, &type_bytes);
    nbc_req->nbytes = type_bytes * vars->count;

    /* release the struct holding parameters and variables */
    if (vars) {
        ADIOI_Free(vars);
        nbc_req->data.rd.rsc_vars = NULL;
    }

    /* make the request complete */
    *error_code = MPI_Grequest_complete(nbc_req->req);
    nbc_req->data.rd.state = ADIOI_IRC_STATE_COMPLETE;
}
/* Generic IreadContig: performs the blocking ReadContig right away and
 * returns an already-completed request.
 *
 * The read is issued as a byte count with MPI_BYTE, so the total must fit
 * in an int (asserted).  The request is sized with that byte count when the
 * blocking call succeeded and with 0 otherwise.
 */
void ADIOI_FAKE_IreadContig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, ADIO_Request *request,
                            int *error_code)
{
    ADIO_Status status;
    MPI_Count typesize;
    MPI_Offset nbytes;

    MPI_Type_size_x(datatype, &typesize);
    nbytes = (MPI_Offset)count * (MPI_Offset)typesize;

    /* the count is an int parm */
    ADIOI_Assert(nbytes == (int) nbytes);

    /* The blocking function creates an error code itself if it needs to. */
    ADIO_ReadContig(fd, buf, (int)nbytes, MPI_BYTE, file_ptr_type, offset,
                    &status, error_code);

    if (*error_code != MPI_SUCCESS) {
        nbytes = 0;
    }

    MPIO_Completed_request_create(&fd, nbytes, error_code, request);
}
int main(int argc, char *argv[]) { int rank, size; MPI_Datatype type; int errs = 0, mpi_errno, errclass; MTest_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Errhandler_set(MPI_COMM_WORLD, MPI_ERRORS_RETURN); /* Checking type_size_x for NULL variable */ type = MPI_INT; mpi_errno = MPI_Type_size_x(type, NULL); MPI_Error_class(mpi_errno, &errclass); if (errclass != MPI_ERR_ARG) ++errs; MPI_Type_free(&type); MTest_Finalize(errs); return 0; }
/* ADIOI_PVFS2_AIO_contig
 *
 * Posts a nonblocking contiguous read or write through PVFS_isys_read() /
 * PVFS_isys_write().
 *
 * fd            - open ADIO file (fd->fs_ptr holds the ADIOI_PVFS2_fs state)
 * buf           - user buffer
 * count         - number of datatype elements
 * datatype      - element datatype
 * file_ptr_type - ADIO_INDIVIDUAL to start at (and advance) fd->fp_ind,
 *                 otherwise 'offset' is used as given
 * offset        - explicit byte offset
 * request       - receives the MPI request
 * flag          - READ or WRITE
 * error_code    - MPI_SUCCESS, or an error code if building the PVFS
 *                 requests or posting the operation failed
 *
 * On every failure path the AIO bookkeeping struct and any PVFS request
 * objects already created are released before returning.
 */
void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count,
                            MPI_Datatype datatype, int file_ptr_type,
                            ADIO_Offset offset, MPI_Request *request,
                            int flag, int *error_code)
{
    int ret;
    int have_mem_req = 0, have_file_req = 0;
    MPI_Count datatype_size, len;
    ADIOI_PVFS2_fs *pvfs_fs;
    ADIOI_AIO_Request *aio_req;
    static char myname[] = "ADIOI_PVFS2_AIO_contig";

    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;

    aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1);

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * count;

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &(aio_req->mem_req));
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in pvfs_request_contig (memory)", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */
    have_mem_req = 1;

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &(aio_req->file_req));
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in pvfs_request_contig (file)", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */
    have_file_req = 1;

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        /* copy individual file pointer into offset variable, continue */
        offset = fd->fp_ind;
    }

    if (flag == READ) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iread_a, 0, NULL );
#endif
        ret = PVFS_isys_read(pvfs_fs->object_ref, aio_req->file_req, offset,
                             buf, aio_req->mem_req, &(pvfs_fs->credentials),
                             &(aio_req->resp_io), &(aio_req->op_id), NULL);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iread_b, 0, NULL );
#endif
    } else if (flag == WRITE) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iwrite_a, 0, NULL );
#endif
        ret = PVFS_isys_write(pvfs_fs->object_ref, aio_req->file_req, offset,
                              buf, aio_req->mem_req, &(pvfs_fs->credentials),
                              &(aio_req->resp_io), &(aio_req->op_id), NULL);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event( ADIOI_MPE_iwrite_b, 0, NULL );
#endif
    }

    /* --BEGIN ERROR HANDLING-- */
    if (ret < 0 ) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in PVFS_isys_io", 0);
        goto fn_fail;
    }
    /* --END ERROR HANDLING-- */

#ifdef HAVE_MPI_GREQUEST_EXTENSIONS
    /* posted. defered completion */
    if (ret == 0) {
        if (ADIOI_PVFS2_greq_class == 0) {
            MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn,
                                       ADIOI_PVFS2_aio_free_fn,
                                       MPIU_Greq_cancel_fn,
                                       ADIOI_PVFS2_aio_poll_fn,
                                       ADIOI_PVFS2_aio_wait_fn,
                                       &ADIOI_PVFS2_greq_class);
        }
        /* aio_req is handed to the generalized request here */
        MPIX_Grequest_class_allocate(ADIOI_PVFS2_greq_class, aio_req, request);
        memcpy(&(aio_req->req), request, sizeof(*request));
    }
#else
    /* if generalized request extensions not available, we will have to process
     * this operation right here */
    {
        int error;

        ret = PVFS_sys_wait(aio_req->op_id, "ADIOI_PVFS2_AIO_Contig", &error);
        if (ret == 0) {
            MPIO_Completed_request_create(&fd, len, error_code, request);
        }
    }
#endif

    /* immediate completion */
    /* NOTE(review): aio_req does not appear to be released on the completed
     * paths either - confirm whether it is still needed there. */
    if (ret == 1) {
        MPIO_Completed_request_create(&fd, len, error_code, request);
    }

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += len;
    }
    fd->fp_sys_posn = offset + len;

    *error_code = MPI_SUCCESS;
    return;

fn_fail:
    /* nothing was posted: drop whatever had been set up so far */
    if (have_file_req)
        PVFS_Request_free(&(aio_req->file_req));
    if (have_mem_req)
        PVFS_Request_free(&(aio_req->mem_req));
    ADIOI_Free(aio_req);
    return;
}
void ADIOI_GEN_ReadStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { /* offset is in units of etype relative to the filetype. */ ADIOI_Flatlist_node *flat_buf, *flat_file; ADIO_Offset i_offset, new_brd_size, brd_size, size; int i, j, k, st_index=0; MPI_Count num, bufsize; int n_etypes_in_filetype; ADIO_Offset n_filetypes, etype_in_filetype, st_n_filetypes, size_in_filetype; ADIO_Offset abs_off_in_filetype=0, new_frd_size, frd_size=0, st_frd_size; MPI_Count filetype_size, etype_size, buftype_size, partial_read; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off, req_len, sum; ADIO_Offset off, req_off, disp, end_offset=0, readbuf_off, start_off; char *readbuf, *tmp_buf, *value; int info_flag; unsigned max_bufsize, readbuf_len; ADIO_Status status1; if (fd->hints->ds_read == ADIOI_HINT_DISABLE) { /* if user has disabled data sieving on reads, use naive * approach instead. */ ADIOI_GEN_ReadStrided_naive(fd, buf, count, datatype, file_ptr_type, offset, status, error_code); return; } *error_code = MPI_SUCCESS; /* changed below if error */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); MPI_Type_size_x(fd->filetype, &filetype_size); if ( ! filetype_size ) { #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif *error_code = MPI_SUCCESS; return; } MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; ADIOI_Assert((buftype_size * count) == ((ADIO_Offset)(MPI_Count)buftype_size * (ADIO_Offset)count)); bufsize = buftype_size * count; /* get max_bufsize from the info object. 
*/ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL+1)*sizeof(char)); ADIOI_Info_get(fd->info, "ind_rd_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); if (!buftype_is_contig && filetype_is_contig) { /* noncontiguous in memory, contiguous in file. */ flat_buf = ADIOI_Flatten_and_find(datatype); off = (file_ptr_type == ADIO_INDIVIDUAL) ? fd->fp_ind : fd->disp + (ADIO_Offset)etype_size * offset; start_off = off; end_offset = off + bufsize - 1; readbuf_off = off; readbuf = (char *) ADIOI_Malloc(max_bufsize); readbuf_len = (unsigned) (MPL_MIN(max_bufsize, end_offset-readbuf_off+1)); /* if atomicity is true, lock (exclusive) the region to be accessed */ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); ADIO_ReadContig(fd, readbuf, readbuf_len, MPI_BYTE, ADIO_EXPLICIT_OFFSET, readbuf_off, &status1, error_code); if (*error_code != MPI_SUCCESS) return; for (j=0; j<count; j++) { for (i=0; i<flat_buf->count; i++) { userbuf_off = (ADIO_Offset)j*(ADIO_Offset)buftype_extent + flat_buf->indices[i]; req_off = off; req_len = flat_buf->blocklens[i]; ADIOI_BUFFERED_READ off += flat_buf->blocklens[i]; } } if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; ADIOI_Free(readbuf); } else { /* noncontiguous in file */ flat_file = ADIOI_Flatten_and_find(fd->filetype); disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { /* Wei-keng reworked type processing to be a bit more efficient */ offset = fd->fp_ind - disp; n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; offset -= (ADIO_Offset)n_filetypes * filetype_extent; /* now offset is local to this extent */ /* find the block where offset is located, skip blocklens[i]==0 */ for (i=0; i<flat_file->count; i++) { ADIO_Offset dist; if (flat_file->blocklens[i] == 0) continue; dist = 
flat_file->indices[i] + flat_file->blocklens[i] - offset; /* frd_size is from offset to the end of block i */ if (dist == 0) { i++; offset = flat_file->indices[i]; frd_size = flat_file->blocklens[i]; break; } if (dist > 0) { frd_size = dist; break; } } st_index = i; /* starting index in flat_file->indices[] */ offset += disp + (ADIO_Offset)n_filetypes*filetype_extent; } else { n_etypes_in_filetype = filetype_size/etype_size; n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; for (i=0; i<flat_file->count; i++) { sum += flat_file->blocklens[i]; if (sum > size_in_filetype) { st_index = i; frd_size = sum - size_in_filetype; abs_off_in_filetype = flat_file->indices[i] + size_in_filetype - (sum - flat_file->blocklens[i]); break; } } /* abs. offset in bytes in the file */ offset = disp + (ADIO_Offset) n_filetypes*filetype_extent + abs_off_in_filetype; } start_off = offset; /* Wei-keng Liao: read request is within a single flat_file contig * block e.g. with subarray types that actually describe the whole * array */ if (buftype_is_contig && bufsize <= frd_size) { /* a count of bytes can overflow. operate on original type instead */ ADIO_ReadContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET, offset, status, error_code); if (file_ptr_type == ADIO_INDIVIDUAL) { /* update MPI-IO file pointer to point to the first byte that * can be accessed in the fileview. */ fd->fp_ind = offset + bufsize; if (bufsize == frd_size) { do { st_index++; if (st_index == flat_file->count) { st_index = 0; n_filetypes++; } } while (flat_file->blocklens[st_index] == 0); fd->fp_ind = disp + flat_file->indices[st_index] + n_filetypes*filetype_extent; } } fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); #endif return; } /* Calculate end_offset, the last byte-offset that will be accessed. 
e.g., if start_offset=0 and 100 bytes to be read, end_offset=99*/ st_frd_size = frd_size; st_n_filetypes = n_filetypes; i_offset = 0; j = st_index; off = offset; frd_size = MPL_MIN(st_frd_size, bufsize); while (i_offset < bufsize) { i_offset += frd_size; end_offset = off + frd_size - 1; j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; while (flat_file->blocklens[j]==0) { j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; frd_size = MPL_MIN(flat_file->blocklens[j], bufsize-i_offset); } /* if atomicity is true, lock (exclusive) the region to be accessed */ if ((fd->atomicity) && ADIO_Feature(fd, ADIO_LOCKS)) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset-start_off+1); readbuf_off = 0; readbuf_len = 0; readbuf = (char *) ADIOI_Malloc(max_bufsize); if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. should be the most common case. */ i_offset = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; frd_size = MPL_MIN(st_frd_size, bufsize); while (i_offset < bufsize) { if (frd_size) { /* TYPE_UB and TYPE_LB can result in frd_size = 0. save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); err = read(fd->fd_sys, ((char *) buf) + i, frd_size);*/ req_off = off; req_len = frd_size; userbuf_off = i_offset; ADIOI_BUFFERED_READ } i_offset += frd_size; if (off + frd_size < disp + flat_file->indices[j] + flat_file->blocklens[j] + n_filetypes*(ADIO_Offset)filetype_extent) off += frd_size; /* did not reach end of contiguous block in filetype. no more I/O needed. off is incremented by frd_size. */ else { j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; while (flat_file->blocklens[j]==0) { j = (j+1) % flat_file->count; n_filetypes += (j == 0) ? 
1 : 0; } off = disp + flat_file->indices[j] + n_filetypes*(ADIO_Offset)filetype_extent; frd_size = MPL_MIN(flat_file->blocklens[j], bufsize-i_offset); } } } else {
/*----< main() >------------------------------------------------------------*/ int main(int argc, char **argv) { int i, j, err, rank, np, num_io; char *buf, *filename; int rank_dim[2], array_of_sizes[2]; int array_of_subsizes[2]; int count, *blocklengths, global_array_size; MPI_Count ftype_size; MPI_Aint *displacements; MPI_File fh; MPI_Datatype ftype; MPI_Request *request; MPI_Status *statuses; MPI_Status status; MPI_Offset offset = 0; int nr_errors = 0; #ifdef VERBOSE int k; #endif MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &np); if (np != 4) { if (!rank) printf("Please run with 4 processes. Exiting ...\n\n"); MPI_Finalize(); return 1; } filename = (argc > 1) ? argv[1] : "testfile"; num_io = 2; request = (MPI_Request *) malloc(num_io * sizeof(MPI_Request)); statuses = (MPI_Status *) malloc(num_io * sizeof(MPI_Status)); /*-----------------------------------------------------------------------*/ /* process rank in each dimension */ rank_dim[0] = rank / 2; rank_dim[1] = rank % 2; /* global 2D array size */ array_of_sizes[0] = YLEN * 2; array_of_sizes[1] = XLEN * 2; global_array_size = array_of_sizes[0] * array_of_sizes[1]; array_of_subsizes[0] = YLEN / 2; array_of_subsizes[1] = XLEN * SUB_XLEN / 5; offset = rank_dim[0] * YLEN * array_of_sizes[1] + rank_dim[1] * XLEN; /* define data type for file view */ count = array_of_subsizes[0] * 2; /* 2 is the no. 
blocks along X */ blocklengths = (int *) malloc(count * sizeof(int)); displacements = (MPI_Aint *) malloc(count * sizeof(MPI_Aint)); for (i = 0; i < count; i++) blocklengths[i] = array_of_subsizes[1] / 2; for (i = 0; i < array_of_subsizes[0]; i++) for (j = 0; j < 2; j++) displacements[i * 2 + j] = offset + i * 2 * array_of_sizes[1] + j * XLEN / 2; MPI_Type_create_hindexed(count, blocklengths, displacements, MPI_CHAR, &ftype); MPI_Type_commit(&ftype); MPI_Type_size_x(ftype, &ftype_size); /* subarray's layout in the global array P0's 's layout P1's layout [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] | [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] [ 0] 0 1 2 3 4 5 | D E F G H I [ 1] | [ 2] 6 7 8 9 : ; | J K L M N O [ 3] | [ 4] | [ 5] | [ 6] | [ 7] | [ 8] | [ 9] | P2's 's layout P3's layout [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] | [ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] [ 0] | [ 1] | [ 2] | [ 3] | [ 4] | [ 5] X Y Z [ \ ] | l m n o p q [ 6] | [ 7] ^ _ ` a b c | r s t u v w [ 8] | [ 9] | */ /* initialize the write buffer */ buf = (char *) malloc(array_of_subsizes[0] * array_of_subsizes[1]); for (i = 0; i < array_of_subsizes[0] * array_of_subsizes[1]; i++) buf[i] = '0' + rank * 20 + i % 79; /* zero file contents --------------------------------------------------- */ if (rank == 0) { char *wr_buf = (char *) calloc(num_io * global_array_size, 1); MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); MPI_File_write(fh, wr_buf, num_io * global_array_size, MPI_CHAR, &status); MPI_File_close(&fh); free(wr_buf); } /* open the file -------------------------------------------------------- */ err = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); if (err != MPI_SUCCESS) { printf("Error: MPI_File_open() filename %s\n", filename); MPI_Abort(MPI_COMM_WORLD, -1); exit(1); } /* MPI nonblocking collective write */ for (i = 0; i < num_io; i++) { offset = i * global_array_size; /* set the file view */ 
MPI_File_set_view(fh, offset, MPI_BYTE, ftype, "native", MPI_INFO_NULL); MPI_File_iwrite_all(fh, buf, ftype_size, MPI_CHAR, &request[i]); } MPI_Waitall(num_io, request, statuses); MPI_File_close(&fh); /* read and print file contents ----------------------------------------- */ if (rank == 0) { char *ptr; char *rd_buf = (char *) calloc(num_io * global_array_size, 1); MPI_File_open(MPI_COMM_SELF, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, &fh); MPI_File_read(fh, rd_buf, num_io * global_array_size, MPI_CHAR, &status); MPI_File_close(&fh); #ifdef VERBOSE printf("-------------------------------------------------------\n"); printf(" ["); for (i = 0; i < 2; i++) { for (j = 0; j < XLEN; j++) printf(" %d", j); printf(" "); } printf("]\n\n"); ptr = rd_buf; for (k = 0; k < num_io; k++) { for (i = 0; i < 2 * YLEN; i++) { printf("[%2d]", k * 2 * YLEN + i); for (j = 0; j < 2 * XLEN; j++) { if (j > 0 && j % XLEN == 0) printf(" "); if (*ptr != 0) printf(" %c", *ptr); else printf(" "); ptr++; } printf("\n"); } printf("\n"); } #endif ptr = rd_buf; for (i = 0; i < 2 * YLEN * num_io; i++) { for (j = 0; j < 2 * XLEN; j++) { if (*ptr != compare_buf[i][j]) { fprintf(stderr, "expected %d got %d at [%d][%d]\n", *ptr, compare_buf[i][j], i, j); nr_errors++; } ptr++; } } free(rd_buf); if (nr_errors == 0) fprintf(stdout, " No Errors\n"); else fprintf(stderr, "Found %d errors\n", nr_errors); } free(blocklengths); free(displacements); free(buf); free(request); free(statuses); MPI_Type_free(&ftype); MPI_Finalize(); return 0; }
void ADIOI_NFS_WriteStrided(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status * status, int *error_code) { /* offset is in units of etype relative to the filetype. */ ADIOI_Flatlist_node *flat_buf, *flat_file; int i, j, k, err = -1, bwr_size, st_index = 0; ADIO_Offset i_offset, sum, size_in_filetype; ADIO_Offset num, size, n_etypes_in_filetype; MPI_Count bufsize; ADIO_Offset n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype = 0; int req_len; MPI_Count filetype_size, etype_size, buftype_size; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset userbuf_off; ADIO_Offset off, req_off, disp, end_offset = 0, writebuf_off, start_off; char *writebuf = NULL, *value; int st_n_filetypes, writebuf_len, write_sz; ADIO_Offset fwr_size = 0, new_fwr_size, st_fwr_size; int new_bwr_size, err_flag = 0, info_flag, max_bufsize; static char myname[] = "ADIOI_NFS_WRITESTRIDED"; ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); MPI_Type_size_x(fd->filetype, &filetype_size); if (!filetype_size) { #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif *error_code = MPI_SUCCESS; return; } MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_size_x(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; bufsize = buftype_size * count; /* get max_bufsize from the info object. */ value = (char *) ADIOI_Malloc((MPI_MAX_INFO_VAL + 1) * sizeof(char)); ADIOI_Info_get(fd->info, "ind_wr_buffer_size", MPI_MAX_INFO_VAL, value, &info_flag); max_bufsize = atoi(value); ADIOI_Free(value); if (!buftype_is_contig && filetype_is_contig) { /* noncontiguous in memory, contiguous in file. */ flat_buf = ADIOI_Flatten_and_find(datatype); off = (file_ptr_type == ADIO_INDIVIDUAL) ? 
fd->fp_ind : fd->disp + etype_size * offset; start_off = off; end_offset = off + bufsize - 1; writebuf_off = off; writebuf = (char *) ADIOI_Malloc(max_bufsize); writebuf_len = (int) (MPL_MIN(max_bufsize, end_offset - writebuf_off + 1)); /* if atomicity is true, lock the region to be accessed */ if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); for (j = 0; j < count; j++) for (i = 0; i < flat_buf->count; i++) { userbuf_off = j * buftype_extent + flat_buf->indices[i]; req_off = off; req_len = flat_buf->blocklens[i]; ADIOI_BUFFERED_WRITE_WITHOUT_READ off += flat_buf->blocklens[i]; } /* write the buffer out finally */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_lseek_a, 0, NULL); #endif lseek(fd->fd_sys, writebuf_off, SEEK_SET); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_lseek_b, 0, NULL); #endif if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_a, 0, NULL); #endif err = write(fd->fd_sys, writebuf, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_b, 0, NULL); #endif if (!(fd->atomicity)) ADIOI_UNLOCK(fd, writebuf_off, SEEK_SET, writebuf_len); if (err == -1) err_flag = 1; if (fd->atomicity) ADIOI_UNLOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind = off; if (err_flag) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "**io", "**io %s", strerror(errno)); } else *error_code = MPI_SUCCESS; } else { /* noncontiguous in file */ flat_file = ADIOI_Flatten_and_find(fd->filetype); disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { /* Wei-keng reworked type processing to be a bit more efficient */ offset = fd->fp_ind - disp; n_filetypes = (offset - flat_file->indices[0]) / filetype_extent; offset -= (ADIO_Offset) n_filetypes *filetype_extent; /* now offset is local to this extent */ /* find the block where 
offset is located, skip blocklens[i]==0 */ for (i = 0; i < flat_file->count; i++) { ADIO_Offset dist; if (flat_file->blocklens[i] == 0) continue; dist = flat_file->indices[i] + flat_file->blocklens[i] - offset; /* fwr_size is from offset to the end of block i */ if (dist == 0) { i++; offset = flat_file->indices[i]; fwr_size = flat_file->blocklens[i]; break; } if (dist > 0) { fwr_size = dist; break; } } st_index = i; /* starting index in flat_file->indices[] */ offset += disp + (ADIO_Offset) n_filetypes *filetype_extent; } else { n_etypes_in_filetype = filetype_size / etype_size; n_filetypes = offset / n_etypes_in_filetype; etype_in_filetype = offset % n_etypes_in_filetype; size_in_filetype = etype_in_filetype * etype_size; sum = 0; for (i = 0; i < flat_file->count; i++) { sum += flat_file->blocklens[i]; if (sum > size_in_filetype) { st_index = i; fwr_size = sum - size_in_filetype; abs_off_in_filetype = flat_file->indices[i] + size_in_filetype - (sum - flat_file->blocklens[i]); break; } } /* abs. offset in bytes in the file */ offset = disp + (ADIO_Offset) n_filetypes *filetype_extent + abs_off_in_filetype; } start_off = offset; /* Wei-keng Liao:write request is within single flat_file contig block */ /* this could happen, for example, with subarray types that are * actually fairly contiguous */ if (buftype_is_contig && bufsize <= fwr_size) { /* though MPI api has an integer 'count' parameter, derived * datatypes might describe more bytes than can fit into an integer. * if we've made it this far, we can pass a count of original * datatypes, instead of a count of bytes (which might overflow) * Other WriteContig calls in this path are operating on data * sieving buffer */ ADIO_WriteContig(fd, buf, count, datatype, ADIO_EXPLICIT_OFFSET, offset, status, error_code); if (file_ptr_type == ADIO_INDIVIDUAL) { /* update MPI-IO file pointer to point to the first byte * that can be accessed in the fileview. 
*/ fd->fp_ind = offset + bufsize; if (bufsize == fwr_size) { do { st_index++; if (st_index == flat_file->count) { st_index = 0; n_filetypes++; } } while (flat_file->blocklens[st_index] == 0); fd->fp_ind = disp + flat_file->indices[st_index] + (ADIO_Offset) n_filetypes *filetype_extent; } } fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); #endif goto fn_exit; } /* Calculate end_offset, the last byte-offset that will be accessed. * e.g., if start_offset=0 and 100 bytes to be write, end_offset=99 */ st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; i_offset = 0; j = st_index; off = offset; fwr_size = MPL_MIN(st_fwr_size, bufsize); while (i_offset < bufsize) { i_offset += fwr_size; end_offset = off + fwr_size - 1; j = (j + 1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; while (flat_file->blocklens[j] == 0) { j = (j + 1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } off = disp + flat_file->indices[j] + n_filetypes * (ADIO_Offset) filetype_extent; fwr_size = MPL_MIN(flat_file->blocklens[j], bufsize - i_offset); } /* if atomicity is true, lock the region to be accessed */ if (fd->atomicity) ADIOI_WRITE_LOCK(fd, start_off, SEEK_SET, end_offset - start_off + 1); /* initial read for the read-modify-write */ writebuf_off = offset; writebuf = (char *) ADIOI_Malloc(max_bufsize); memset(writebuf, -1, max_bufsize); writebuf_len = (int) (MPL_MIN(max_bufsize, end_offset - writebuf_off + 1)); if (!(fd->atomicity)) ADIOI_WRITE_LOCK(fd, writebuf_off, SEEK_SET, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_lseek_a, 0, NULL); #endif lseek(fd->fd_sys, writebuf_off, SEEK_SET); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_lseek_b, 0, NULL); #endif #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_read_a, 0, NULL); #endif err = read(fd->fd_sys, writebuf, writebuf_len); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_read_b, 0, NULL); #endif if (err == -1) { *error_code = 
MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_IO, "ADIOI_NFS_WriteStrided: ROMIO tries to optimize this access by doing a read-modify-write, but is unable to read the file. Please give the file read permission and open it with MPI_MODE_RDWR.", 0); goto fn_exit; } if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. should be the most common case. */ i_offset = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; fwr_size = MPL_MIN(st_fwr_size, bufsize); while (i_offset < bufsize) { if (fwr_size) { /* TYPE_UB and TYPE_LB can result in * fwr_size = 0. save system call in such cases */ /* lseek(fd->fd_sys, off, SEEK_SET); * err = write(fd->fd_sys, ((char *) buf) + i, fwr_size); */ req_off = off; req_len = fwr_size; userbuf_off = i_offset; ADIOI_BUFFERED_WRITE} i_offset += fwr_size; if (off + fwr_size < disp + flat_file->indices[j] + flat_file->blocklens[j] + n_filetypes * (ADIO_Offset) filetype_extent) off += fwr_size; /* did not reach end of contiguous block in filetype. * no more I/O needed. off is incremented by fwr_size. */ else { j = (j + 1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; while (flat_file->blocklens[j] == 0) { j = (j + 1) % flat_file->count; n_filetypes += (j == 0) ? 1 : 0; } off = disp + flat_file->indices[j] + n_filetypes * (ADIO_Offset) filetype_extent; fwr_size = MPL_MIN(flat_file->blocklens[j], bufsize - i_offset); } } } else {
/*
 * MPIOI_File_write_all - validate the arguments of a collective write and
 * hand the request to ADIO_WriteStridedColl.
 *
 *   fh            - MPI file handle, resolved to an ADIO_File
 *   offset        - file offset; rejected when negative and file_ptr_type
 *                   is ADIO_EXPLICIT_OFFSET
 *   file_ptr_type - forwarded unchanged to ADIO_WriteStridedColl
 *   buf, count, datatype - user data to write
 *   myname        - caller's name, passed to the error-reporting macros
 *   status        - forwarded unchanged to ADIO_WriteStridedColl
 *
 * Returns the resulting error code.  The MPIO_CHECK_* macros are expected
 * to set error_code and jump to fn_exit on failure (they are defined
 * outside this function -- confirm).
 */
int MPIOI_File_write_all(MPI_File fh, MPI_Offset offset, int file_ptr_type,
                         const void *buf, int count, MPI_Datatype datatype,
                         char *myname, MPI_Status *status)
{
    int error_code;
    MPI_Count type_bytes;
    ADIO_File file;
    void *packed = NULL;        /* external32 staging buffer, if any */
    const void *src = NULL;     /* buffer actually handed to ADIO */

    MPIU_THREAD_CS_ENTER(ALLFUNC,);

    file = MPIO_File_resolve(fh);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_FILE_HANDLE(file, myname, error_code);
    MPIO_CHECK_COUNT(file, count, myname, error_code);
    MPIO_CHECK_DATATYPE(file, datatype, myname, error_code);

    if (offset < 0 && file_ptr_type == ADIO_EXPLICIT_OFFSET) {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_ARG,
                                          "**iobadoffset", 0);
        error_code = MPIO_Err_return_file(file, error_code);
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    MPI_Type_size_x(datatype, &type_bytes);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_INTEGRAL_ETYPE(file, count, type_bytes, myname, error_code);
    MPIO_CHECK_WRITABLE(file, myname, error_code);
    MPIO_CHECK_NOT_SEQUENTIAL_MODE(file, myname, error_code);
    MPIO_CHECK_COUNT_SIZE(file, count, type_bytes, myname, error_code);
    /* --END ERROR HANDLING-- */

    /* By default write straight from the user's buffer; for external32
     * files write from the buffer produced by the setup helper instead. */
    src = buf;
    if (file->is_external32) {
        error_code = MPIU_external32_buffer_setup(buf, count, datatype, &packed);
        if (MPI_SUCCESS != error_code)
            goto fn_exit;
        src = packed;
    }

    ADIO_WriteStridedColl(file, src, count, datatype, file_ptr_type,
                          offset, status, &error_code);

    /* --BEGIN ERROR HANDLING-- */
    if (MPI_SUCCESS != error_code)
        error_code = MPIO_Err_return_file(file, error_code);
    /* --END ERROR HANDLING-- */

  fn_exit:
    /* release the staging buffer on every exit path */
    if (packed)
        ADIOI_Free(packed);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);

    return error_code;
}
/*
 * ADIOI_LUSTRE_WriteStridedColl - collective strided write for Lustre.
 *
 * Uses a generalized version of the extended two-phase method described
 * in "An Extended Two-Phase Method for Accessing Sections of
 * Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary,
 * Scientific Programming, (5)4:301--317, Winter 1996.
 * http://www.mcs.anl.gov/home/thakur/ext2ph.ps
 *
 * Depending on the cb_write hint and on the access pattern, the call is
 * either served with independent ADIO_WriteContig / ADIO_WriteStrided
 * calls or with the collective exchange-and-write path.  The outcome is
 * reported through *error_code; on the collective path all processes
 * agree on success/failure via a Bcast or Allreduce.
 */
void ADIOI_LUSTRE_WriteStridedColl(ADIO_File fd, const void *buf, int count,
                                   MPI_Datatype datatype, int file_ptr_type,
                                   ADIO_Offset offset, ADIO_Status *status,
                                   int *error_code)
{
    /* one access structure per process: requests of this process that
     * another process will carry out */
    ADIOI_Access *reqs_mine;
    /* one access structure per process: requests of other processes that
     * this process will write */
    ADIOI_Access *reqs_others;

    int p, nprocs, myrank;
    int buftype_is_contig, filetype_is_contig;
    int collective = 0, n_interleaved = 0, n_contig = 0;
    int *my_req_counts, n_my_req_procs, n_others_req_procs;
    int **buf_idx = NULL, *striping_info = NULL;
    int local_err, reduce_in;
    int go_independent;
    ADIO_Offset saved_fp, start_offset, end_offset, abs_off;
    ADIO_Offset *offset_list = NULL, *len_list = NULL;
    ADIO_Offset *st_offsets = NULL, *end_offsets = NULL;

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);

    saved_fp = fd->fp_ind;

    /* Access-pattern analysis is skipped when cb_write is disabled. */
    if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
        /* Offsets/lengths of this process's request plus its first and
         * last byte.  end_offset is the last byte touched: a 100-byte
         * access starting at 0 ends at 99. */
        ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
                              &offset_list, &len_list, &start_offset,
                              &end_offset, &n_contig);

        /* Gather every process's start and end offsets, indexed by rank. */
        st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
        end_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * sizeof(ADIO_Offset));
        MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1,
                      ADIO_OFFSET, fd->comm);
        MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1,
                      ADIO_OFFSET, fd->comm);

        /* Rudimentary interleaving test: a rank whose (non-empty) range
         * starts before the previous rank's range ends. */
        for (p = 1; p < nprocs; p++) {
            if (st_offsets[p] < end_offsets[p - 1] &&
                st_offsets[p] <= end_offsets[p])
                n_interleaved++;
        }

        /* Collective write pays off when accesses interleave; otherwise
         * let the Lustre-specific heuristic decide. */
        collective = (n_interleaved > 0)
            ? 1
            : ADIOI_LUSTRE_Docollect(fd, n_contig, len_list, nprocs);
    }

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);

    /* Independent I/O when collective buffering is disabled, or when it
     * is automatic and the analysis above did not ask for it. */
    go_independent = (fd->hints->cb_write == ADIOI_HINT_DISABLE) ||
        (!collective && fd->hints->cb_write == ADIOI_HINT_AUTO);

    if (go_independent) {
        /* the analysis buffers exist only if the analysis ran */
        if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
            ADIOI_Free(offset_list);
            ADIOI_Free(len_list);
            ADIOI_Free(st_offsets);
            ADIOI_Free(end_offsets);
        }

        fd->fp_ind = saved_fp;
        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

        if (!(buftype_is_contig && filetype_is_contig)) {
            ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
                              offset, status, error_code);
        } else if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
            abs_off = fd->disp + (ADIO_Offset)(fd->etype_size) * offset;
            ADIO_WriteContig(fd, buf, count, datatype,
                             ADIO_EXPLICIT_OFFSET, abs_off, status, error_code);
        } else {
            ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
                             0, status, error_code);
        }
        return;
    }

    /* Get Lustre hints information */
    ADIOI_LUSTRE_Get_striping_info(fd, &striping_info, 1);

    /* Work out which parts of this process's request are located in
     * which process. */
    ADIOI_LUSTRE_Calc_my_req(fd, offset_list, len_list, n_contig,
                             striping_info, nprocs, &n_my_req_procs,
                             &my_req_counts, &reqs_mine, &buf_idx);

    /* From everyone's requests, derive what this process must access on
     * behalf of the others (itself included). */
    ADIOI_Calc_others_req(fd, n_my_req_procs, my_req_counts, reqs_mine,
                          nprocs, myrank, &n_others_req_procs, &reqs_others);
    ADIOI_Free(my_req_counts);

    /* exchange data and write in sizes of no more than stripe_size. */
    ADIOI_LUSTRE_Exch_and_write(fd, buf, datatype, nprocs, myrank,
                                reqs_others, reqs_mine, offset_list, len_list,
                                n_contig, striping_info, buf_idx, error_code);

    /* A collective communication closes the operation so that no process
     * can start a following independent write before the collective
     * read-modify-write has finished everywhere.
     *
     * Error handling: every process must report failure if any did, yet a
     * process holding a more specific code keeps it.  So the code is
     * flattened to MPI_ERR_IO for the exchange and restored afterwards. */
    local_err = *error_code;
    if (*error_code != MPI_SUCCESS)
        *error_code = MPI_ERR_IO;

#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_postwrite_a, 0, NULL);
#endif
    if (fd->hints->cb_nodes != 1) {
        reduce_in = *error_code;
        MPI_Allreduce(&reduce_in, error_code, 1, MPI_INT, MPI_MAX, fd->comm);
    } else {
        /* a single I/O process: the cheaper Bcast is enough */
        MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
    }
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_postwrite_b, 0, NULL);
#endif

    if (local_err != MPI_SUCCESS && local_err != MPI_ERR_IO)
        *error_code = local_err;

    if (!buftype_is_contig)
        ADIOI_Delete_flattened(datatype);

    /* Release everything allocated for the collective path. */
    for (p = 0; p < nprocs; p++) {
        if (reqs_others[p].count) {
            ADIOI_Free(reqs_others[p].offsets);
            ADIOI_Free(reqs_others[p].lens);
            ADIOI_Free(reqs_others[p].mem_ptrs);
        }
    }
    ADIOI_Free(reqs_others);

    for (p = 0; p < nprocs; p++) {
        if (reqs_mine[p].count) {
            ADIOI_Free(reqs_mine[p].offsets);
            ADIOI_Free(reqs_mine[p].lens);
        }
    }
    ADIOI_Free(reqs_mine);

    for (p = 0; p < nprocs; p++)
        ADIOI_Free(buf_idx[p]);
    ADIOI_Free(buf_idx);

    ADIOI_Free(offset_list);
    ADIOI_Free(len_list);
    ADIOI_Free(st_offsets);
    ADIOI_Free(end_offsets);
    ADIOI_Free(striping_info);

#ifdef HAVE_STATUS_SET_BYTES
    /* Temporary way of filling in status: it reports the requested size,
     * not the amount actually written during collective I/O. */
    if (status) {
        MPI_Count type_bytes, total_bytes;
        MPI_Type_size_x(datatype, &type_bytes);
        total_bytes = type_bytes * count;
        MPIR_Status_set_bytes(status, datatype, total_bytes);
    }
#endif

    fd->fp_sys_posn = -1;       /* set it to null. */
}
/*
 * ADIOI_GEN_WriteStridedColl - generic collective strided write.
 *
 * Uses a generalized version of the extended two-phase method described
 * in "An Extended Two-Phase Method for Accessing Sections of
 * Out-of-Core Arrays", Rajeev Thakur and Alok Choudhary,
 * Scientific Programming, (5)4:301--317, Winter 1996.
 * http://www.mcs.anl.gov/home/thakur/ext2ph.ps
 *
 * Three routes, chosen from the hints in fd->hints:
 *   - cb_pfr not disabled: delegate everything to ADIOI_IOStridedColl;
 *   - cb_write disabled, or automatic with no interleaving detected:
 *     independent ADIO_WriteContig / ADIO_WriteStrided;
 *   - otherwise: file-domain based two-phase collective write.
 * The outcome is reported through *error_code.
 */
void ADIOI_GEN_WriteStridedColl(ADIO_File fd, const void *buf, int count,
                                MPI_Datatype datatype, int file_ptr_type,
                                ADIO_Offset offset, ADIO_Status * status,
                                int *error_code)
{
    /* one access structure per process in whose file domain part of this
     * process's request lies */
    ADIOI_Access *reqs_mine;
    /* one access structure per process whose request lies in this
     * process's file domain */
    ADIOI_Access *reqs_others;

    int p, nprocs, myrank, n_aggregators;
    int buftype_is_contig, filetype_is_contig;
    int n_contig = 0, n_interleaved = 0;
    int *my_req_counts, n_my_req_procs, n_others_req_procs;
    int local_err, reduce_in;
    int go_independent;
    ADIO_Offset saved_fp, start_offset, end_offset, fd_size, min_st_offset;
    ADIO_Offset abs_off;
    ADIO_Offset *offset_list = NULL, *len_list = NULL;
    ADIO_Offset *st_offsets = NULL, *end_offsets = NULL;
    ADIO_Offset *fd_start = NULL, *fd_end = NULL;
    MPI_Aint *buf_idx = NULL;

    if (fd->hints->cb_pfr != ADIOI_HINT_DISABLE) {
        /* The helper serves reads and writes alike, hence the cast that
         * drops const from buf. */
        ADIOI_IOStridedColl(fd, (char *) buf, count, ADIOI_WRITE, datatype,
                            file_ptr_type, offset, status, error_code);
        return;
    }

    MPI_Comm_size(fd->comm, &nprocs);
    MPI_Comm_rank(fd->comm, &myrank);

    /* number of processes that actually perform I/O, taken from the hints */
    n_aggregators = fd->hints->cb_nodes;
    saved_fp = fd->fp_ind;

    /* Interleaving is examined only when cb_write is not disabled. */
    if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
        /* Offsets/lengths of this process's request plus its first and
         * last byte.  end_offset is the last byte touched: a 100-byte
         * access starting at 0 ends at 99. */
        ADIOI_Calc_my_off_len(fd, count, datatype, file_ptr_type, offset,
                              &offset_list, &len_list, &start_offset,
                              &end_offset, &n_contig);

        /* One allocation holds both gathered arrays: start offsets in the
         * first nprocs slots, end offsets in the next nprocs slots. */
        st_offsets = (ADIO_Offset *) ADIOI_Malloc(nprocs * 2 * sizeof(ADIO_Offset));
        end_offsets = st_offsets + nprocs;

        MPI_Allgather(&start_offset, 1, ADIO_OFFSET, st_offsets, 1,
                      ADIO_OFFSET, fd->comm);
        MPI_Allgather(&end_offset, 1, ADIO_OFFSET, end_offsets, 1,
                      ADIO_OFFSET, fd->comm);

        /* Rudimentary interleaving test: a rank whose (non-empty) range
         * starts before the previous rank's range ends. */
        for (p = 1; p < nprocs; p++) {
            if (st_offsets[p] < end_offsets[p - 1] &&
                st_offsets[p] <= end_offsets[p])
                n_interleaved++;
        }
    }

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);

    go_independent = (fd->hints->cb_write == ADIOI_HINT_DISABLE) ||
        (!n_interleaved && fd->hints->cb_write == ADIOI_HINT_AUTO);

    if (go_independent) {
        /* the analysis buffers exist only if the analysis ran */
        if (fd->hints->cb_write != ADIOI_HINT_DISABLE) {
            ADIOI_Free(offset_list);
            ADIOI_Free(st_offsets);
        }

        fd->fp_ind = saved_fp;
        ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

        if (!(buftype_is_contig && filetype_is_contig)) {
            ADIO_WriteStrided(fd, buf, count, datatype, file_ptr_type,
                              offset, status, error_code);
        } else if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
            abs_off = fd->disp + (ADIO_Offset) (fd->etype_size) * offset;
            ADIO_WriteContig(fd, buf, count, datatype,
                             ADIO_EXPLICIT_OFFSET, abs_off, status, error_code);
        } else {
            ADIO_WriteContig(fd, buf, count, datatype, ADIO_INDIVIDUAL,
                             0, status, error_code);
        }
        return;
    }

    /* Split the I/O workload among n_aggregators processes by (logically)
     * dividing the file into file domains; each process may directly
     * access only its own file domain. */
    ADIOI_Calc_file_domains(st_offsets, end_offsets, nprocs, n_aggregators,
                            &min_st_offset, &fd_start, &fd_end,
                            fd->hints->min_fdomain_size, &fd_size,
                            fd->hints->striping_unit);

    /* Which portions of this process's request fall in which file domain. */
    ADIOI_Calc_my_req(fd, offset_list, len_list, n_contig, min_st_offset,
                      fd_start, fd_end, fd_size, nprocs, &n_my_req_procs,
                      &my_req_counts, &reqs_mine, &buf_idx);

    /* From everyone's requests, derive which requests of other processes
     * (and of this one) lie in this process's file domain. */
    ADIOI_Calc_others_req(fd, n_my_req_procs, my_req_counts, reqs_mine,
                          nprocs, myrank, &n_others_req_procs, &reqs_others);

    /* This process's own request description is no longer needed. */
    ADIOI_Free(my_req_counts);
    ADIOI_Free(reqs_mine[0].offsets);
    ADIOI_Free(reqs_mine);

    /* Exchange data and write in pieces of at most coll_bufsize; the
     * callee takes a non-const buffer, hence the cast. */
    ADIOI_Exch_and_write(fd, (char *) buf, datatype, nprocs, myrank,
                         reqs_others, offset_list, len_list, n_contig,
                         min_st_offset, fd_size, fd_start, fd_end, buf_idx,
                         error_code);

    /* A collective communication closes the operation so that no process
     * can start a following independent write before the collective
     * read-modify-write has finished everywhere.
     *
     * Error handling: every process must report failure if any did, yet a
     * process holding a more specific code keeps it.  So the code is
     * flattened to MPI_ERR_IO for the exchange and restored afterwards. */
    local_err = *error_code;
    if (*error_code != MPI_SUCCESS)
        *error_code = MPI_ERR_IO;

#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_postwrite_a, 0, NULL);
#endif
    if (fd->hints->cb_nodes != 1) {
        reduce_in = *error_code;
        MPI_Allreduce(&reduce_in, error_code, 1, MPI_INT, MPI_MAX, fd->comm);
    } else {
        /* a single I/O process: the cheaper Bcast is enough */
        MPI_Bcast(error_code, 1, MPI_INT, fd->hints->ranklist[0], fd->comm);
    }
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event(ADIOI_MPE_postwrite_b, 0, NULL);
#endif
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5012, 0, NULL);
#endif

    if (local_err != MPI_SUCCESS && local_err != MPI_ERR_IO)
        *error_code = local_err;

    /* Release everything allocated for the collective path. */
    ADIOI_Free(reqs_others[0].offsets);
    ADIOI_Free(reqs_others[0].mem_ptrs);
    ADIOI_Free(reqs_others);

    ADIOI_Free(buf_idx);
    ADIOI_Free(offset_list);
    ADIOI_Free(st_offsets);     /* end_offsets points into this allocation */
    ADIOI_Free(fd_start);

#ifdef HAVE_STATUS_SET_BYTES
    /* Temporary way of filling in status: it reports the requested size,
     * not the amount actually written during collective I/O. */
    if (status) {
        MPI_Count type_bytes, total_bytes;
        MPI_Type_size_x(datatype, &type_bytes);
        total_bytes = type_bytes * count;
        MPIR_Status_set_bytes(status, datatype, total_bytes);
    }
#endif

    fd->fp_sys_posn = -1;       /* set it to null. */
#ifdef AGGREGATION_PROFILE
    MPE_Log_event(5013, 0, NULL);
#endif
}
/*@
    MPI_File_write_ordered - Collective write using shared file pointer

Input Parameters:
. fh - file handle (handle)
. buf - initial address of buffer (choice)
. count - number of elements in buffer (nonnegative integer)
. datatype - datatype of each buffer element (handle)

Output Parameters:
. status - status object (Status)

.N fortran
@*/
int MPI_File_write_ordered(MPI_File fh, ROMIO_CONST void *buf, int count,
                           MPI_Datatype datatype, MPI_Status *status)
{
    static char myname[] = "MPI_FILE_WRITE_ORDERED";
    int error_code, nprocs, myrank;
    int prev_rank, next_rank;
    MPI_Count datatype_size;
    ADIO_Offset n_etypes;       /* size of this write, in etypes */
    ADIO_Offset shared_fp;
    ADIO_File adio_fh;
    void *e32buf = NULL;        /* external32 staging buffer, if any */
    const void *xbuf;           /* buffer actually handed to ADIO */

    adio_fh = MPIO_File_resolve(fh);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_FILE_HANDLE(adio_fh, myname, error_code);
    MPIO_CHECK_COUNT(adio_fh, count, myname, error_code);
    MPIO_CHECK_DATATYPE(adio_fh, datatype, myname, error_code);
    /* --END ERROR HANDLING-- */

    MPI_Type_size_x(datatype, &datatype_size);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_INTEGRAL_ETYPE(adio_fh, count, datatype_size, myname, error_code);
    MPIO_CHECK_FS_SUPPORTS_SHARED(adio_fh, myname, error_code);
    MPIO_CHECK_COUNT_SIZE(adio_fh, count, datatype_size, myname, error_code);
    /* --END ERROR HANDLING-- */

    ADIOI_TEST_DEFERRED(adio_fh, myname, &error_code);

    MPI_Comm_size(adio_fh->comm, &nprocs);
    MPI_Comm_rank(adio_fh->comm, &myrank);

    n_etypes = (count*datatype_size)/adio_fh->etype_size;

    /* A zero-byte message acts as a token that travels from rank 0 upward,
     * so the shared file pointer is advanced in rank order.  The two ends
     * of the chain talk to MPI_PROC_NULL. */
    prev_rank = (myrank > 0) ? myrank - 1 : MPI_PROC_NULL;
    next_rank = (myrank + 1 < nprocs) ? myrank + 1 : MPI_PROC_NULL;

    MPI_Recv(NULL, 0, MPI_BYTE, prev_rank, 0, adio_fh->comm, MPI_STATUS_IGNORE);

    ADIO_Get_shared_fp(adio_fh, n_etypes, &shared_fp, &error_code);
    /* --BEGIN ERROR HANDLING-- */
    if (error_code != MPI_SUCCESS) {
        /* NOTE(review): this path leaves before the token is forwarded and
         * before the collective write; peers waiting on either may block
         * -- confirm this is acceptable for a fatal error. */
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL,
                                          myname, __LINE__, MPI_ERR_INTERN,
                                          "**iosharedfailed", 0);
        error_code = MPIO_Err_return_file(adio_fh, error_code);
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    MPI_Send(NULL, 0, MPI_BYTE, next_rank, 0, adio_fh->comm);

    /* By default write straight from the user's buffer; for external32
     * files write from the buffer produced by the setup helper instead. */
    xbuf = buf;
    if (adio_fh->is_external32) {
        error_code = MPIU_external32_buffer_setup(buf, count, datatype, &e32buf);
        if (MPI_SUCCESS != error_code)
            goto fn_exit;
        xbuf = e32buf;
    }

    /* the offset obtained above is used as an explicit offset */
    ADIO_WriteStridedColl(adio_fh, xbuf, count, datatype, ADIO_EXPLICIT_OFFSET,
                          shared_fp, status, &error_code);

    /* --BEGIN ERROR HANDLING-- */
    if (MPI_SUCCESS != error_code)
        error_code = MPIO_Err_return_file(adio_fh, error_code);
    /* --END ERROR HANDLING-- */

  fn_exit:
    /* release the staging buffer on every exit path */
    if (e32buf)
        ADIOI_Free(e32buf);

    return error_code;
}
/*
 * MPIOI_File_iread_all - validate the arguments of a nonblocking
 * collective read and hand the request to ADIO_IreadStridedColl.
 *
 *   fh            - MPI file handle, resolved to an ADIO_File
 *   offset        - file offset; rejected when negative and file_ptr_type
 *                   is ADIO_EXPLICIT_OFFSET
 *   file_ptr_type - forwarded unchanged to ADIO_IreadStridedColl
 *   buf, count, datatype - destination of the data
 *   myname        - caller's name, passed to the error-reporting macros
 *   request       - request object filled in by ADIO_IreadStridedColl
 *
 * Returns the resulting error code.  The MPIO_CHECK_* macros are expected
 * to set error_code and jump to fn_exit on failure (they are defined
 * outside this function -- confirm).
 *
 * For external32 files the read goes into a temporary buffer that is then
 * converted into buf.  The conversion is performed only when the read
 * reported success, so a read error is returned to the caller instead of
 * being overwritten by the conversion's result.
 */
int MPIOI_File_iread_all(MPI_File fh, MPI_Offset offset, int file_ptr_type,
                         void *buf, int count, MPI_Datatype datatype,
                         char *myname, MPI_Request *request)
{
    int error_code;
    MPI_Count datatype_size;
    ADIO_File adio_fh;
    void *xbuf = NULL, *e32_buf = NULL;

    ROMIO_THREAD_CS_ENTER();

    adio_fh = MPIO_File_resolve(fh);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_FILE_HANDLE(adio_fh, myname, error_code);
    MPIO_CHECK_COUNT(adio_fh, count, myname, error_code);
    MPIO_CHECK_DATATYPE(adio_fh, datatype, myname, error_code);

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET && offset < 0) {
        error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE,
                                          myname, __LINE__, MPI_ERR_ARG,
                                          "**iobadoffset", 0);
        error_code = MPIO_Err_return_file(adio_fh, error_code);
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    MPI_Type_size_x(datatype, &datatype_size);

    /* --BEGIN ERROR HANDLING-- */
    MPIO_CHECK_INTEGRAL_ETYPE(adio_fh, count, datatype_size, myname, error_code);
    MPIO_CHECK_READABLE(adio_fh, myname, error_code);
    MPIO_CHECK_NOT_SEQUENTIAL_MODE(adio_fh, myname, error_code);
    MPIO_CHECK_COUNT_SIZE(adio_fh, count, datatype_size, myname, error_code);
    /* --END ERROR HANDLING-- */

    /* By default read straight into the user's buffer; for external32
     * files read into a staging buffer sized from the datatype's full
     * size and convert afterwards. */
    xbuf = buf;
    if (adio_fh->is_external32) {
        MPI_Aint e32_size = 0;
        error_code = MPIU_datatype_full_size(datatype, &e32_size);
        if (error_code != MPI_SUCCESS)
            goto fn_exit;

        e32_buf = ADIOI_Malloc(e32_size*count);
        xbuf = e32_buf;
    }

    ADIO_IreadStridedColl(adio_fh, xbuf, count, datatype, file_ptr_type,
                          offset, request, &error_code);

    /* --BEGIN ERROR HANDLING-- */
    if (error_code != MPI_SUCCESS)
        error_code = MPIO_Err_return_file(adio_fh, error_code);
    /* --END ERROR HANDLING-- */

    if (e32_buf != NULL) {
        /* Convert only after a successful read: on failure the staging
         * buffer holds no valid data and the read's error code must reach
         * the caller unchanged.
         *
         * NOTE(review): the staging buffer is converted and released right
         * after a call that hands back a request.  If the read can still
         * be in progress at this point, both steps happen too early --
         * confirm against the ADIO_IreadStridedColl implementation. */
        if (error_code == MPI_SUCCESS)
            error_code = MPIU_read_external32_conversion_fn(buf, datatype,
                                                            count, e32_buf);
        ADIOI_Free(e32_buf);
    }

  fn_exit:
    ROMIO_THREAD_CS_EXIT();

    return error_code;
}