void ADIOI_PVFS2_Delete(const char *filename, int *error_code) { PVFS_credentials credentials; PVFS_sysresp_getparent resp_getparent; int ret; PVFS_fs_id cur_fs; static char myname[] = "ADIOI_PVFS2_DELETE"; char pvfs_path[PVFS_NAME_MAX] = {0}; ADIOI_PVFS2_Init(error_code); /* --BEGIN ERROR HANDLING-- */ if (*error_code != MPI_SUCCESS) { /* ADIOI_PVFS2_INIT handles creating error codes itself */ return; } /* --END ERROR HANDLING-- */ /* in most cases we'll store the credentials in the fs struct, but we don't * have one of those in Delete */ ADIOI_PVFS2_makecredentials(&credentials); /* given the filename, figure out which pvfs filesystem it is on */ ret = PVFS_util_resolve(filename, &cur_fs, pvfs_path, PVFS_NAME_MAX); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_util_resolve", 0); return; } /* --END ERROR HANDLING-- */ ret = PVFS_sys_getparent(cur_fs, pvfs_path, &credentials, &resp_getparent); ret = PVFS_sys_remove(resp_getparent.basename, resp_getparent.parent_ref, &credentials); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_remove", 0); return; } /* --END ERROR HANDLING-- */ *error_code = MPI_SUCCESS; return; }
void ADIOI_PVFS2_Flush(ADIO_File fd, int *error_code) { int ret, rank, dummy=0, dummy_in=0; ADIOI_PVFS2_fs *pvfs_fs; static char myname[] = "ADIOI_PVFS2_FLUSH"; *error_code = MPI_SUCCESS; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; MPI_Comm_rank(fd->comm, &rank); /* unlike ADIOI_PVFS2_Resize, MPI_File_sync() does not perform any * syncronization */ MPI_Reduce(&dummy_in, &dummy, 1, MPI_INT, MPI_SUM, fd->hints->ranklist[0], fd->comm); /* io_worker computed in ADIO_Open */ if (rank == fd->hints->ranklist[0]) { ret = PVFS_sys_flush(pvfs_fs->object_ref, &(pvfs_fs->credentials)); MPI_Bcast(&ret, 1, MPI_INT, 0, fd->comm); } else { MPI_Bcast(&ret, 1, MPI_INT, 0, fd->comm); } /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_flush", 0); } /* --END ERROR HANDLING-- */ }
void ADIOI_PVFS2_Fcntl(ADIO_File fd, int flag, ADIO_Fcntl_t *fcntl_struct, int *error_code) { int ret; ADIOI_PVFS2_fs *pvfs_fs; PVFS_sysresp_getattr resp_getattr; static char myname[] = "ADIOI_PVFS2_FCNTL"; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; switch(flag) { case ADIO_FCNTL_GET_FSIZE: ret = PVFS_sys_getattr(pvfs_fs->object_ref, PVFS_ATTR_SYS_SIZE, &(pvfs_fs->credentials), &resp_getattr); if (ret != 0 ) { /* --BEGIN ERROR HANDLING-- */ *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_getattr", 0); /* --END ERROR HANDLING-- */ } else { *error_code = MPI_SUCCESS; } fcntl_struct->fsize = resp_getattr.attr.size; return; case ADIO_FCNTL_SET_DISKSPACE: ADIOI_GEN_Prealloc(fd, fcntl_struct->diskspace, error_code); break; /* --BEGIN ERROR HANDLING-- */ case ADIO_FCNTL_SET_ATOMICITY: default: *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ARG, "**flag", "**flag %d", flag); /* --END ERROR HANDLING-- */ } }
/* as with ADIOI_PVFS2_Flush, implement the resize operation in a scalable * manner. one process does the work, then broadcasts the result to everyone * else. fortunately, this operation is defined to be collective */ void ADIOI_PVFS2_Resize(ADIO_File fd, ADIO_Offset size, int *error_code) { int ret, rank; ADIOI_PVFS2_fs *pvfs_fs; static char myname[] = "ADIOI_PVFS2_RESIZE"; *error_code = MPI_SUCCESS; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; MPI_Comm_rank(fd->comm, &rank); /* We desginate one node in the communicator to be an 'io_worker' in * ADIO_Open. This node can perform operations on files and then * inform the other nodes of the result */ /* MPI-IO semantics treat conflicting MPI_File_set_size requests the * same as conflicting write requests. Thus, a resize from one * process does not have to be visible to the other processes until a * syncronization point is reached */ if (rank == fd->hints->ranklist[0]) { ret = PVFS_sys_truncate(pvfs_fs->object_ref, size, &(pvfs_fs->credentials)); MPI_Bcast(&ret, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); } else { MPI_Bcast(&ret, 1, MPI_INT, fd->hints->ranklist[0], fd->comm); } /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_truncate", 0); return; } /* --END ERROR HANDLING-- */ }
void ADIOI_PVFS2_AIO_contig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, MPI_Request *request, int flag, int *error_code) { int ret; MPI_Count datatype_size, len; ADIOI_PVFS2_fs *pvfs_fs; ADIOI_AIO_Request *aio_req; static char myname[] = "ADIOI_PVFS2_AIO_contig"; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; aio_req = (ADIOI_AIO_Request*)ADIOI_Calloc(sizeof(ADIOI_AIO_Request), 1); MPI_Type_size_x(datatype, &datatype_size); len = datatype_size * count; ret = PVFS_Request_contiguous(len, PVFS_BYTE, &(aio_req->mem_req)); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in pvfs_request_contig (memory)", 0); return; } /* --END ERROR HANDLING-- */ ret = PVFS_Request_contiguous(len, PVFS_BYTE, &(aio_req->file_req)); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in pvfs_request_contig (file)", 0); return; } /* --END ERROR HANDLING-- */ if (file_ptr_type == ADIO_INDIVIDUAL) { /* copy individual file pointer into offset variable, continue */ offset = fd->fp_ind; } if (flag == READ) { #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iread_a, 0, NULL ); #endif ret = PVFS_isys_read(pvfs_fs->object_ref, aio_req->file_req, offset, buf, aio_req->mem_req, &(pvfs_fs->credentials), &(aio_req->resp_io), &(aio_req->op_id), NULL); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iread_b, 0, NULL ); #endif } else if (flag == WRITE) { #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iwrite_a, 0, NULL ); #endif ret = PVFS_isys_write(pvfs_fs->object_ref, aio_req->file_req, offset, buf, aio_req->mem_req, &(pvfs_fs->credentials), &(aio_req->resp_io), &(aio_req->op_id), NULL); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_iwrite_b, 0, NULL ); #endif } /* --BEGIN ERROR HANDLING-- */ if (ret < 0 ) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_isys_io", 0); goto fn_exit; } /* --END ERROR HANDLING-- */ #ifdef HAVE_MPI_GREQUEST_EXTENSIONS /* posted. defered completion */ if (ret == 0) { if (ADIOI_PVFS2_greq_class == 0) { MPIX_Grequest_class_create(ADIOI_GEN_aio_query_fn, ADIOI_PVFS2_aio_free_fn, MPIU_Greq_cancel_fn, ADIOI_PVFS2_aio_poll_fn, ADIOI_PVFS2_aio_wait_fn, &ADIOI_PVFS2_greq_class); } MPIX_Grequest_class_allocate(ADIOI_PVFS2_greq_class, aio_req, request); memcpy(&(aio_req->req), request, sizeof(*request)); } #else /* if generalized request extensions not available, we will have to process * this operation right here */ int error; ret = PVFS_sys_wait(aio_req->op_id, "ADIOI_PVFS2_AIO_Contig", &error); if (ret == 0) { MPIO_Completed_request_create(&fd, len, error_code, request); } #endif /* immediate completion */ if (ret == 1) { MPIO_Completed_request_create(&fd, len, error_code, request); } if (file_ptr_type == ADIO_INDIVIDUAL) { fd->fp_ind += len; } fd->fp_sys_posn = offset + len; *error_code = MPI_SUCCESS; fn_exit: return; }
int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code, int rw_type) { int ret = -1, filetype_is_contig = -1; MPI_Count filetype_size = -1; int num_filetypes = 0, cur_flat_file_reg_off = 0; PVFS_Request tmp_mem_req, mem_req, tmp_file_req, file_req; PVFS_sysresp_io resp_io; ADIO_Offset off = -1, bytes_into_filetype = 0; MPI_Aint filetype_extent = -1; int i = -1; MPI_Count etype_size; PVFS_size pvfs_disp = -1; ADIOI_Flatlist_node *flat_file_p = ADIOI_Flatlist; /* Use for offseting the PVFS2 filetype */ int pvfs_blk = 1; ADIOI_PVFS2_fs *pvfs_fs; static char myname[] = "ADIOI_PVFS2_STRIDED_DTYPE"; memset(&tmp_mem_req, 0, sizeof(PVFS_Request)); memset(&mem_req, 0, sizeof(PVFS_Request)); memset(&tmp_file_req, 0, sizeof(PVFS_Request)); memset(&file_req, 0, sizeof(PVFS_Request)); pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); /* changed below if error */ *error_code = MPI_SUCCESS; /* datatype is the memory type * fd->filetype is the file type */ MPI_Type_size_x(fd->filetype, &filetype_size); if (filetype_size == 0) { *error_code = MPI_SUCCESS; return -1; } MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_size_x(fd->etype, &etype_size); if (filetype_size == 0) { *error_code = MPI_SUCCESS; return -1; } /* offset is in units of etype relative to the filetype. We * convert this to off in terms of actual data bytes (the offset * minus the number of bytes that are not used). We are allowed * to do this since PVFS2 handles offsets with respect to a * file_req in bytes, otherwise we would have to convert into a * pure byte offset as is done in other methods. Explicit offset * case is handled by using fd->disp and byte-converted off. */ pvfs_disp = fd->disp; if (file_ptr_type == ADIO_INDIVIDUAL) { if (filetype_is_contig) { off = fd->fp_ind - fd->disp; } else { int flag = 0; /* Should have already been flattened in ADIO_Open*/ while (flat_file_p->type != fd->filetype) { flat_file_p = flat_file_p->next; } num_filetypes = -1; while (!flag) { num_filetypes++; for (i = 0; i < flat_file_p->count; i++) { /* Start on a non zero-length region */ if (flat_file_p->blocklens[i]) { if (fd->disp + flat_file_p->indices[i] + (num_filetypes * filetype_extent) + flat_file_p->blocklens[i] > fd->fp_ind && fd->disp + flat_file_p->indices[i] <= fd->fp_ind) { cur_flat_file_reg_off = fd->fp_ind - (fd->disp + flat_file_p->indices[i] + (num_filetypes * filetype_extent)); flag = 1; break; } else bytes_into_filetype += flat_file_p->blocklens[i]; } } } /* Impossible that we don't find it in this datatype */ assert(i != flat_file_p->count); off = bytes_into_filetype + cur_flat_file_reg_off; } } else /* ADIO_EXPLICIT */ { off = etype_size * offset; } #ifdef DEBUG_DTYPE fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: (fd->fp_ind=%Ld,fd->disp=%Ld," " offset=%Ld),(pvfs_disp=%Ld,off=%Ld)\n", fd->fp_ind, fd->disp, offset, pvfs_disp, off); #endif /* Convert the MPI memory and file datatypes into * PVFS2 datatypes */ ret = convert_mpi_pvfs2_dtype(&datatype, &tmp_mem_req); if (ret < 0) { goto error_state; } ret = convert_mpi_pvfs2_dtype(&(fd->filetype), &tmp_file_req); if (ret < 0) { goto error_state; } ret = PVFS_Request_contiguous(count, tmp_mem_req, &mem_req); if (ret != 0) /* TODO: convert this to MPIO error handling */ fprintf(stderr, "ADIOI_PVFS2_stridedDtypeIO: error in final" " CONTIG memory type\n"); PVFS_Request_free(&tmp_mem_req); /* pvfs_disp is used to offset the filetype */ ret = PVFS_Request_hindexed(1, &pvfs_blk, &pvfs_disp, tmp_file_req, &file_req); if (ret != 0) fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: error in final" " HINDEXED file type\n"); PVFS_Request_free(&tmp_file_req); if (rw_type == READ) ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, off, buf, mem_req, &(pvfs_fs->credentials), &resp_io); else ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, off, buf, mem_req, &(pvfs_fs->credentials), &resp_io); if (ret != 0) { fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: Warning - PVFS_sys_" "read/write returned %d and completed %Ld bytes.\n", ret, (long long)resp_io.total_completed); *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_io \n", 0); goto error_state; } if (file_ptr_type == ADIO_INDIVIDUAL) { fd->fp_ind = off += resp_io.total_completed; } error_state: fd->fp_sys_posn = -1; /* set it to null. */ PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); #ifdef DEBUG_DTYPE fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: " "resp_io.total_completed=%Ld,ret=%d\n", resp_io.total_completed, ret); #endif #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, resp_io.total_completed); /* This is a temporary way of filling in status. The right way is to * keep track of how much data was actually acccessed by * ADIOI_BUFFERED operations */ #endif return ret; }
void ADIOI_PVFS2_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status * status, int *error_code) { int ret; MPI_Count datatype_size, len; PVFS_Request file_req, mem_req; PVFS_sysresp_io resp_io; ADIOI_PVFS2_fs *pvfs_fs; static char myname[] = "ADIOI_PVFS2_WRITECONTIG"; pvfs_fs = (ADIOI_PVFS2_fs *) fd->fs_ptr; MPI_Type_size_x(datatype, &datatype_size); len = datatype_size * count; ret = PVFS_Request_contiguous(len, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_Request_contiguous (memory)", 0); return; } /* --END ERROR HANDLING-- */ ret = PVFS_Request_contiguous(len, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_Request_contiguous (file)", 0); return; } /* --END ERROR HANDLING-- */ if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_a, 0, NULL); #endif ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, offset, (void *) buf, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_b, 0, NULL); #endif /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_write", 0); goto fn_exit; } /* --END ERROR HANDLING-- */ fd->fp_sys_posn = offset + (int) resp_io.total_completed; } else { #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_a, 0, NULL); #endif ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, fd->fp_ind, (void *) buf, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event(ADIOI_MPE_write_b, 0, NULL); #endif /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_write", 0); goto fn_exit; } /* --END ERROR HANDLING-- */ fd->fp_ind += (int) resp_io.total_completed; fd->fp_sys_posn = fd->fp_ind; } #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, resp_io.total_completed); #endif *error_code = MPI_SUCCESS; fn_exit: PVFS_Request_free(&file_req); PVFS_Request_free(&mem_req); return; }
void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { int ret, datatype_size, len; PVFS_Request file_req, mem_req; PVFS_sysresp_io resp_io; ADIOI_PVFS2_fs *pvfs_fs; static char myname[] = "ADIOI_PVFS2_READCONTIG"; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; MPI_Type_size(datatype, &datatype_size); len = datatype_size * count; ret = PVFS_Request_contiguous(len, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in pvfs_request_contig (memory)", 0); return; } /* --END ERROR HANDLING-- */ ret = PVFS_Request_contiguous(len, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (ret != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in pvfs_request_contig (file)", 0); return; } /* --END ERROR HANDLING-- */ if (file_ptr_type == ADIO_INDIVIDUAL) { /* copy individual file pointer into offset variable, continue */ offset = fd->fp_ind; } #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_a, 0, NULL ); #endif ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, offset, buf, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_read_b, 0, NULL ); #endif /* --BEGIN ERROR HANDLING-- */ if (ret != 0 ) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(ret), "Error in PVFS_sys_read", 0); goto fn_exit; } /* --END ERROR HANDLING-- */ if (file_ptr_type == ADIO_INDIVIDUAL) { fd->fp_ind += (int) resp_io.total_completed; /* TODO: WHY THE INT CAST? */ } fd->fp_sys_posn = offset + (int)resp_io.total_completed; #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, (int)resp_io.total_completed); #endif *error_code = MPI_SUCCESS; fn_exit: PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); return; }
void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code) { /* as with all the other WriteStrided functions, offset is in units of * etype relative to the filetype */ /* Since PVFS2 does not support file locking, can't do buffered writes as on Unix */ ADIOI_Flatlist_node *flat_buf, *flat_file; int i, j, k, bwr_size, fwr_size=0, st_index=0; int bufsize, sum, n_etypes_in_filetype, size_in_filetype; int n_filetypes, etype_in_filetype; ADIO_Offset abs_off_in_filetype=0; int filetype_size, etype_size, buftype_size; MPI_Aint filetype_extent, buftype_extent; int buf_count, buftype_is_contig, filetype_is_contig; ADIO_Offset off, disp, start_off, initial_off; int flag, st_fwr_size, st_n_filetypes; int err_flag=0; int mem_list_count, file_list_count; PVFS_size * mem_offsets; int64_t *file_offsets; int *mem_lengths; int32_t *file_lengths; int total_blks_to_write; int max_mem_list, max_file_list; int b_blks_wrote; int f_data_wrote; int size_wrote=0, n_write_lists, extra_blks; int end_bwr_size, end_fwr_size; int start_k, start_j, new_file_write, new_buffer_write; int start_mem_offset; PVFS_Request mem_req, file_req; ADIOI_PVFS2_fs * pvfs_fs; PVFS_sysresp_io resp_io; MPI_Offset total_bytes_written=0; static char myname[] = "ADIOI_PVFS2_WRITESTRIDED"; /* note: don't increase this: several parts of PVFS2 now * assume this limit*/ #define MAX_ARRAY_SIZE 64 /* --BEGIN ERROR HANDLING-- */ if (fd->atomicity) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_ARG, "Atomic noncontiguous writes are not supported by PVFS2", 0); return; } /* --END ERROR HANDLING-- */ ADIOI_Datatype_iscontig(datatype, &buftype_is_contig); ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig); /* the HDF5 tests showed a bug in this list processing code (see many many * lines down below). We added a workaround, but common HDF5 file types * are actually contiguous and do not need the expensive workarond */ if (!filetype_is_contig) { flat_file = ADIOI_Flatlist; while (flat_file->type != fd->filetype) flat_file = flat_file->next; if (flat_file->count == 1 && !buftype_is_contig) filetype_is_contig = 1; } MPI_Type_size(fd->filetype, &filetype_size); if ( ! filetype_size ) { #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, 0); #endif *error_code = MPI_SUCCESS; return; } MPI_Type_extent(fd->filetype, &filetype_extent); MPI_Type_size(datatype, &buftype_size); MPI_Type_extent(datatype, &buftype_extent); etype_size = fd->etype_size; bufsize = buftype_size * count; pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr; if (!buftype_is_contig && filetype_is_contig) { /* noncontiguous in memory, contiguous in file. */ int64_t file_offsets; int32_t file_lengths; ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; if (file_ptr_type == ADIO_EXPLICIT_OFFSET) { off = fd->disp + etype_size * offset; } else off = fd->fp_ind; file_list_count = 1; file_offsets = off; file_lengths = 0; total_blks_to_write = count*flat_buf->count; b_blks_wrote = 0; /* allocate arrays according to max usage */ if (total_blks_to_write > MAX_ARRAY_SIZE) mem_list_count = MAX_ARRAY_SIZE; else mem_list_count = total_blks_to_write; mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size)); mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int)); j = 0; /* step through each block in memory, filling memory arrays */ while (b_blks_wrote < total_blks_to_write) { for (i=0; i<flat_buf->count; i++) { mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] = /* TODO: fix this warning by casting to an integer that's * the same size as a char * and /then/ casting to * PVFS_size */ ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]); mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] = flat_buf->blocklens[i]; file_lengths += flat_buf->blocklens[i]; b_blks_wrote++; if (!(b_blks_wrote % MAX_ARRAY_SIZE) || (b_blks_wrote == total_blks_to_write)) { /* in the case of the last write list call, adjust mem_list_count */ if (b_blks_wrote == total_blks_to_write) { mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE; /* in case last write list call fills max arrays */ if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE; } err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths, mem_offsets, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_hindexed (memory)", 0); break; } /* --END ERROR HANDLING-- */ err_flag = PVFS_Request_contiguous(file_lengths, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_contiguous (file)", 0); break; } /* --END ERROR HANDLING-- */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, file_offsets, PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif total_bytes_written += resp_io.total_completed; /* in the case of error or the last write list call, * leave here */ /* --BEGIN ERROR HANDLING-- */ if (err_flag) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_sys_write", 0); break; } /* --END ERROR HANDLING-- */ if (b_blks_wrote == total_blks_to_write) break; file_offsets += file_lengths; file_lengths = 0; PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); } } /* for (i=0; i<flat_buf->count; i++) */ j++; } /* while (b_blks_wrote < total_blks_to_write) */ ADIOI_Free(mem_offsets); ADIOI_Free(mem_lengths); if (file_ptr_type == ADIO_INDIVIDUAL) fd->fp_ind += total_bytes_written; if (!err_flag) *error_code = MPI_SUCCESS; fd->fp_sys_posn = -1; /* clear this. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); /* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif ADIOI_Delete_flattened(datatype); return; } /* if (!buftype_is_contig && filetype_is_contig) */ /* already know that file is noncontiguous from above */ /* noncontiguous in file */ /* filetype already flattened in ADIO_Open */ flat_file = ADIOI_Flatlist; while (flat_file->type != fd->filetype) flat_file = flat_file->next; disp = fd->disp; initial_off = offset; /* for each case - ADIO_Individual pointer or explicit, find offset (file offset in bytes), n_filetypes (how many filetypes into file to start), fwr_size (remaining amount of data in present file block), and st_index (start point in terms of blocks in starting filetype) */ if (file_ptr_type == ADIO_INDIVIDUAL) { offset = fd->fp_ind; /* in bytes */ n_filetypes = -1; flag = 0; while (!flag) { n_filetypes++; for (i=0; i<flat_file->count; i++) { if (disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] >= offset) { st_index = i; fwr_size = disp + flat_file->indices[i] + ((ADIO_Offset) n_filetypes)*filetype_extent + flat_file->blocklens[i] - offset; flag = 1; break; } } } /* while (!flag) */ } /* if (file_ptr_type == ADIO_INDIVIDUAL) */ else { n_etypes_in_filetype = filetype_size/etype_size; n_filetypes = (int) (offset / n_etypes_in_filetype); etype_in_filetype = (int) (offset % n_etypes_in_filetype); size_in_filetype = etype_in_filetype * etype_size; sum = 0; for (i=0; i<flat_file->count; i++) { sum += flat_file->blocklens[i]; if (sum > size_in_filetype) { st_index = i; fwr_size = sum - size_in_filetype; abs_off_in_filetype = flat_file->indices[i] + size_in_filetype - (sum - flat_file->blocklens[i]); break; } } /* abs. offset in bytes in the file */ offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent + abs_off_in_filetype; } /* else [file_ptr_type != ADIO_INDIVIDUAL] */ start_off = offset; st_fwr_size = fwr_size; st_n_filetypes = n_filetypes; if (buftype_is_contig && !filetype_is_contig) { /* contiguous in memory, noncontiguous in file. should be the most common case. */ int mem_lengths; char *mem_offsets; i = 0; j = st_index; off = offset; n_filetypes = st_n_filetypes; mem_list_count = 1; /* determine how many blocks in file to write */ f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize); total_blks_to_write = 1; if (j < (flat_file->count -1)) j++; else { j = 0; n_filetypes++; } while (f_data_wrote < bufsize) { f_data_wrote += flat_file->blocklens[j]; total_blks_to_write++; if (j<(flat_file->count-1)) j++; else j = 0; } j = st_index; n_filetypes = st_n_filetypes; n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE; extra_blks = total_blks_to_write%MAX_ARRAY_SIZE; mem_offsets = buf; mem_lengths = 0; /* if at least one full writelist, allocate file arrays at max array size and don't free until very end */ if (n_write_lists) { file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* sizeof(int64_t)); file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE* sizeof(int32_t)); } /* if there's no full writelist allocate file arrays according to needed size (extra_blks) */ else { file_offsets = (int64_t*)ADIOI_Malloc(extra_blks* sizeof(int64_t)); file_lengths = (int32_t*)ADIOI_Malloc(extra_blks* sizeof(int32_t)); } /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */ for (i=0; i<n_write_lists; i++) { file_list_count = MAX_ARRAY_SIZE; if(!i) { file_offsets[0] = offset; file_lengths[0] = st_fwr_size; mem_lengths = st_fwr_size; } for (k=0; k<MAX_ARRAY_SIZE; k++) { if (i || k) { file_offsets[k] = disp + ((ADIO_Offset)n_filetypes)*filetype_extent + flat_file->indices[j]; file_lengths[k] = flat_file->blocklens[j]; mem_lengths += file_lengths[k]; } if (j<(flat_file->count - 1)) j++; else { j = 0; n_filetypes++; } } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */ err_flag = PVFS_Request_contiguous(mem_lengths, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_contiguous (memory)", 0); goto error_state; } /* --END ERROR HANDLING-- */ err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_hindexed (file)", 0); goto error_state; } /* --END ERROR HANDLING-- */ /* PVFS_Request_hindexed already expresses the offsets into the * file, so we should not pass in an offset if we are using * hindexed for the file type */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_sys_write", 0); goto error_state; } /* --END ERROR HANDLING-- */ total_bytes_written += resp_io.total_completed; mem_offsets += mem_lengths; mem_lengths = 0; PVFS_Request_free(&file_req); PVFS_Request_free(&mem_req); } /* for (i=0; i<n_write_lists; i++) */ /* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */ if (extra_blks) { file_list_count = extra_blks; if(!i) { file_offsets[0] = offset; file_lengths[0] = ADIOI_MIN(st_fwr_size, bufsize); } for (k=0; k<extra_blks; k++) { if(i || k) { file_offsets[k] = disp + ((ADIO_Offset)n_filetypes)*filetype_extent + flat_file->indices[j]; if (k == (extra_blks - 1)) { file_lengths[k] = bufsize - (int32_t) mem_lengths - (int32_t) mem_offsets + (int32_t) buf; } else file_lengths[k] = flat_file->blocklens[j]; } /* if(i || k) */ mem_lengths += file_lengths[k]; if (j<(flat_file->count - 1)) j++; else { j = 0; n_filetypes++; } } /* for (k=0; k<extra_blks; k++) */ err_flag = PVFS_Request_contiguous(mem_lengths, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_contiguous (memory)", 0); goto error_state; } /* --END ERROR HANDLING-- */ err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_hindexed(file)", 0); goto error_state; } /* --END ERROR HANDLING-- */ /* as above, use 0 for 'offset' when using hindexed file type*/ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, mem_offsets, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_sys_write", 0); goto error_state; } /* --END ERROR HANDLING-- */ total_bytes_written += resp_io.total_completed; PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); } } else { /* noncontiguous in memory as well as in file */ ADIOI_Flatten_datatype(datatype); flat_buf = ADIOI_Flatlist; while (flat_buf->type != datatype) flat_buf = flat_buf->next; size_wrote = 0; n_filetypes = st_n_filetypes; fwr_size = st_fwr_size; bwr_size = flat_buf->blocklens[0]; buf_count = 0; start_mem_offset = 0; start_k = k = 0; start_j = st_index; max_mem_list = 0; max_file_list = 0; /* run through and file max_file_list and max_mem_list so that you can allocate the file and memory arrays less than MAX_ARRAY_SIZE if possible */ while (size_wrote < bufsize) { k = start_k; new_buffer_write = 0; mem_list_count = 0; while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are equal, and the sum of all the data written and data to be written in the next immediate write list is less than bufsize */ if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } else { new_buffer_write += flat_buf->blocklens[k]; end_bwr_size = flat_buf->blocklens[k]; } } else { if (bwr_size > (bufsize - size_wrote)) { new_buffer_write = bufsize - size_wrote; bwr_size = new_buffer_write; } else new_buffer_write = bwr_size; } mem_list_count++; k = (k + 1)%flat_buf->count; } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) { if(file_list_count) { if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; j--; } else { new_file_write += flat_file->blocklens[j]; end_fwr_size = flat_file->blocklens[j]; } } else { if (fwr_size > new_buffer_write) { new_file_write = new_buffer_write; fwr_size = new_file_write; } else new_file_write = fwr_size; } file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; k = start_k; if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; while (new_buffer_write < new_file_write) { if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) { end_bwr_size = new_file_write - new_buffer_write; new_buffer_write = new_file_write; k--; } else { new_buffer_write += flat_buf->blocklens[k]; end_bwr_size = flat_buf->blocklens[k]; } } else { new_buffer_write = bwr_size; if (bwr_size > (bufsize - size_wrote)) { new_buffer_write = bufsize - size_wrote; bwr_size = new_buffer_write; } } mem_list_count++; k = (k + 1)%flat_buf->count; } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ /* fakes filling the writelist arrays of lengths found above */ k = start_k; j = start_j; for (i=0; i<mem_list_count; i++) { if(i) { if (i == (mem_list_count - 1)) { if (flat_buf->blocklens[k] == end_bwr_size) bwr_size = flat_buf->blocklens[(k+1)% flat_buf->count]; else { bwr_size = flat_buf->blocklens[k] - end_bwr_size; k--; buf_count--; } } } buf_count++; k = (k + 1)%flat_buf->count; } /* for (i=0; i<mem_list_count; i++) */ for (i=0; i<file_list_count; i++) { if (i) { if (i == (file_list_count - 1)) { if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; } } } if (j < flat_file->count - 1) j++; else { j = 0; n_filetypes++; } } /* for (i=0; i<file_list_count; i++) */ size_wrote += new_buffer_write; start_k = k; start_j = j; if (max_mem_list < mem_list_count) max_mem_list = mem_list_count; if (max_file_list < file_list_count) max_file_list = file_list_count; } /* while (size_wrote < bufsize) */ /* one last check before we actually carry out the operation: * this code has hard-to-fix bugs when a noncontiguous file type has * such large pieces that the sum of the lengths of the memory type is * not larger than one of those pieces (and vice versa for large memory * types and many pices of file types. In these cases, give up and * fall back to naive reads and writes. The testphdf5 test created a * type with two very large memory regions and 600 very small file * regions. The same test also created a type with one very large file * region and many (700) very small memory regions. both cases caused * problems for this code */ if ( ( (file_list_count == 1) && (new_file_write < flat_file->blocklens[0] ) ) || ((mem_list_count == 1) && (new_buffer_write < flat_buf->blocklens[0]) ) || ((file_list_count == MAX_ARRAY_SIZE) && (new_file_write < flat_buf->blocklens[0]) ) || ( (mem_list_count == MAX_ARRAY_SIZE) && (new_buffer_write < flat_file->blocklens[0])) ) { ADIOI_Delete_flattened(datatype); ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype, file_ptr_type, initial_off, status, error_code); return; } mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size)); mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int)); file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t)); file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t)); size_wrote = 0; n_filetypes = st_n_filetypes; fwr_size = st_fwr_size; bwr_size = flat_buf->blocklens[0]; buf_count = 0; start_mem_offset = 0; start_k = k = 0; start_j = st_index; /* this section calculates mem_list_count and file_list_count and also finds the possibly odd sized last array elements in new_fwr_size and new_bwr_size */ while (size_wrote < bufsize) { k = start_k; new_buffer_write = 0; mem_list_count = 0; while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) { /* find mem_list_count and file_list_count such that both are less than MAX_ARRAY_SIZE, the sum of their lengths are equal, and the sum of all the data written and data to be written in the next immediate write list is less than bufsize */ if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k] + size_wrote) > bufsize) { end_bwr_size = new_buffer_write + flat_buf->blocklens[k] - (bufsize - size_wrote); new_buffer_write = bufsize - size_wrote; } else { new_buffer_write += flat_buf->blocklens[k]; end_bwr_size = flat_buf->blocklens[k]; } } else { if (bwr_size > (bufsize - size_wrote)) { new_buffer_write = bufsize - size_wrote; bwr_size = new_buffer_write; } else new_buffer_write = bwr_size; } mem_list_count++; k = (k + 1)%flat_buf->count; } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ j = start_j; new_file_write = 0; file_list_count = 0; while ((file_list_count < MAX_ARRAY_SIZE) && (new_file_write < new_buffer_write)) { if(file_list_count) { if((new_file_write + flat_file->blocklens[j]) > new_buffer_write) { end_fwr_size = new_buffer_write - new_file_write; new_file_write = new_buffer_write; j--; } else { new_file_write += flat_file->blocklens[j]; end_fwr_size = flat_file->blocklens[j]; } } else { if (fwr_size > new_buffer_write) { new_file_write = new_buffer_write; fwr_size = new_file_write; } else new_file_write = fwr_size; } file_list_count++; if (j < (flat_file->count - 1)) j++; else j = 0; k = start_k; if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) { new_buffer_write = 0; mem_list_count = 0; while (new_buffer_write < new_file_write) { if(mem_list_count) { if((new_buffer_write + flat_buf->blocklens[k]) > new_file_write) { end_bwr_size = new_file_write - new_buffer_write; new_buffer_write = new_file_write; k--; } else { new_buffer_write += flat_buf->blocklens[k]; end_bwr_size = flat_buf->blocklens[k]; } } else { new_buffer_write = bwr_size; if (bwr_size > (bufsize - size_wrote)) { new_buffer_write = bufsize - size_wrote; bwr_size = new_buffer_write; } } mem_list_count++; k = (k + 1)%flat_buf->count; } /* while (new_buffer_write < new_file_write) */ } /* if ((new_file_write < new_buffer_write) && (file_list_count == MAX_ARRAY_SIZE)) */ } /* while ((mem_list_count < MAX_ARRAY_SIZE) && (new_buffer_write < bufsize-size_wrote)) */ /* fills the allocated writelist arrays */ k = start_k; j = start_j; for (i=0; i<mem_list_count; i++) { /* TODO: fix this warning by casting to an integer that's the * same size as a char * and /then/ casting to PVFS_size */ mem_offsets[i] = ((PVFS_size)buf + buftype_extent* (buf_count/flat_buf->count) + (int)flat_buf->indices[k]); if(!i) { mem_lengths[0] = bwr_size; mem_offsets[0] += flat_buf->blocklens[k] - bwr_size; } else { if (i == (mem_list_count - 1)) { mem_lengths[i] = end_bwr_size; if (flat_buf->blocklens[k] == end_bwr_size) bwr_size = flat_buf->blocklens[(k+1)% flat_buf->count]; else { bwr_size = flat_buf->blocklens[k] - end_bwr_size; k--; buf_count--; } } else { mem_lengths[i] = flat_buf->blocklens[k]; } } buf_count++; k = (k + 1)%flat_buf->count; } /* for (i=0; i<mem_list_count; i++) */ for (i=0; i<file_list_count; i++) { file_offsets[i] = disp + flat_file->indices[j] + ((ADIO_Offset)n_filetypes) * filetype_extent; if (!i) { file_lengths[0] = fwr_size; file_offsets[0] += flat_file->blocklens[j] - fwr_size; } else { if (i == (file_list_count - 1)) { file_lengths[i] = end_fwr_size; if (flat_file->blocklens[j] == end_fwr_size) fwr_size = flat_file->blocklens[(j+1)% flat_file->count]; else { fwr_size = flat_file->blocklens[j] - end_fwr_size; j--; } } else file_lengths[i] = flat_file->blocklens[j]; } if (j < flat_file->count - 1) j++; else { j = 0; n_filetypes++; } } /* for (i=0; i<file_list_count; i++) */ err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths, mem_offsets, PVFS_BYTE, &mem_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0 ) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_hindexed (memory)", 0); goto error_state; } /* --END ERROR HANDLING-- */ err_flag = PVFS_Request_hindexed(file_list_count, file_lengths, file_offsets, PVFS_BYTE, &file_req); /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_Request_hindexed", 0); goto error_state; } /* --END ERROR HANDLING-- */ /* offset will be expressed in memory and file datatypes */ #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_a, 0, NULL ); #endif err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0, PVFS_BOTTOM, mem_req, &(pvfs_fs->credentials), &resp_io); #ifdef ADIOI_MPE_LOGGING MPE_Log_event( ADIOI_MPE_write_b, 0, NULL ); #endif /* --BEGIN ERROR HANDLING-- */ if (err_flag != 0) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(err_flag), "Error in PVFS_sys_write", 0); goto error_state; } /* --END ERROR HANDLING-- */ size_wrote += new_buffer_write; total_bytes_written += resp_io.total_completed; start_k = k; start_j = j; PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); } /* while (size_wrote < bufsize) */ ADIOI_Free(mem_offsets); ADIOI_Free(mem_lengths); } /* when incrementing fp_ind, need to also take into account the file type: * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----| * if we wrote N elements, offset needs to point at beginning of type, not * at empty region at offset N+1). * * As we discussed on mpich-discuss in may/june 2009, the code below might * look wierd, but by putting fp_ind at the last byte written, the next * time we run through the strided code we'll update the fp_ind to the * right location. */ if (file_ptr_type == ADIO_INDIVIDUAL) { fd->fp_ind = file_offsets[file_list_count-1]+ file_lengths[file_list_count-1]; } ADIOI_Free(file_offsets); ADIOI_Free(file_lengths); *error_code = MPI_SUCCESS; error_state: fd->fp_sys_posn = -1; /* set it to null. */ #ifdef HAVE_STATUS_SET_BYTES MPIR_Status_set_bytes(status, datatype, bufsize); /* This is a temporary way of filling in status. The right way is to keep track of how much data was actually written by ADIOI_BUFFERED_WRITE. */ #endif if (!buftype_is_contig) ADIOI_Delete_flattened(datatype); }
/* ADIOI_PVFS2_Open: * one process opens (or creates) the file, then broadcasts the result to the * remaining processors. * * ADIO_Open used to perform an optimization when MPI_MODE_CREATE (and before * that, MPI_MODE_EXCL) was set. Because PVFS2 handles file lookup and * creation more scalably than other file systems, ADIO_Open now skips any * special handling when CREATE is set. */ void ADIOI_PVFS2_Open(ADIO_File fd, int *error_code) { int rank, ret; PVFS_fs_id cur_fs; static char myname[] = "ADIOI_PVFS2_OPEN"; char pvfs_path[PVFS_NAME_MAX] = {0}; ADIOI_PVFS2_fs *pvfs2_fs; /* since one process is doing the open, that means one process is also * doing the error checking. define a struct for both the object reference * and the error code to broadcast to all the processors */ open_status o_status = {0, {0, 0}}; MPI_Datatype open_status_type; MPI_Datatype types[2] = {MPI_INT, MPI_BYTE}; int lens[2] = {1, sizeof(PVFS_object_ref)}; MPI_Aint offsets[2]; pvfs2_fs = (ADIOI_PVFS2_fs *) ADIOI_Malloc(sizeof(ADIOI_PVFS2_fs)); /* --BEGIN ERROR HANDLING-- */ if (pvfs2_fs == NULL) { *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, MPI_ERR_UNKNOWN, "Error allocating memory", 0); return; } /* --END ERROR HANDLING-- */ MPI_Comm_rank(fd->comm, &rank); ADIOI_PVFS2_Init(error_code); if (*error_code != MPI_SUCCESS) { /* ADIOI_PVFS2_INIT handles creating error codes on its own */ return; } /* currently everyone gets their own credentials */ ADIOI_PVFS2_makecredentials(&(pvfs2_fs->credentials)); /* one process resolves name and will later bcast to others */ if (rank == fd->hints->ranklist[0] && fd->fs_ptr == NULL) { /* given the filename, figure out which pvfs filesystem it is on */ ret = PVFS_util_resolve(fd->filename, &cur_fs, pvfs_path, PVFS_NAME_MAX); if (ret < 0 ) { PVFS_perror("PVFS_util_resolve", ret); /* TODO: pick a good error for this */ o_status.error = -1; } else { fake_an_open(cur_fs, pvfs_path, fd->access_mode, fd->hints->striping_factor, fd->hints->striping_unit, pvfs2_fs, &o_status); } /* store credentials and object reference in fd */ pvfs2_fs->object_ref = o_status.object_ref; fd->fs_ptr = pvfs2_fs; } /* broadcast status and (possibly valid) object reference */ MPI_Address(&o_status.error, &offsets[0]); MPI_Address(&o_status.object_ref, &offsets[1]); MPI_Type_struct(2, lens, offsets, types, &open_status_type); MPI_Type_commit(&open_status_type); /* Assertion: if we hit this Bcast, then all processes collectively * called this open. * * That's because deferred open never happens with PVFS2. */ MPI_Bcast(MPI_BOTTOM, 1, open_status_type, fd->hints->ranklist[0], fd->comm); MPI_Type_free(&open_status_type); /* --BEGIN ERROR HANDLING-- */ if (o_status.error != 0) { ADIOI_Free(pvfs2_fs); *error_code = MPIO_Err_create_code(MPI_SUCCESS, MPIR_ERR_RECOVERABLE, myname, __LINE__, ADIOI_PVFS2_error_convert(o_status.error), "Unknown error", 0); /* TODO: FIX STRING */ return; } /* --END ERROR HANDLING-- */ pvfs2_fs->object_ref = o_status.object_ref; fd->fs_ptr = pvfs2_fs; *error_code = MPI_SUCCESS; return; }