/*
 * PVFS_Request_free - drop one reference to a request and release its
 * storage once the reference count reaches zero.
 *
 * req: address of the request handle.
 *
 * Returns PVFS_ERR_REQ when req or *req is NULL, PVFS_SUCCESS otherwise.
 * *req is reset to NULL whenever a reference was dropped; it is left
 * untouched for requests whose refcount is already <= 0.
 */
int PVFS_Request_free(PVFS_Request * req)
{
    PVFS_Request link;
    PVFS_Request following;

    if (req == NULL)
    {
        gossip_lerr("PVFS_Request_free: NULL pointer argument\n");
        return PVFS_ERR_REQ;
    }

    if (*req == NULL)
    {
        gossip_lerr("PVFS_Request_free: pointer to NULL pointer argument\n");
        return PVFS_ERR_REQ;
    }

    /* A refcount of 0 means "already freed"; a negative one marks a
     * request that must never be freed.  Either way, leave it alone. */
    if ((*req)->refcount <= 0)
    {
        gossip_debug(GOSSIP_REQUEST_DEBUG, "don't free special request\n");
        return PVFS_SUCCESS;
    }

    PINT_REQUEST_REFDEC(*req);

    /* Somebody else still holds a reference: only forget our handle. */
    if ((*req)->refcount > 0)
    {
        *req = NULL;
        gossip_debug(GOSSIP_REQUEST_DEBUG, "don't free referenced request\n");
        return PVFS_SUCCESS;
    }

    /* Packed requests are contiguous and have no external references,
     * so a single free() releases everything. */
    if (PINT_REQUEST_IS_PACKED(*req))
    {
        free(*req);
        *req = NULL;
        gossip_debug(GOSSIP_REQUEST_DEBUG, "free packed request\n");
        return PVFS_SUCCESS;
    }

    /* Walk the sreq chain: release each node's ereq, remember the next
     * node, then free the node itself. */
    for (link = (*req)->sreq; link; link = following)
    {
        PVFS_Request_free(&(link->ereq));
        following = link->sreq;
        free(link);
        gossip_debug(GOSSIP_REQUEST_DEBUG, "free sreq linked request\n");
    }

    /* Finally the head structure and its own ereq. */
    PVFS_Request_free(&((*req)->ereq));
    free(*req);
    *req = NULL;
    gossip_debug(GOSSIP_REQUEST_DEBUG, "free unpacked request\n");

    return PVFS_SUCCESS;
}
/*
 * ADIOI_PVFS2_aio_free_fn - release the state attached to an AIO request.
 *
 * extra_state: the ADIOI_AIO_Request to dispose of, passed as void *.
 *
 * Frees the request's memory and file PVFS_Requests, then the
 * ADIOI_AIO_Request itself.  Always returns MPI_SUCCESS.
 */
int ADIOI_PVFS2_aio_free_fn(void *extra_state)
{
    ADIOI_AIO_Request *request = (ADIOI_AIO_Request *) extra_state;

    PVFS_Request_free(&request->mem_req);
    PVFS_Request_free(&request->file_req);
    ADIOI_Free(request);

    return MPI_SUCCESS;
}
/*
 * generic_write - write 'count' bytes from 'buffer' into (unix or pvfs2)
 * file 'dest'.
 *
 * dest:        destination file; dest->fs_type selects the backend.
 * buffer:      data to write.
 * offset:      file offset for the PVFS2 write (the UNIX path calls
 *              write() on the descriptor and does not use it).
 * count:       number of bytes to write.
 * credentials: PVFS2 credentials for the operation.
 *
 * Returns the result of write() for UNIX files.  For PVFS2 files it
 * returns resp_io.total_completed on success, or the PVFS error code
 * (converted to size_t) on failure.
 */
size_t generic_write(file_object *dest, char *buffer, int64_t offset, size_t count, PVFS_credentials *credentials)
{
    PVFS_Request mem_req, file_req;
    PVFS_sysresp_io resp_io;
    int ret;

    if (dest->fs_type == UNIX_FILE)
        return(write(dest->u.ufs.fd, buffer, count));

    file_req = PVFS_BYTE;
    ret = PVFS_Request_contiguous(count, PVFS_BYTE, &mem_req);
    if (ret < 0)
    {
        PVFS_perror("PVFS_Request_contiguous", ret);
        return(ret);
    }

    ret = PVFS_sys_write(dest->u.pvfs2.ref, file_req, offset, buffer,
                         mem_req, credentials, &resp_io, hints);

    /* mem_req was allocated by PVFS_Request_contiguous above and must be
     * released whether or not the write succeeded; previously it leaked
     * on the failure path. */
    PVFS_Request_free(&mem_req);

    if (ret == 0)
        return(resp_io.total_completed);

    PVFS_perror("PVFS_sys_write", ret);
    return ret;
}
/*
 * generic_read - read 'count' bytes from a (unix or pvfs2) file 'src',
 * placing the result in 'buffer'.
 *
 * src:         source file; src->fs_type selects the backend.
 * buffer:      destination for the data.
 * offset:      file offset for the PVFS2 read (the UNIX path calls
 *              read() on the descriptor and does not use it).
 * count:       number of bytes to read.
 * credentials: PVFS2 credentials for the operation.
 *
 * Returns the result of read() for UNIX files.  For PVFS2 files it
 * returns resp_io.total_completed on success, or the PVFS error code
 * (converted to size_t) on failure.
 */
size_t generic_read(file_object *src, char *buffer, int64_t offset, size_t count, PVFS_credentials *credentials)
{
    PVFS_Request mem_req, file_req;
    PVFS_sysresp_io resp_io;
    int ret;

    if (src->fs_type == UNIX_FILE)
        return (read(src->u.ufs.fd, buffer, count));

    file_req = PVFS_BYTE;
    ret = PVFS_Request_contiguous(count, PVFS_BYTE, &mem_req);
    if (ret < 0)
    {
        fprintf(stderr, "Error: PVFS_Request_contiguous failure\n");
        return (ret);
    }

    ret = PVFS_sys_read(src->u.pvfs2.ref, file_req, offset, buffer,
                        mem_req, credentials, &resp_io, hints);

    /* mem_req was allocated by PVFS_Request_contiguous above and must be
     * released whether or not the read succeeded; previously it leaked
     * on the failure path. */
    PVFS_Request_free(&mem_req);

    if (ret == 0)
        return (resp_io.total_completed);

    PVFS_perror("PVFS_sys_read", ret);
    return (ret);
}
/**
 * PVFSIOSHandle::Pwrite: positional write through PVFS_sys_write.
 *
 * The file side is described as plain bytes and the memory side as one
 * contiguous region of count bytes.
 *
 * @param buf the buffer to write from
 * @param count the number of bytes to write
 * @param offset the file offset to write at
 * @param bytes_written receives resp_io.total_completed on success
 * @return PLFS_SUCCESS or PLFS_E*
 */
plfs_error_t PVFSIOSHandle::Pwrite(const void* buf, size_t count,
                                   off_t offset, ssize_t *bytes_written)
{
    PVFS_sysresp_io io_result;
    PVFS_Request buffer_layout;
    PVFS_Request file_layout = PVFS_BYTE;   /* file is addressed in bytes */
    int status;

    /* the source is a single contiguous buffer of count bytes */
    status = PVFS_Request_contiguous(count, PVFS_BYTE, &buffer_layout);
    if (status < 0) {
        return errno_to_plfs_error(-get_err(status));
    }

    status = PVFS_sys_write(this->ref, file_layout, offset, (void*)buf,
                            buffer_layout, &this->creds, &io_result);

    /* the memory request is released before the result is inspected, so
     * it is freed on both the success and the failure path */
    PVFS_Request_free(&buffer_layout);

    if (status < 0) {
        return errno_to_plfs_error(-get_err(status));
    }

    *bytes_written = io_result.total_completed;
    return PLFS_SUCCESS;
}
/**
 * PVFSIOSHandle::Pread: positional read through PVFS_sys_read.
 *
 * The file side is described as plain bytes and the memory side as one
 * contiguous region of count bytes.
 *
 * @param buf the buffer to read into
 * @param count the number of bytes to read
 * @param offset the file offset to read from
 * @param bytes_read receives resp_io.total_completed on success
 * @return PLFS_SUCCESS or PLFS_E*
 */
plfs_error_t PVFSIOSHandle::Pread(void* buf, size_t count, off_t offset,
                                  ssize_t *bytes_read)
{
    PVFS_sysresp_io io_result;
    PVFS_Request buffer_layout;
    PVFS_Request file_layout = PVFS_BYTE;   /* file is addressed in bytes */
    int status;

    /* the destination is a single contiguous buffer of count bytes */
    status = PVFS_Request_contiguous(count, PVFS_BYTE, &buffer_layout);
    if (status < 0) {
        return errno_to_plfs_error(-get_err(status));
    }

    status = PVFS_sys_read(this->ref, file_layout, offset, buf,
                           buffer_layout, &this->creds, &io_result);

    /*
     * buffer_layout is set by PVFS_Request_contiguous(), so it has to be
     * handed back even when the read fails; release it before looking at
     * the result so both paths are covered.
     */
    PVFS_Request_free(&buffer_layout);

    if (status < 0) {
        return errno_to_plfs_error(-get_err(status));
    }

    *bytes_read = io_result.total_completed;
    return PLFS_SUCCESS;
}
/**
 * PVFSIOSHandle::GetDataBuf: fill a freshly malloc'd buffer with the
 * first length bytes of the file (reads start at offset 0 and continue
 * until length bytes have arrived).
 *
 * @param bufp on success, receives the malloc'd buffer
 * @param length number of bytes wanted
 * @return PLFS_SUCCESS, PLFS_ENOMEM if malloc fails, or the PLFS_E*
 *         mapping of the PVFS error; a read that completes 0 bytes
 *         before length is reached is reported as EWOULDBLOCK
 */
plfs_error_t PVFSIOSHandle::GetDataBuf(void **bufp, size_t length)
{
    PVFS_sysresp_io io_result;
    PVFS_Request buffer_layout;
    PVFS_Request file_layout;
    size_t filled = 0;
    int status;
    int err;

    char *data = (char *)malloc(length);
    if (!data) {
        return(PLFS_ENOMEM);
    }

    while (filled < length) {
        /* both sides are simple: bytes in the file, one contiguous
         * region covering what is still missing in memory */
        file_layout = PVFS_BYTE;
        status = PVFS_Request_contiguous(length - filled, PVFS_BYTE,
                                         &buffer_layout);
        if (status < 0) {
            free(data);
            err = get_err(status);
            return errno_to_plfs_error(-err);
        }

        status = PVFS_sys_read(this->ref, file_layout, filled /*offset*/,
                               data + filled, buffer_layout,
                               &this->creds, &io_result);
        /* released unconditionally so a failed read does not leak it */
        PVFS_Request_free(&buffer_layout);
        if (status < 0) {
            free(data);
            err = get_err(status);
            return errno_to_plfs_error(-err);
        }

        /* nothing transferred although more was requested: give up */
        if (io_result.total_completed == 0) {
            free(data);
            err = -EWOULDBLOCK;
            return errno_to_plfs_error(-err);
        }

        filled += io_result.total_completed;
    }

    *bufp = data;
    return PLFS_SUCCESS;
}
int main(int argc, char * argv[]) { FILE * f; int ret; PVFS_fs_id curfs; PVFS_Request file_req; PVFS_Request mem_req; int count; char line[255]; int size; PVFS_offset offset=0; PVFS_credentials creds; PVFS_sysresp_create create_resp; PVFS_sysresp_io io_resp; PVFS_sysresp_lookup lookup_resp; PVFS_sys_attr attr; const char * filename = "test-accesses-file"; int j = 0, i = 0; char * membuff; char errormsg[255]; if(argc < 2) { fprintf(stderr, "test-accesses <sizes file>\n"); exit(1); } f = fopen(argv[1], "r"); if(!f) { fprintf(stderr, "error opening file\n"); return errno; } if(fgets(line, 255, f) == NULL) { fprintf(stderr, "error in file\n"); exit(1); } if(sscanf(line, "%d", &count) < 1) { fprintf(stderr, "error in file\n"); exit(1); } ret = PVFS_util_init_defaults(); if(ret < 0) goto error; ret = PVFS_util_get_default_fsid(&curfs); if(ret < 0) goto error; ret = PVFS_sys_lookup(curfs, "/", &creds, &lookup_resp, 0, NULL); if(ret < 0) goto error; PVFS_util_gen_credentials(&creds); attr.mask = PVFS_ATTR_SYS_ALL_SETABLE; attr.owner = creds.uid; attr.group = creds.gid; attr.perms = 0644; attr.atime = attr.ctime = attr.mtime = time(NULL); ret = PVFS_sys_create( (char*)filename, lookup_resp.ref, attr, &creds, NULL, &create_resp, NULL, NULL); if(ret < 0) goto error; for(; i < count; ++i) { if(fgets(line, 255, f) == NULL) { fprintf(stderr, "error in file\n"); exit(1); } if(sscanf(line, "%d", &size) < 1) { fprintf(stderr, "error in file\n"); exit(1); } membuff = malloc(size); assert(membuff); for(j = 0; j < size; ++j) { membuff[j] = j; } ret = PVFS_Request_contiguous( size, PVFS_BYTE, &file_req); if(ret < 0) goto error; ret = PVFS_Request_contiguous( size, PVFS_BYTE, &mem_req); if(ret < 0) goto error; printf("Performing Write: offset: %llu, size: %d\n", llu(offset), size); ret = PVFS_sys_io( create_resp.ref, file_req, offset, membuff, mem_req, &creds, &io_resp, PVFS_IO_WRITE, NULL); if(ret < 0) goto error; printf("Write response: size: %llu\n", llu(io_resp.total_completed)); 
offset += size; PVFS_Request_free(&mem_req); PVFS_Request_free(&file_req); free(membuff); } return 0; error: fclose(f); PVFS_sys_remove( (char*)filename, lookup_resp.ref, &creds, NULL); PVFS_perror_gossip(errormsg, ret); fprintf(stderr, "%s\n", errormsg); return PVFS_get_errno_mapping(ret); }
/*
 * pvfs_check_vector - build one PVFS_Request describing an iovec.
 *
 * Runs of equally sized, equally strided iovec entries are folded into
 * PVFS vector requests; every other entry becomes a plain byte block.
 * All displacements are relative to iov[0].iov_base, which is returned
 * through *buf (when count > 0).
 *
 * iov:   the iovec array to describe.
 * count: number of entries in iov.
 * req:   receives the committed request; the caller must free it.
 * buf:   receives the base address the displacements refer to.
 *
 * Returns 0 on success, -1 if a working array cannot be allocated.
 */
int pvfs_check_vector(const struct iovec *iov, int count, PVFS_Request *req, void **buf)
{
    int i;
    int vstart;
    int vlen;
    int bsz;
    PVFS_size stride;
    int32_t *bsz_array;
    PVFS_size *disp_array;
    PVFS_Request *req_array;
    int rblk;

    /* set up request arrays */
    bsz_array = (int32_t *)malloc(count * sizeof(int32_t));
    if (!bsz_array)
    {
        return -1;
    }
    disp_array = (PVFS_size *)malloc(count * sizeof(PVFS_size));
    if (!disp_array)
    {
        free(bsz_array);
        return -1;
    }
    req_array = (PVFS_Request *)malloc(count * sizeof(PVFS_Request));
    /* this used to re-test disp_array, so a failed req_array allocation
     * went unnoticed and was dereferenced below */
    if (!req_array)
    {
        free(disp_array);
        free(bsz_array);
        return -1;
    }

    /* for now we assume that addresses in the iovec are ascending */
    /* not that otherwise won't work, but we're not sure */
    /* the first address will be assumed to be the base address of */
    /* the whole request.  the displacement of each vector is relative */
    /* to that address */
    if (count > 0)
    {
        *buf = iov[0].iov_base;
    }

    rblk = 0;
    /* start at beginning of iovec */
    i = 0;
    while (i < count)
    {
        /* starting a new vector at position i */
        vstart = i;
        vlen = 1;
        bsz = iov[i].iov_len;
        stride = 0;

        /* vector blocks must be of equal size */
        while (++i < count && iov[i].iov_len == bsz)
        {
            if (vlen == 1)
            {
                /* two blocks of equal size are a vector of two */
                stride = (u_char *)iov[i].iov_base -
                         (u_char *)iov[i - 1].iov_base;
                if (stride < bsz)
                {
                    /* overlapping blocks and negative strides are problems */
                    break;
                }
                vlen++;
            }
            else if (((u_char *)iov[i].iov_base -
                      (u_char *)iov[i - 1].iov_base) == stride)
            {
                /* to add more blocks, stride must match */
                vlen++;
            }
            else
            {
                /* doesn't match - end of vector */
                break;
            }
        }

        if (vlen == 1)
        {
            /* trivial conversion */
            bsz_array[rblk] = iov[vstart].iov_len;
            disp_array[rblk] = (PVFS_size)((u_char *)iov[vstart].iov_base -
                                           (u_char *)*buf);
            req_array[rblk] = PVFS_BYTE;
            rblk++;
        }
        else
        {
            /* found a vector */
            bsz_array[rblk] = 1;
            disp_array[rblk] = (PVFS_size)((u_char *)iov[vstart].iov_base -
                                           (u_char *)*buf);
            PVFS_Request_vector(vlen, bsz, stride, PVFS_BYTE, &req_array[rblk]);
            rblk++;
        }
    }

    /* now build full request */
    PVFS_Request_struct(rblk, bsz_array, disp_array, req_array, req);
    PVFS_Request_commit(req);

    free(bsz_array);
    free(disp_array);

    /* only the vector sub-requests were created here; PVFS_BYTE entries
     * are skipped */
    while (rblk--)
    {
        if (req_array[rblk] != PVFS_BYTE)
        {
            PVFS_Request_free(&req_array[rblk]);
        }
    }
    free(req_array);

    /* req is not freed, the caller is expected to do that */
    return 0;
}
/* Release a buffer obtained from ADIOI_Malloc, tolerating NULL so the
 * cleanup paths below can run regardless of how far allocation got.
 * (ADIOI_Free is not assumed to accept NULL.) */
static void convert_dtype_release(void *ptr)
{
    if (ptr != NULL)
        ADIOI_Free(ptr);
}

/* Free every child request that was actually produced; slots that were
 * never filled are NULL and are skipped. */
static void convert_dtype_free_children(PVFS_Request *children, int count)
{
    int idx;

    if (children == NULL)
        return;
    for (idx = 0; idx < count; idx++) {
        if (children[idx] != NULL)
            PVFS_Request_free(&children[idx]);
    }
}

/*
 * convert_mpi_pvfs2_dtype - recursively translate an MPI datatype into a
 * PVFS_Request.
 *
 * mpi_dtype:  the MPI datatype to convert.
 * pvfs_dtype: receives the resulting request.
 *
 * Returns 1 when mpi_dtype is a named (leaf) type, 0 when a derived type
 * was converted, -1 on allocation failure or for an unsupported
 * combiner, and otherwise the non-zero code of the failing
 * PVFS_Request_* call.
 */
int convert_mpi_pvfs2_dtype(MPI_Datatype *mpi_dtype, PVFS_Request *pvfs_dtype)
{
    int num_int = -1, num_addr = -1, num_dtype = -1,
        combiner = -1, i = -1, ret = -1, leaf = -1;
    int *arr_int = NULL;
    MPI_Aint *arr_addr = NULL;
    MPI_Datatype *arr_dtype = NULL;
    PVFS_Request *old_pvfs_dtype = NULL;
    PVFS_Request *old_pvfs_dtype_arr = NULL;
    PVFS_Request *tmp_pvfs_dtype = NULL;
    int arr_count = -1;
    PVFS_size *pvfs_arr_disp = NULL;
    int *pvfs_arr_len = NULL;
    MPI_Aint mpi_lb = -1, mpi_extent = -1;
    PVFS_offset pvfs_lb = -1;
    PVFS_size pvfs_extent = -1;
    int has_lb_ub = 0;
    int child_failed = 0;

    MPI_Type_get_envelope(*mpi_dtype, &num_int, &num_addr,
                          &num_dtype, &combiner);

    /* Named types map directly and end the recursion. */
    if (combiner == MPI_COMBINER_NAMED) {
        convert_named(mpi_dtype, pvfs_dtype, combiner);
        return 1;
    }

    /* Allocate space for the arrays necessary for
     * MPI_Type_get_contents */
    if ((arr_int = (int *) ADIOI_Malloc(sizeof(int)*num_int)) == NULL) {
        fprintf(stderr, "Failed to allocate array_int\n");
        return -1;
    }
    /* arr_addr holds MPI_Aint values; sizing it with sizeof(int)
     * under-allocated wherever MPI_Aint is wider than int */
    if ((arr_addr = (MPI_Aint *) ADIOI_Malloc(sizeof(MPI_Aint)*num_addr)) == NULL) {
        ADIOI_Free(arr_int);
        fprintf(stderr, "Failed to allocate array_addr\n");
        return -1;
    }
    if ((arr_dtype = (MPI_Datatype *) ADIOI_Malloc(sizeof(MPI_Datatype)*num_dtype)) == NULL) {
        ADIOI_Free(arr_int);
        ADIOI_Free(arr_addr);
        fprintf(stderr, "Failed to allocate array_dtypes\n");
        return -1;
    }

    MPI_Type_get_contents(*mpi_dtype, num_int, num_addr, num_dtype,
                          arr_int, arr_addr, arr_dtype);

    /* If it's not a predefined datatype, it is either a
     * derived datatype or a structured datatype */
    if (combiner != MPI_COMBINER_STRUCT) {
        old_pvfs_dtype = (PVFS_Request *) ADIOI_Malloc(sizeof(PVFS_Request));
        if (old_pvfs_dtype == NULL) {
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "Failed to allocate PVFS_Request\n");
            ADIOI_Free(arr_int);
            ADIOI_Free(arr_addr);
            ADIOI_Free(arr_dtype);
            return -1;
        }
        /* NULL marks "no child request yet" so the cleanup below can
         * tell whether there is anything to free */
        *old_pvfs_dtype = NULL;

        /* ret stays -1 unless a request is actually built: unsupported
         * combiners and failed child conversions fall through with it */
        switch (combiner) {
        case MPI_COMBINER_CONTIGUOUS:
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
            if (leaf >= 0)
                ret = PVFS_Request_contiguous(arr_int[0],
                                              *old_pvfs_dtype, pvfs_dtype);
            break;
        case MPI_COMBINER_VECTOR:
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
            if (leaf >= 0)
                ret = PVFS_Request_vector(arr_int[0], arr_int[1],
                                          arr_int[2], *old_pvfs_dtype,
                                          pvfs_dtype);
            break;
        case MPI_COMBINER_HVECTOR:
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
            if (leaf >= 0)
                ret = PVFS_Request_hvector(arr_int[0], arr_int[1],
                                           arr_addr[0], *old_pvfs_dtype,
                                           pvfs_dtype);
            break;
        /* Both INDEXED and HINDEXED types require PVFS_size
         * address arrays.  Therefore, we need to copy and
         * convert the data from MPI_get_contents() into
         * a PVFS_size buffer */
        case MPI_COMBINER_INDEXED:
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
            if (leaf < 0)
                break;
            pvfs_arr_disp = (PVFS_size *) ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size));
            if (pvfs_arr_disp == NULL && arr_int[0] > 0) {
                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                        "Failed to allocate pvfs_arr_disp\n");
                break;
            }
            for (i = 0; i < arr_int[0]; i++) {
                pvfs_arr_disp[i] = (PVFS_size) arr_int[arr_int[0]+1+i];
            }
            ret = PVFS_Request_indexed(arr_int[0], &arr_int[1],
                                       pvfs_arr_disp,
                                       *old_pvfs_dtype, pvfs_dtype);
            convert_dtype_release(pvfs_arr_disp);
            pvfs_arr_disp = NULL;
            break;
        case MPI_COMBINER_HINDEXED:
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
            if (leaf < 0)
                break;
            pvfs_arr_disp = (PVFS_size *) ADIOI_Malloc(arr_int[0]*sizeof(PVFS_size));
            if (pvfs_arr_disp == NULL && arr_int[0] > 0) {
                fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                        "Failed to allocate pvfs_arr_disp\n");
                break;
            }
            for (i = 0; i < arr_int[0]; i++) {
                pvfs_arr_disp[i] = (PVFS_size) arr_addr[i];
            }
            /* pass the converted displacements; the old code built them
             * and then passed a cast of arr_addr instead, which is only
             * right when MPI_Aint and PVFS_size have the same layout */
            ret = PVFS_Request_hindexed(arr_int[0], &arr_int[1],
                                        pvfs_arr_disp,
                                        *old_pvfs_dtype, pvfs_dtype);
            convert_dtype_release(pvfs_arr_disp);
            pvfs_arr_disp = NULL;
            break;
        case MPI_COMBINER_DUP:
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[0], old_pvfs_dtype);
            if (leaf >= 0)
                ret = PVFS_Request_contiguous(1,
                                              *old_pvfs_dtype, pvfs_dtype);
            break;
        /* The remaining combiners have no PVFS2 counterpart here.
         * old_pvfs_dtype is deliberately NOT freed in these cases: the
         * common exit below releases it exactly once (freeing it here
         * as well led to a use-after-free and a double free). */
        case MPI_COMBINER_INDEXED_BLOCK:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "INDEXED_BLOCK is unsupported\n");
            break;
        case MPI_COMBINER_HINDEXED_BLOCK:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "HINDEXED_BLOCK is unsupported\n");
            break;
        case MPI_COMBINER_HINDEXED_INTEGER:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "HINDEXED_INTEGER is unsupported\n");
            break;
        case MPI_COMBINER_STRUCT_INTEGER:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "STRUCT_INTEGER is unsupported\n");
            break;
        case MPI_COMBINER_SUBARRAY:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "SUBARRAY is unsupported\n");
            break;
        case MPI_COMBINER_DARRAY:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "DARRAY is unsupported\n");
            break;
        case MPI_COMBINER_F90_REAL:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "F90_REAL is unsupported\n");
            break;
        case MPI_COMBINER_F90_COMPLEX:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "F90_COMPLEX is unsupported\n");
            break;
        case MPI_COMBINER_F90_INTEGER:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "F90_INTEGER is unsupported\n");
            break;
        case MPI_COMBINER_RESIZED:
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "RESIZED is unsupported\n");
            break;
        default:
            break;
        }

        if (ret != 0)
            fprintf(stderr, "Error in PVFS_Request_* "
                    "for a derived datatype\n");

#ifdef DEBUG_DTYPE
        print_dtype_info(combiner, num_int, num_addr, num_dtype,
                         arr_int, arr_addr, arr_dtype);
#endif

        /* arr_dtype[0] only exists when the envelope reported at least
         * one datatype */
        if (num_dtype > 0 && leaf != 1 && combiner != MPI_COMBINER_DUP)
            MPI_Type_free(&arr_dtype[0]);

        ADIOI_Free(arr_int);
        ADIOI_Free(arr_addr);
        ADIOI_Free(arr_dtype);

        if (*old_pvfs_dtype != NULL)
            PVFS_Request_free(old_pvfs_dtype);
        ADIOI_Free(old_pvfs_dtype);

        return ret;
    }

    /* MPI_COMBINER_STRUCT */

    /* When converting into a PVFS_Request_struct, we no longer
     * can use MPI_LB and MPI_UB.  Therefore, we simply ignore all the
     * MPI_LB and MPI_UB types, get the lb and extent, and pass them
     * on through a PVFS resized request. */
    arr_count = 0;
    for (i = 0; i < arr_int[0]; i++) {
        if (arr_dtype[i] != MPI_LB && arr_dtype[i] != MPI_UB) {
            arr_count++;
        }
    }

    if (arr_int[0] != arr_count) {
        MPI_Type_get_extent(*mpi_dtype, &mpi_lb, &mpi_extent);
        pvfs_lb = mpi_lb;
        pvfs_extent = mpi_extent;
        if ((pvfs_arr_len = (int *) ADIOI_Malloc(arr_count*sizeof(int))) == NULL) {
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "Failed to allocate pvfs_arr_len\n");
        }
        has_lb_ub = 1;
    }

    if ((old_pvfs_dtype_arr = (PVFS_Request *)
         ADIOI_Malloc(arr_count*sizeof(PVFS_Request))) == NULL)
        fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                "Failed to allocate PVFS_Requests\n");

    if ((pvfs_arr_disp = (PVFS_size *)
         ADIOI_Malloc(arr_count*sizeof(PVFS_size))) == NULL) {
        fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                "Failed to allocate pvfs_arr_disp\n");
    }

    /* Stop here if a working array that will be written to is missing,
     * instead of dereferencing NULL further down. */
    if (arr_count > 0 &&
        (old_pvfs_dtype_arr == NULL || pvfs_arr_disp == NULL ||
         (has_lb_ub && pvfs_arr_len == NULL))) {
        ADIOI_Free(arr_int);
        ADIOI_Free(arr_addr);
        ADIOI_Free(arr_dtype);
        convert_dtype_release(old_pvfs_dtype_arr);
        convert_dtype_release(pvfs_arr_disp);
        convert_dtype_release(pvfs_arr_len);
        return -1;
    }

    for (i = 0; i < arr_count; i++) {
        old_pvfs_dtype_arr[i] = NULL;
    }

    /* Convert every real member, packing the results densely. */
    arr_count = 0;
    for (i = 0; i < arr_int[0]; i++) {
        if (arr_dtype[i] != MPI_LB && arr_dtype[i] != MPI_UB) {
            leaf = convert_mpi_pvfs2_dtype(&arr_dtype[i],
                                           &old_pvfs_dtype_arr[arr_count]);
            if (leaf != 1)
                MPI_Type_free(&arr_dtype[i]);
            if (leaf < 0)
                child_failed = 1;
            pvfs_arr_disp[arr_count] = (PVFS_size) arr_addr[i];
            if (has_lb_ub) {
                pvfs_arr_len[arr_count] = arr_int[i+1];
            }
            arr_count++;
        }
    }

    if (child_failed) {
        /* a member could not be converted (it has already reported its
         * own error); do not build a struct from incomplete children */
        ret = -1;
        convert_dtype_free_children(old_pvfs_dtype_arr, arr_count);
    } else if (has_lb_ub) {
        /* If a MPI_UB or MPI_LB did exist, we have to
         * resize the datatype */
        tmp_pvfs_dtype = (PVFS_Request *) ADIOI_Malloc(sizeof(PVFS_Request));
        if (tmp_pvfs_dtype == NULL) {
            fprintf(stderr, "convert_mpi_pvfs2_dtype: "
                    "Failed to allocate PVFS_Request\n");
            ret = -1;
            convert_dtype_free_children(old_pvfs_dtype_arr, arr_count);
        } else {
            *tmp_pvfs_dtype = NULL;
            ret = PVFS_Request_struct(arr_count, pvfs_arr_len,
                                      pvfs_arr_disp,
                                      old_pvfs_dtype_arr, tmp_pvfs_dtype);
            if (ret != 0)
                fprintf(stderr, "Error in PVFS_Request_struct\n");

            convert_dtype_free_children(old_pvfs_dtype_arr, arr_count);

#ifdef DEBUG_DTYPE
            fprintf(stderr, "STRUCT(WITHOUT %d LB or UB)(%d,[",
                    arr_int[0] - arr_count, arr_count);
            for (i = 0; i < arr_count; i++)
                fprintf(stderr, "(%d,%Ld) ",
                        pvfs_arr_len[i],
                        pvfs_arr_disp[i]);
            fprintf(stderr, "]\n");
            fprintf(stderr, "RESIZED(LB = %Ld, EXTENT = %Ld)\n",
                    pvfs_lb, pvfs_extent);
#endif

            /* only resize a struct request that was actually built */
            if (ret == 0) {
                ret = PVFS_Request_resized(*tmp_pvfs_dtype,
                                           pvfs_lb, pvfs_extent, pvfs_dtype);
                if (ret != 0)
                    fprintf(stderr, "Error in PVFS_Request_resize\n");
            }

            if (*tmp_pvfs_dtype != NULL)
                PVFS_Request_free(tmp_pvfs_dtype);
            ADIOI_Free(tmp_pvfs_dtype);
        }
    } else {
        /* No MPI_LB or MPI_UB datatypes */
        ret = PVFS_Request_struct(arr_int[0], &arr_int[1],
                                  pvfs_arr_disp,
                                  old_pvfs_dtype_arr, pvfs_dtype);
        if (ret != 0)
            fprintf(stderr, "Error in PVFS_Request_struct\n");

        convert_dtype_free_children(old_pvfs_dtype_arr, arr_count);

#ifdef DEBUG_DTYPE
        print_dtype_info(combiner, num_int, num_addr, num_dtype,
                         arr_int, arr_addr, arr_dtype);
#endif
    }

    ADIOI_Free(arr_int);
    ADIOI_Free(arr_addr);
    ADIOI_Free(arr_dtype);

    convert_dtype_release(old_pvfs_dtype_arr);
    convert_dtype_release(pvfs_arr_disp);
    /* pvfs_arr_len is only allocated when MPI_LB/MPI_UB were present */
    convert_dtype_release(pvfs_arr_len);

    return ret;
}
/*
 * ADIOI_PVFS2_StridedDtypeIO - perform a strided read or write by
 * converting the MPI memory and file datatypes into PVFS_Requests and
 * issuing a single PVFS_sys_read / PVFS_sys_write.
 *
 * fd:            open ADIO file; fd->fs_ptr holds the PVFS2 state.
 * buf:           user buffer.
 * count:         number of 'datatype' elements in buf.
 * datatype:      memory datatype.
 * file_ptr_type: ADIO_INDIVIDUAL uses fd->fp_ind; anything else is
 *                treated as an explicit offset.
 * offset:        explicit offset, in etypes relative to the filetype.
 * status:        filled in when HAVE_STATUS_SET_BYTES is defined.
 * error_code:    MPI_SUCCESS, or an MPIO error code if the PVFS I/O call
 *                fails.  Datatype-conversion and request-construction
 *                failures are reported through the return value only.
 * rw_type:       READ selects PVFS_sys_read, anything else writes.
 *
 * Returns -1 when the filetype has size 0, otherwise the result of the
 * last PVFS/conversion call (0 on success).
 */
int ADIOI_PVFS2_StridedDtypeIO(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code, int rw_type)
{
    int ret = -1, filetype_is_contig = -1;
    MPI_Count filetype_size = -1;
    int num_filetypes = 0, cur_flat_file_reg_off = 0;
    PVFS_Request tmp_mem_req, mem_req, tmp_file_req, file_req;
    PVFS_sysresp_io resp_io;
    ADIO_Offset off = -1, bytes_into_filetype = 0;
    MPI_Aint filetype_extent = -1;
    int i = -1;
    MPI_Count etype_size;
    PVFS_size pvfs_disp = -1;
    ADIOI_Flatlist_node *flat_file_p = ADIOI_Flatlist;

    /* Use for offseting the PVFS2 filetype */
    int pvfs_blk = 1;
    ADIOI_PVFS2_fs *pvfs_fs;
    static char myname[] = "ADIOI_PVFS2_STRIDED_DTYPE";

    memset(&tmp_mem_req, 0, sizeof(PVFS_Request));
    memset(&mem_req, 0, sizeof(PVFS_Request));
    memset(&tmp_file_req, 0, sizeof(PVFS_Request));
    memset(&file_req, 0, sizeof(PVFS_Request));
    /* resp_io.total_completed is read at error_state even when no I/O
     * was issued, so it must start out as zero */
    memset(&resp_io, 0, sizeof(resp_io));

    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;

    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    /* changed below if error */
    *error_code = MPI_SUCCESS;

    /* datatype is the memory type
     * fd->filetype is the file type */
    MPI_Type_size_x(fd->filetype, &filetype_size);
    if (filetype_size == 0) {
        *error_code = MPI_SUCCESS;
        return -1;
    }
    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size_x(fd->etype, &etype_size);

    /* offset is in units of etype relative to the filetype.  We
     * convert this to off in terms of actual data bytes (the offset
     * minus the number of bytes that are not used).  We are allowed
     * to do this since PVFS2 handles offsets with respect to a
     * file_req in bytes, otherwise we would have to convert into a
     * pure byte offset as is done in other methods.  Explicit offset
     * case is handled by using fd->disp and byte-converted off. */
    pvfs_disp = fd->disp;
    if (file_ptr_type == ADIO_INDIVIDUAL)
    {
        if (filetype_is_contig)
        {
            off = fd->fp_ind - fd->disp;
        }
        else
        {
            int flag = 0;
            /* Should have already been flattened in ADIO_Open*/
            while (flat_file_p->type != fd->filetype)
            {
                flat_file_p = flat_file_p->next;
            }
            num_filetypes = -1;
            while (!flag)
            {
                num_filetypes++;
                for (i = 0; i < flat_file_p->count; i++)
                {
                    /* Start on a non zero-length region */
                    if (flat_file_p->blocklens[i])
                    {
                        if (fd->disp + flat_file_p->indices[i] +
                            (num_filetypes * filetype_extent) +
                            flat_file_p->blocklens[i] > fd->fp_ind &&
                            fd->disp + flat_file_p->indices[i] <=
                            fd->fp_ind)
                        {
                            cur_flat_file_reg_off = fd->fp_ind -
                                (fd->disp + flat_file_p->indices[i] +
                                 (num_filetypes * filetype_extent));
                            flag = 1;
                            break;
                        }
                        else
                            bytes_into_filetype += flat_file_p->blocklens[i];
                    }
                }
            }
            /* Impossible that we don't find it in this datatype */
            assert(i != flat_file_p->count);
            off = bytes_into_filetype + cur_flat_file_reg_off;
        }
    }
    else /* ADIO_EXPLICIT */
    {
        off = etype_size * offset;
    }

#ifdef DEBUG_DTYPE
    fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: (fd->fp_ind=%Ld,fd->disp=%Ld,"
            " offset=%Ld),(pvfs_disp=%Ld,off=%Ld)\n",
            fd->fp_ind, fd->disp, offset, pvfs_disp, off);
#endif

    /* Convert the MPI memory and file datatypes into
     * PVFS2 datatypes */
    ret = convert_mpi_pvfs2_dtype(&datatype, &tmp_mem_req);
    if (ret < 0)
    {
        goto error_state;
    }
    ret = convert_mpi_pvfs2_dtype(&(fd->filetype), &tmp_file_req);
    if (ret < 0)
    {
        goto error_state;
    }

    ret = PVFS_Request_contiguous(count, tmp_mem_req, &mem_req);
    if (ret != 0)
    {
        /* TODO: convert this to MPIO error handling */
        fprintf(stderr, "ADIOI_PVFS2_stridedDtypeIO: error in final"
                " CONTIG memory type\n");
        /* without a memory request the I/O call cannot be issued */
        goto error_state;
    }
    PVFS_Request_free(&tmp_mem_req);
    tmp_mem_req = NULL;

    /* pvfs_disp is used to offset the filetype */
    ret = PVFS_Request_hindexed(1, &pvfs_blk, &pvfs_disp,
                                tmp_file_req, &file_req);
    if (ret != 0)
    {
        fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: error in final"
                " HINDEXED file type\n");
        /* without a file request the I/O call cannot be issued */
        goto error_state;
    }
    PVFS_Request_free(&tmp_file_req);
    tmp_file_req = NULL;

    if (rw_type == READ)
        ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, off, buf,
                            mem_req, &(pvfs_fs->credentials), &resp_io);
    else
        ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, off, buf,
                             mem_req, &(pvfs_fs->credentials), &resp_io);

    if (ret != 0)
    {
        fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: Warning - PVFS_sys_"
                "read/write returned %d and completed %Ld bytes.\n",
                ret, (long long)resp_io.total_completed);
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in PVFS_sys_io \n", 0);
        goto error_state;
    }

    if (file_ptr_type == ADIO_INDIVIDUAL)
    {
        fd->fp_ind = off += resp_io.total_completed;
    }

error_state:
    fd->fp_sys_posn = -1;   /* set it to null. */

    /* Release whatever exists.  The temporaries are only still set here
     * when an early failure skipped their normal release (they used to
     * leak in that case); handles that are still NULL are skipped so
     * PVFS_Request_free is never called on a NULL request. */
    if (tmp_mem_req != NULL)
        PVFS_Request_free(&tmp_mem_req);
    if (tmp_file_req != NULL)
        PVFS_Request_free(&tmp_file_req);
    if (mem_req != NULL)
        PVFS_Request_free(&mem_req);
    if (file_req != NULL)
        PVFS_Request_free(&file_req);

#ifdef DEBUG_DTYPE
    fprintf(stderr, "ADIOI_PVFS2_StridedDtypeIO: "
            "resp_io.total_completed=%Ld,ret=%d\n",
            resp_io.total_completed, ret);
#endif

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, resp_io.total_completed);
    /* This is a temporary way of filling in status.  The right way is to
     * keep track of how much data was actually acccessed by
     * ADIOI_BUFFERED operations */
#endif
    return ret;
}
/*
 * ADIOI_PVFS2_WriteContig - write a contiguous buffer to a PVFS2 file.
 *
 * fd:            open ADIO file; fd->fs_ptr holds the PVFS2 state.
 * buf:           data to write.
 * count:         number of 'datatype' elements in buf.
 * datatype:      element type; count * its size gives the byte length.
 * file_ptr_type: ADIO_EXPLICIT_OFFSET writes at 'offset'; anything else
 *                writes at fd->fp_ind and advances it.
 * offset:        byte offset used in the explicit-offset case.
 * status:        filled in when HAVE_STATUS_SET_BYTES is defined.
 * error_code:    MPI_SUCCESS, or an MPIO error code on failure.
 */
void ADIOI_PVFS2_WriteContig(ADIO_File fd, const void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status * status, int *error_code)
{
    int ret;
    MPI_Count datatype_size, len;
    PVFS_Request file_req, mem_req;
    PVFS_sysresp_io resp_io;
    ADIOI_PVFS2_fs *pvfs_fs;
    static char myname[] = "ADIOI_PVFS2_WRITECONTIG";

    pvfs_fs = (ADIOI_PVFS2_fs *) fd->fs_ptr;

    MPI_Type_size_x(datatype, &datatype_size);
    len = datatype_size * count;

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &mem_req);
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in PVFS_Request_contiguous (memory)", 0);
        return;
    }
    /* --END ERROR HANDLING-- */

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &file_req);
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in PVFS_Request_contiguous (file)", 0);
        /* mem_req already exists at this point and used to leak here */
        PVFS_Request_free(&mem_req);
        return;
    }
    /* --END ERROR HANDLING-- */

    if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
#endif
        ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, offset,
                             (void *) buf, mem_req,
                             &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
#endif
        /* --BEGIN ERROR HANDLING-- */
        if (ret != 0) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               ADIOI_PVFS2_error_convert(ret),
                                               "Error in PVFS_sys_write", 0);
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        /* no narrowing cast: an (int) cast would truncate the completed
         * byte count before it is added to the 64-bit file position */
        fd->fp_sys_posn = offset + resp_io.total_completed;
    } else {
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_write_a, 0, NULL);
#endif
        ret = PVFS_sys_write(pvfs_fs->object_ref, file_req, fd->fp_ind,
                             (void *) buf, mem_req,
                             &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
        MPE_Log_event(ADIOI_MPE_write_b, 0, NULL);
#endif
        /* --BEGIN ERROR HANDLING-- */
        if (ret != 0) {
            *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                               MPIR_ERR_RECOVERABLE,
                                               myname, __LINE__,
                                               ADIOI_PVFS2_error_convert(ret),
                                               "Error in PVFS_sys_write", 0);
            goto fn_exit;
        }
        /* --END ERROR HANDLING-- */

        fd->fp_ind += resp_io.total_completed;
        fd->fp_sys_posn = fd->fp_ind;
    }

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, resp_io.total_completed);
#endif
    *error_code = MPI_SUCCESS;

fn_exit:
    PVFS_Request_free(&file_req);
    PVFS_Request_free(&mem_req);
    return;
}
/*
 * ADIOI_PVFS2_ReadContig - read a contiguous buffer from a PVFS2 file.
 *
 * fd:            open ADIO file; fd->fs_ptr holds the PVFS2 state.
 * buf:           destination buffer.
 * count:         number of 'datatype' elements to read.
 * datatype:      element type; count * its size gives the byte length.
 * file_ptr_type: ADIO_INDIVIDUAL reads at fd->fp_ind and advances it;
 *                otherwise the read happens at 'offset'.
 * offset:        byte offset used when not reading at fd->fp_ind.
 * status:        filled in when HAVE_STATUS_SET_BYTES is defined.
 * error_code:    MPI_SUCCESS, or an MPIO error code on failure.
 */
void ADIOI_PVFS2_ReadContig(ADIO_File fd, void *buf, int count, MPI_Datatype datatype, int file_ptr_type, ADIO_Offset offset, ADIO_Status *status, int *error_code)
{
    int ret, datatype_size, len;
    PVFS_Request file_req, mem_req;
    PVFS_sysresp_io resp_io;
    ADIOI_PVFS2_fs *pvfs_fs;
    static char myname[] = "ADIOI_PVFS2_READCONTIG";

    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;

    MPI_Type_size(datatype, &datatype_size);
    len = datatype_size * count;

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &mem_req);
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in pvfs_request_contig (memory)", 0);
        return;
    }
    /* --END ERROR HANDLING-- */

    ret = PVFS_Request_contiguous(len, PVFS_BYTE, &file_req);
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in pvfs_request_contig (file)", 0);
        /* mem_req already exists at this point and used to leak here */
        PVFS_Request_free(&mem_req);
        return;
    }
    /* --END ERROR HANDLING-- */

    if (file_ptr_type == ADIO_INDIVIDUAL) {
        /* copy individual file pointer into offset variable, continue */
        offset = fd->fp_ind;
    }

#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_read_a, 0, NULL );
#endif
    ret = PVFS_sys_read(pvfs_fs->object_ref, file_req, offset, buf,
                        mem_req, &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
    MPE_Log_event( ADIOI_MPE_read_b, 0, NULL );
#endif
    /* --BEGIN ERROR HANDLING-- */
    if (ret != 0 ) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           ADIOI_PVFS2_error_convert(ret),
                                           "Error in PVFS_sys_read", 0);
        goto fn_exit;
    }
    /* --END ERROR HANDLING-- */

    /* no narrowing cast: an (int) cast would truncate the completed byte
     * count before it is added to the 64-bit file positions */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind += resp_io.total_completed;
    }
    fd->fp_sys_posn = offset + resp_io.total_completed;

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, (int)resp_io.total_completed);
#endif

    *error_code = MPI_SUCCESS;

fn_exit:
    PVFS_Request_free(&mem_req);
    PVFS_Request_free(&file_req);
    return;
}
/*
 * ADIOI_PVFS2_OldWriteStrided - noncontiguous write built on PVFS "list I/O":
 * the memory and file layouts are cut into lists of at most MAX_ARRAY_SIZE
 * (offset, length) pairs, each list is turned into a PVFS request, and one
 * PVFS_sys_write is issued per list.
 *
 * fd            - open file; fd->filetype is expected to be flattened already
 * buf           - user buffer
 * count         - number of `datatype` items in buf
 * datatype      - memory datatype
 * file_ptr_type - ADIO_EXPLICIT_OFFSET or ADIO_INDIVIDUAL
 * offset        - as with the other WriteStrided functions, in units of etype
 *                 relative to the filetype (ignored for ADIO_INDIVIDUAL)
 * status        - receives the byte count when HAVE_STATUS_SET_BYTES is defined
 * error_code    - MPI_SUCCESS, or an MPIO error code on failure
 *
 * Three cases are handled:
 *   - noncontiguous memory, contiguous file
 *   - contiguous memory, noncontiguous file
 *   - noncontiguous memory and file (may fall back to
 *     ADIOI_GEN_WriteStrided_naive)
 *
 * Every PVFS request is freed right after the write that used it, and all
 * heap arrays are freed on both the success and the error path.
 */
void ADIOI_PVFS2_OldWriteStrided(ADIO_File fd, void *buf, int count,
                                 MPI_Datatype datatype, int file_ptr_type,
                                 ADIO_Offset offset, ADIO_Status *status,
                                 int *error_code)
{
    /* as with all the other WriteStrided functions, offset is in units of
     * etype relative to the filetype */

    /* Since PVFS2 does not support file locking, can't do buffered writes
       as on Unix */

    ADIOI_Flatlist_node *flat_buf, *flat_file;
    int i, j, k, bwr_size, fwr_size=0, st_index=0;
    int bufsize, sum, n_etypes_in_filetype, size_in_filetype;
    int n_filetypes, etype_in_filetype;
    ADIO_Offset abs_off_in_filetype=0;
    int filetype_size, etype_size, buftype_size;
    MPI_Aint filetype_extent, buftype_extent;
    int buf_count, buftype_is_contig, filetype_is_contig;
    ADIO_Offset off, disp, start_off, initial_off;
    int flag, st_fwr_size, st_n_filetypes;
    int err_flag=0;

    int mem_list_count, file_list_count;
    /* The four arrays start out NULL so the common exit path can tell which
     * of them were actually allocated. */
    PVFS_size * mem_offsets = NULL;
    int64_t *file_offsets = NULL;
    int *mem_lengths = NULL;
    int32_t *file_lengths = NULL;
    int total_blks_to_write;

    int max_mem_list, max_file_list;

    int b_blks_wrote;
    int f_data_wrote;
    int size_wrote=0, n_write_lists, extra_blks;

    int end_bwr_size, end_fwr_size;
    int start_k, start_j, new_file_write, new_buffer_write;
    int start_mem_offset;
    PVFS_Request mem_req, file_req;
    ADIOI_PVFS2_fs * pvfs_fs;
    PVFS_sysresp_io resp_io;
    MPI_Offset total_bytes_written=0;
    static char myname[] = "ADIOI_PVFS2_WRITESTRIDED";

    /* note: don't increase this: several parts of PVFS2 now
     * assume this limit*/
#define MAX_ARRAY_SIZE 64

    /* --BEGIN ERROR HANDLING-- */
    if (fd->atomicity) {
        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                           MPIR_ERR_RECOVERABLE,
                                           myname, __LINE__,
                                           MPI_ERR_ARG,
                                           "Atomic noncontiguous writes are not supported by PVFS2", 0);
        return;
    }
    /* --END ERROR HANDLING-- */

    ADIOI_Datatype_iscontig(datatype, &buftype_is_contig);
    ADIOI_Datatype_iscontig(fd->filetype, &filetype_is_contig);

    /* the HDF5 tests showed a bug in this list processing code (see many many
     * lines down below).  We added a workaround, but common HDF5 file types
     * are actually contiguous and do not need the expensive workaround */
    if (!filetype_is_contig) {
        flat_file = ADIOI_Flatlist;
        while (flat_file->type != fd->filetype) flat_file = flat_file->next;
        if (flat_file->count == 1 && !buftype_is_contig)
            filetype_is_contig = 1;
    }

    MPI_Type_size(fd->filetype, &filetype_size);
    if ( ! filetype_size ) {
        /* zero-sized filetype: nothing can be written */
#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, 0);
#endif
        *error_code = MPI_SUCCESS;
        return;
    }

    MPI_Type_extent(fd->filetype, &filetype_extent);
    MPI_Type_size(datatype, &buftype_size);
    MPI_Type_extent(datatype, &buftype_extent);
    etype_size = fd->etype_size;

    bufsize = buftype_size * count;

    pvfs_fs = (ADIOI_PVFS2_fs*)fd->fs_ptr;

    if (!buftype_is_contig && filetype_is_contig) {

        /* noncontiguous in memory, contiguous in file.  */
        /* NOTE: these scalars intentionally shadow the function-level
         * file_offsets / file_lengths arrays inside this branch. */
        int64_t file_offsets;
        int32_t file_lengths;

        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        if (file_ptr_type == ADIO_EXPLICIT_OFFSET) {
            off = fd->disp + etype_size * offset;
        }
        else off = fd->fp_ind;

        file_list_count = 1;
        file_offsets = off;
        file_lengths = 0;
        total_blks_to_write = count*flat_buf->count;
        b_blks_wrote = 0;

        /* allocate arrays according to max usage */
        if (total_blks_to_write > MAX_ARRAY_SIZE)
            mem_list_count = MAX_ARRAY_SIZE;
        else mem_list_count = total_blks_to_write;
        mem_offsets = (PVFS_size*)ADIOI_Malloc(mem_list_count*sizeof(PVFS_size));
        mem_lengths = (int*)ADIOI_Malloc(mem_list_count*sizeof(int));

        j = 0;
        /* step through each block in memory, filling memory arrays.
         * The loop also stops as soon as any PVFS call fails, so that a
         * later successful call cannot overwrite err_flag and hide the
         * failure. */
        while (!err_flag && (b_blks_wrote < total_blks_to_write)) {
            for (i=0; i<flat_buf->count; i++) {
                mem_offsets[b_blks_wrote % MAX_ARRAY_SIZE] =
                    /* TODO: fix this warning by casting to an integer that's
                     * the same size as a char pointer and only then casting
                     * to PVFS_size */
                    ((PVFS_size)buf + j*buftype_extent + flat_buf->indices[i]);
                mem_lengths[b_blks_wrote % MAX_ARRAY_SIZE] =
                    flat_buf->blocklens[i];
                file_lengths += flat_buf->blocklens[i];
                b_blks_wrote++;
                if (!(b_blks_wrote % MAX_ARRAY_SIZE) ||
                    (b_blks_wrote == total_blks_to_write)) {

                    /* in the case of the last write list call,
                       adjust mem_list_count */
                    if (b_blks_wrote == total_blks_to_write) {
                        mem_list_count = total_blks_to_write % MAX_ARRAY_SIZE;
                        /* in case last write list call fills max arrays */
                        if (!mem_list_count) mem_list_count = MAX_ARRAY_SIZE;
                    }
                    err_flag = PVFS_Request_hindexed(mem_list_count,
                                                     mem_lengths, mem_offsets,
                                                     PVFS_BYTE, &mem_req);
                    /* --BEGIN ERROR HANDLING-- */
                    if (err_flag != 0) {
                        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                           MPIR_ERR_RECOVERABLE,
                                                           myname, __LINE__,
                                                           ADIOI_PVFS2_error_convert(err_flag),
                                                           "Error in PVFS_Request_hindexed (memory)", 0);
                        break;
                    }
                    /* --END ERROR HANDLING-- */

                    err_flag = PVFS_Request_contiguous(file_lengths,
                                                       PVFS_BYTE,
                                                       &file_req);
                    /* --BEGIN ERROR HANDLING-- */
                    if (err_flag != 0) {
                        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                           MPIR_ERR_RECOVERABLE,
                                                           myname, __LINE__,
                                                           ADIOI_PVFS2_error_convert(err_flag),
                                                           "Error in PVFS_Request_contiguous (file)", 0);
                        /* the memory request already exists; release it */
                        PVFS_Request_free(&mem_req);
                        break;
                    }
                    /* --END ERROR HANDLING-- */

#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
                    err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req,
                                              file_offsets, PVFS_BOTTOM,
                                              mem_req,
                                              &(pvfs_fs->credentials),
                                              &resp_io);
#ifdef ADIOI_MPE_LOGGING
                    MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
                    /* the requests are done with, whether or not the write
                     * succeeded and whether or not this was the last list */
                    PVFS_Request_free(&mem_req);
                    PVFS_Request_free(&file_req);

                    /* in the case of error or the last write list call,
                     * leave here */
                    /* --BEGIN ERROR HANDLING-- */
                    if (err_flag) {
                        *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                           MPIR_ERR_RECOVERABLE,
                                                           myname, __LINE__,
                                                           ADIOI_PVFS2_error_convert(err_flag),
                                                           "Error in PVFS_sys_write", 0);
                        break;
                    }
                    /* --END ERROR HANDLING-- */

                    /* only trust resp_io once the write is known to have
                     * succeeded */
                    total_bytes_written += resp_io.total_completed;

                    if (b_blks_wrote == total_blks_to_write) break;

                    file_offsets += file_lengths;
                    file_lengths = 0;
                }
            } /* for (i=0; i<flat_buf->count; i++) */
            j++;
        } /* while (!err_flag && (b_blks_wrote < total_blks_to_write)) */
        ADIOI_Free(mem_offsets);
        ADIOI_Free(mem_lengths);

        if (file_ptr_type == ADIO_INDIVIDUAL)
            fd->fp_ind += total_bytes_written;

        if (!err_flag) *error_code = MPI_SUCCESS;

        fd->fp_sys_posn = -1;   /* clear this. */

#ifdef HAVE_STATUS_SET_BYTES
        MPIR_Status_set_bytes(status, datatype, bufsize);
        /* This is a temporary way of filling in status. The right way is to
           keep track of how much data was actually written by
           ADIOI_BUFFERED_WRITE. */
#endif

        ADIOI_Delete_flattened(datatype);
        return;
    } /* if (!buftype_is_contig && filetype_is_contig) */

    /* already know that file is noncontiguous from above */
    /* noncontiguous in file */

    /* filetype already flattened in ADIO_Open */
    flat_file = ADIOI_Flatlist;
    while (flat_file->type != fd->filetype) flat_file = flat_file->next;

    disp = fd->disp;
    initial_off = offset;

    /* for each case - ADIO_Individual pointer or explicit, find offset
       (file offset in bytes), n_filetypes (how many filetypes into file
       to start), fwr_size (remaining amount of data in present file
       block), and st_index (start point in terms of blocks in starting
       filetype) */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        offset = fd->fp_ind; /* in bytes */
        n_filetypes = -1;
        flag = 0;
        while (!flag) {
            n_filetypes++;
            for (i=0; i<flat_file->count; i++) {
                if (disp + flat_file->indices[i] +
                    ((ADIO_Offset) n_filetypes)*filetype_extent +
                    flat_file->blocklens[i] >= offset) {
                    st_index = i;
                    fwr_size = disp + flat_file->indices[i] +
                        ((ADIO_Offset) n_filetypes)*filetype_extent
                        + flat_file->blocklens[i] - offset;
                    flag = 1;
                    break;
                }
            }
        } /* while (!flag) */
    } /* if (file_ptr_type == ADIO_INDIVIDUAL) */
    else {
        n_etypes_in_filetype = filetype_size/etype_size;
        n_filetypes = (int) (offset / n_etypes_in_filetype);
        etype_in_filetype = (int) (offset % n_etypes_in_filetype);
        size_in_filetype = etype_in_filetype * etype_size;

        sum = 0;
        for (i=0; i<flat_file->count; i++) {
            sum += flat_file->blocklens[i];
            if (sum > size_in_filetype) {
                st_index = i;
                fwr_size = sum - size_in_filetype;
                abs_off_in_filetype = flat_file->indices[i] +
                    size_in_filetype - (sum - flat_file->blocklens[i]);
                break;
            }
        }

        /* abs. offset in bytes in the file */
        offset = disp + ((ADIO_Offset) n_filetypes)*filetype_extent +
            abs_off_in_filetype;
    } /* else [file_ptr_type != ADIO_INDIVIDUAL] */

    start_off = offset;
    st_fwr_size = fwr_size;
    st_n_filetypes = n_filetypes;

    if (buftype_is_contig && !filetype_is_contig) {

        /* contiguous in memory, noncontiguous in file. should be the most
           common case. */

        /* NOTE: these two locals intentionally shadow the function-level
         * mem_lengths / mem_offsets arrays, which therefore stay NULL in
         * this branch and are skipped by the cleanup at the end. */
        int mem_lengths;
        char *mem_offsets;

        i = 0;
        j = st_index;
        off = offset;
        n_filetypes = st_n_filetypes;

        mem_list_count = 1;

        /* determine how many blocks in file to write */
        f_data_wrote = ADIOI_MIN(st_fwr_size, bufsize);
        total_blks_to_write = 1;
        if (j < (flat_file->count -1)) j++;
        else {
            j = 0;
            n_filetypes++;
        }
        while (f_data_wrote < bufsize) {
            f_data_wrote += flat_file->blocklens[j];
            total_blks_to_write++;
            if (j<(flat_file->count-1)) j++;
            else j = 0;
        }

        j = st_index;
        n_filetypes = st_n_filetypes;
        n_write_lists = total_blks_to_write/MAX_ARRAY_SIZE;
        extra_blks = total_blks_to_write%MAX_ARRAY_SIZE;

        mem_offsets = buf;
        mem_lengths = 0;

        /* if at least one full writelist, allocate file arrays
           at max array size and don't free until very end */
        if (n_write_lists) {
            file_offsets = (int64_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
                                                  sizeof(int64_t));
            file_lengths = (int32_t*)ADIOI_Malloc(MAX_ARRAY_SIZE*
                                                  sizeof(int32_t));
        }
        /* if there's no full writelist allocate file arrays according
           to needed size (extra_blks) */
        else {
            file_offsets = (int64_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int64_t));
            file_lengths = (int32_t*)ADIOI_Malloc(extra_blks*
                                                  sizeof(int32_t));
        }

        /* for file arrays that are of MAX_ARRAY_SIZE, build arrays */
        for (i=0; i<n_write_lists; i++) {
            file_list_count = MAX_ARRAY_SIZE;
            if(!i) {
                /* very first block may start in the middle of a file block */
                file_offsets[0] = offset;
                file_lengths[0] = st_fwr_size;
                mem_lengths = st_fwr_size;
            }
            for (k=0; k<MAX_ARRAY_SIZE; k++) {
                if (i || k) {
                    file_offsets[k] = disp +
                        ((ADIO_Offset)n_filetypes)*filetype_extent
                        + flat_file->indices[j];
                    file_lengths[k] = flat_file->blocklens[j];
                    mem_lengths += file_lengths[k];
                }
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<MAX_ARRAY_SIZE; k++) */

            err_flag = PVFS_Request_contiguous(mem_lengths,
                                               PVFS_BYTE, &mem_req);
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_Request_contiguous (memory)", 0);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
                                             file_offsets, PVFS_BYTE,
                                             &file_req);
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_Request_hindexed (file)", 0);
                PVFS_Request_free(&mem_req);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            /* PVFS_Request_hindexed already expresses the offsets into the
             * file, so we should not pass in an offset if we are using
             * hindexed for the file type */
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
            err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
                                      mem_offsets, mem_req,
                                      &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
            /* requests are single-use; release them before looking at the
             * result so the error path cannot leak them */
            PVFS_Request_free(&file_req);
            PVFS_Request_free(&mem_req);

            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_sys_write", 0);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */
            total_bytes_written += resp_io.total_completed;

            mem_offsets += mem_lengths;
            mem_lengths = 0;
        } /* for (i=0; i<n_write_lists; i++) */

        /* for file arrays smaller than MAX_ARRAY_SIZE (last write_list call) */
        if (extra_blks) {
            file_list_count = extra_blks;
            if(!i) {
                file_offsets[0] = offset;
                file_lengths[0] = ADIOI_MIN(st_fwr_size, bufsize);
            }
            for (k=0; k<extra_blks; k++) {
                if(i || k) {
                    file_offsets[k] = disp +
                        ((ADIO_Offset)n_filetypes)*filetype_extent +
                        flat_file->indices[j];
                    if (k == (extra_blks - 1)) {
                        /* last block: whatever remains of the buffer after
                         * the bytes consumed by earlier lists
                         * (mem_offsets - buf) and by this list so far
                         * (mem_lengths).  A pointer difference is used
                         * instead of truncating the pointers themselves. */
                        file_lengths[k] = bufsize - mem_lengths
                            - (int32_t) (mem_offsets - (char *) buf);
                    }
                    else file_lengths[k] = flat_file->blocklens[j];
                } /* if(i || k) */
                mem_lengths += file_lengths[k];
                if (j<(flat_file->count - 1)) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (k=0; k<extra_blks; k++) */

            err_flag = PVFS_Request_contiguous(mem_lengths,
                                               PVFS_BYTE, &mem_req);
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_Request_contiguous (memory)", 0);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
                                             file_offsets, PVFS_BYTE,
                                             &file_req);
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_Request_hindexed(file)", 0);
                PVFS_Request_free(&mem_req);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            /* as above, use 0 for 'offset' when using hindexed file type*/
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
            err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
                                      mem_offsets, mem_req,
                                      &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
            PVFS_Request_free(&mem_req);
            PVFS_Request_free(&file_req);

            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_sys_write", 0);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */
            total_bytes_written += resp_io.total_completed;
        }
    }
    else {
        /* noncontiguous in memory as well as in file */

        ADIOI_Flatten_datatype(datatype);
        flat_buf = ADIOI_Flatlist;
        while (flat_buf->type != datatype) flat_buf = flat_buf->next;

        size_wrote = 0;
        n_filetypes = st_n_filetypes;
        fwr_size = st_fwr_size;
        bwr_size = flat_buf->blocklens[0];
        buf_count = 0;
        start_mem_offset = 0;
        start_k = k = 0;
        start_j = st_index;
        max_mem_list = 0;
        max_file_list = 0;

        /* run through and find max_file_list and max_mem_list so that you
           can allocate the file and memory arrays less than MAX_ARRAY_SIZE
           if possible */

        while (size_wrote < bufsize) {
            k = start_k;
            new_buffer_write = 0;
            mem_list_count = 0;
            while ((mem_list_count < MAX_ARRAY_SIZE) &&
                   (new_buffer_write < bufsize-size_wrote)) {
                /* find mem_list_count and file_list_count such that both are
                   less than MAX_ARRAY_SIZE, the sum of their lengths are
                   equal, and the sum of all the data written and data to be
                   written in the next immediate write list is less than
                   bufsize */
                if(mem_list_count) {
                    if((new_buffer_write + flat_buf->blocklens[k] +
                        size_wrote) > bufsize) {
                        end_bwr_size = new_buffer_write +
                            flat_buf->blocklens[k] - (bufsize - size_wrote);
                        new_buffer_write = bufsize - size_wrote;
                    }
                    else {
                        new_buffer_write += flat_buf->blocklens[k];
                        end_bwr_size = flat_buf->blocklens[k];
                    }
                }
                else {
                    if (bwr_size > (bufsize - size_wrote)) {
                        new_buffer_write = bufsize - size_wrote;
                        bwr_size = new_buffer_write;
                    }
                    else new_buffer_write = bwr_size;
                }
                mem_list_count++;
                k = (k + 1)%flat_buf->count;
            } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
                 (new_buffer_write < bufsize-size_wrote)) */
            j = start_j;
            new_file_write = 0;
            file_list_count = 0;
            while ((file_list_count < MAX_ARRAY_SIZE) &&
                   (new_file_write < new_buffer_write)) {
                if(file_list_count) {
                    if((new_file_write + flat_file->blocklens[j]) >
                       new_buffer_write) {
                        end_fwr_size = new_buffer_write - new_file_write;
                        new_file_write = new_buffer_write;
                        j--;
                    }
                    else {
                        new_file_write += flat_file->blocklens[j];
                        end_fwr_size = flat_file->blocklens[j];
                    }
                }
                else {
                    if (fwr_size > new_buffer_write) {
                        new_file_write = new_buffer_write;
                        fwr_size = new_file_write;
                    }
                    else new_file_write = fwr_size;
                }
                file_list_count++;
                if (j < (flat_file->count - 1)) j++;
                else j = 0;

                k = start_k;
                if ((new_file_write < new_buffer_write) &&
                    (file_list_count == MAX_ARRAY_SIZE)) {
                    /* the file list filled up first: shrink the memory
                     * list so that it covers exactly new_file_write bytes */
                    new_buffer_write = 0;
                    mem_list_count = 0;
                    while (new_buffer_write < new_file_write) {
                        if(mem_list_count) {
                            if((new_buffer_write + flat_buf->blocklens[k]) >
                               new_file_write) {
                                end_bwr_size = new_file_write -
                                    new_buffer_write;
                                new_buffer_write = new_file_write;
                                k--;
                            }
                            else {
                                new_buffer_write += flat_buf->blocklens[k];
                                end_bwr_size = flat_buf->blocklens[k];
                            }
                        }
                        else {
                            new_buffer_write = bwr_size;
                            if (bwr_size > (bufsize - size_wrote)) {
                                new_buffer_write = bufsize - size_wrote;
                                bwr_size = new_buffer_write;
                            }
                        }
                        mem_list_count++;
                        k = (k + 1)%flat_buf->count;
                    } /* while (new_buffer_write < new_file_write) */
                } /* if ((new_file_write < new_buffer_write) &&
                     (file_list_count == MAX_ARRAY_SIZE)) */
            } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
                 (new_file_write < new_buffer_write)) */

            /* fakes filling the writelist arrays of lengths found above  */
            k = start_k;
            j = start_j;
            for (i=0; i<mem_list_count; i++) {
                if(i) {
                    if (i == (mem_list_count - 1)) {
                        if (flat_buf->blocklens[k] == end_bwr_size)
                            bwr_size = flat_buf->blocklens[(k+1)%
                                                           flat_buf->count];
                        else {
                            bwr_size = flat_buf->blocklens[k] - end_bwr_size;
                            k--;
                            buf_count--;
                        }
                    }
                }
                buf_count++;
                k = (k + 1)%flat_buf->count;
            } /* for (i=0; i<mem_list_count; i++) */
            for (i=0; i<file_list_count; i++) {
                if (i) {
                    if (i == (file_list_count - 1)) {
                        if (flat_file->blocklens[j] == end_fwr_size)
                            fwr_size = flat_file->blocklens[(j+1)%
                                                            flat_file->count];
                        else {
                            fwr_size = flat_file->blocklens[j] - end_fwr_size;
                            j--;
                        }
                    }
                }
                if (j < flat_file->count - 1) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (i=0; i<file_list_count; i++) */
            size_wrote += new_buffer_write;
            start_k = k;
            start_j = j;
            if (max_mem_list < mem_list_count)
                max_mem_list = mem_list_count;
            if (max_file_list < file_list_count)
                max_file_list = file_list_count;
        } /* while (size_wrote < bufsize) */

        /* one last check before we actually carry out the operation:
         * this code has hard-to-fix bugs when a noncontiguous file type has
         * such large pieces that the sum of the lengths of the memory type is
         * not larger than one of those pieces (and vice versa for large memory
         * types and many pieces of file types).  In these cases, give up and
         * fall back to naive reads and writes.  The testphdf5 test created a
         * type with two very large memory regions and 600 very small file
         * regions.  The same test also created a type with one very large file
         * region and many (700) very small memory regions.  both cases caused
         * problems for this code */

        if ( ( (file_list_count == 1) &&
               (new_file_write < flat_file->blocklens[0] ) ) ||
             ((mem_list_count == 1) &&
              (new_buffer_write < flat_buf->blocklens[0]) ) ||
             ((file_list_count == MAX_ARRAY_SIZE) &&
              (new_file_write < flat_buf->blocklens[0]) ) ||
             ( (mem_list_count == MAX_ARRAY_SIZE) &&
               (new_buffer_write < flat_file->blocklens[0])) )
        {
            /* nothing has been allocated yet, so a plain return is safe */
            ADIOI_Delete_flattened(datatype);
            ADIOI_GEN_WriteStrided_naive(fd, buf, count, datatype,
                                         file_ptr_type, initial_off,
                                         status, error_code);
            return;
        }

        mem_offsets = (PVFS_size*)ADIOI_Malloc(max_mem_list*sizeof(PVFS_size));
        mem_lengths = (int *)ADIOI_Malloc(max_mem_list*sizeof(int));
        file_offsets = (int64_t *)ADIOI_Malloc(max_file_list*sizeof(int64_t));
        file_lengths = (int32_t *)ADIOI_Malloc(max_file_list*sizeof(int32_t));

        size_wrote = 0;
        n_filetypes = st_n_filetypes;
        fwr_size = st_fwr_size;
        bwr_size = flat_buf->blocklens[0];
        buf_count = 0;
        start_mem_offset = 0;
        start_k = k = 0;
        start_j = st_index;

        /*  this section calculates mem_list_count and file_list_count
            and also finds the possibly odd sized last array elements
            in new_fwr_size and new_bwr_size  */

        while (size_wrote < bufsize) {
            k = start_k;
            new_buffer_write = 0;
            mem_list_count = 0;
            while ((mem_list_count < MAX_ARRAY_SIZE) &&
                   (new_buffer_write < bufsize-size_wrote)) {
                /* find mem_list_count and file_list_count such that both are
                   less than MAX_ARRAY_SIZE, the sum of their lengths are
                   equal, and the sum of all the data written and data to be
                   written in the next immediate write list is less than
                   bufsize */
                if(mem_list_count) {
                    if((new_buffer_write + flat_buf->blocklens[k] +
                        size_wrote) > bufsize) {
                        end_bwr_size = new_buffer_write +
                            flat_buf->blocklens[k] - (bufsize - size_wrote);
                        new_buffer_write = bufsize - size_wrote;
                    }
                    else {
                        new_buffer_write += flat_buf->blocklens[k];
                        end_bwr_size = flat_buf->blocklens[k];
                    }
                }
                else {
                    if (bwr_size > (bufsize - size_wrote)) {
                        new_buffer_write = bufsize - size_wrote;
                        bwr_size = new_buffer_write;
                    }
                    else new_buffer_write = bwr_size;
                }
                mem_list_count++;
                k = (k + 1)%flat_buf->count;
            } /* while ((mem_list_count < MAX_ARRAY_SIZE) &&
                 (new_buffer_write < bufsize-size_wrote)) */
            j = start_j;
            new_file_write = 0;
            file_list_count = 0;
            while ((file_list_count < MAX_ARRAY_SIZE) &&
                   (new_file_write < new_buffer_write)) {
                if(file_list_count) {
                    if((new_file_write + flat_file->blocklens[j]) >
                       new_buffer_write) {
                        end_fwr_size = new_buffer_write - new_file_write;
                        new_file_write = new_buffer_write;
                        j--;
                    }
                    else {
                        new_file_write += flat_file->blocklens[j];
                        end_fwr_size = flat_file->blocklens[j];
                    }
                }
                else {
                    if (fwr_size > new_buffer_write) {
                        new_file_write = new_buffer_write;
                        fwr_size = new_file_write;
                    }
                    else new_file_write = fwr_size;
                }
                file_list_count++;
                if (j < (flat_file->count - 1)) j++;
                else j = 0;

                k = start_k;
                if ((new_file_write < new_buffer_write) &&
                    (file_list_count == MAX_ARRAY_SIZE)) {
                    new_buffer_write = 0;
                    mem_list_count = 0;
                    while (new_buffer_write < new_file_write) {
                        if(mem_list_count) {
                            if((new_buffer_write + flat_buf->blocklens[k]) >
                               new_file_write) {
                                end_bwr_size = new_file_write -
                                    new_buffer_write;
                                new_buffer_write = new_file_write;
                                k--;
                            }
                            else {
                                new_buffer_write += flat_buf->blocklens[k];
                                end_bwr_size = flat_buf->blocklens[k];
                            }
                        }
                        else {
                            new_buffer_write = bwr_size;
                            if (bwr_size > (bufsize - size_wrote)) {
                                new_buffer_write = bufsize - size_wrote;
                                bwr_size = new_buffer_write;
                            }
                        }
                        mem_list_count++;
                        k = (k + 1)%flat_buf->count;
                    } /* while (new_buffer_write < new_file_write) */
                } /* if ((new_file_write < new_buffer_write) &&
                     (file_list_count == MAX_ARRAY_SIZE)) */
            } /* while ((file_list_count < MAX_ARRAY_SIZE) &&
                 (new_file_write < new_buffer_write)) */

            /*  fills the allocated writelist arrays  */
            k = start_k;
            j = start_j;
            for (i=0; i<mem_list_count; i++) {
                /* TODO: fix this warning by casting to an integer that's the
                 * same size as a char pointer and only then casting to
                 * PVFS_size */
                mem_offsets[i] = ((PVFS_size)buf + buftype_extent*
                                  (buf_count/flat_buf->count) +
                                  (int)flat_buf->indices[k]);

                if(!i) {
                    /* first entry may start part-way into a memory block */
                    mem_lengths[0] = bwr_size;
                    mem_offsets[0] += flat_buf->blocklens[k] - bwr_size;
                }
                else {
                    if (i == (mem_list_count - 1)) {
                        mem_lengths[i] = end_bwr_size;
                        if (flat_buf->blocklens[k] == end_bwr_size)
                            bwr_size = flat_buf->blocklens[(k+1)%
                                                           flat_buf->count];
                        else {
                            bwr_size = flat_buf->blocklens[k] - end_bwr_size;
                            k--;
                            buf_count--;
                        }
                    }
                    else {
                        mem_lengths[i] = flat_buf->blocklens[k];
                    }
                }
                buf_count++;
                k = (k + 1)%flat_buf->count;
            } /* for (i=0; i<mem_list_count; i++) */
            for (i=0; i<file_list_count; i++) {
                file_offsets[i] = disp + flat_file->indices[j] +
                    ((ADIO_Offset)n_filetypes) * filetype_extent;
                if (!i) {
                    /* first entry may start part-way into a file block */
                    file_lengths[0] = fwr_size;
                    file_offsets[0] += flat_file->blocklens[j] - fwr_size;
                }
                else {
                    if (i == (file_list_count - 1)) {
                        file_lengths[i] = end_fwr_size;
                        if (flat_file->blocklens[j] == end_fwr_size)
                            fwr_size = flat_file->blocklens[(j+1)%
                                                            flat_file->count];
                        else {
                            fwr_size = flat_file->blocklens[j] - end_fwr_size;
                            j--;
                        }
                    }
                    else file_lengths[i] = flat_file->blocklens[j];
                }
                if (j < flat_file->count - 1) j++;
                else {
                    j = 0;
                    n_filetypes++;
                }
            } /* for (i=0; i<file_list_count; i++) */

            err_flag = PVFS_Request_hindexed(mem_list_count, mem_lengths,
                                             mem_offsets, PVFS_BYTE, &mem_req);
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0 ) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_Request_hindexed (memory)", 0);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            err_flag = PVFS_Request_hindexed(file_list_count, file_lengths,
                                             file_offsets, PVFS_BYTE,
                                             &file_req);
            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_Request_hindexed", 0);
                PVFS_Request_free(&mem_req);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            /* offset will be expressed in memory and file datatypes */

#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_a, 0, NULL );
#endif
            err_flag = PVFS_sys_write(pvfs_fs->object_ref, file_req, 0,
                                      PVFS_BOTTOM, mem_req,
                                      &(pvfs_fs->credentials), &resp_io);
#ifdef ADIOI_MPE_LOGGING
            MPE_Log_event( ADIOI_MPE_write_b, 0, NULL );
#endif
            PVFS_Request_free(&mem_req);
            PVFS_Request_free(&file_req);

            /* --BEGIN ERROR HANDLING-- */
            if (err_flag != 0) {
                *error_code = MPIO_Err_create_code(MPI_SUCCESS,
                                                   MPIR_ERR_RECOVERABLE,
                                                   myname, __LINE__,
                                                   ADIOI_PVFS2_error_convert(err_flag),
                                                   "Error in PVFS_sys_write", 0);
                goto error_state;
            }
            /* --END ERROR HANDLING-- */

            size_wrote += new_buffer_write;
            total_bytes_written += resp_io.total_completed;
            start_k = k;
            start_j = j;
        } /* while (size_wrote < bufsize) */
    }

    /* when incrementing fp_ind, need to also take into account the file type:
     * consider an N-element 1-d subarray with a lb and ub: ( |---xxxxx-----|
     * if we wrote N elements, offset needs to point at beginning of type, not
     * at empty region at offset N+1).
     *
     * As we discussed on mpich-discuss in may/june 2009, the code below might
     * look weird, but by putting fp_ind at the last byte written, the next
     * time we run through the strided code we'll update the fp_ind to the
     * right location. */
    if (file_ptr_type == ADIO_INDIVIDUAL) {
        fd->fp_ind = file_offsets[file_list_count-1]+
            file_lengths[file_list_count-1];
    }

    *error_code = MPI_SUCCESS;

error_state:
    /* Common exit for success and failure.  Each array is released only if
     * it was allocated; the NULL checks keep a null pointer from ever being
     * handed to ADIOI_Free. */
    if (mem_offsets != NULL) {
        ADIOI_Free(mem_offsets);
    }
    if (mem_lengths != NULL) {
        ADIOI_Free(mem_lengths);
    }
    if (file_offsets != NULL) {
        ADIOI_Free(file_offsets);
    }
    if (file_lengths != NULL) {
        ADIOI_Free(file_lengths);
    }

    fd->fp_sys_posn = -1;   /* set it to null. */

#ifdef HAVE_STATUS_SET_BYTES
    MPIR_Status_set_bytes(status, datatype, bufsize);
    /* This is a temporary way of filling in status. The right way is to
       keep track of how much data was actually written by
       ADIOI_BUFFERED_WRITE. */
#endif

    if (!buftype_is_contig)
        ADIOI_Delete_flattened(datatype);
}