/**
 * Element open callback for the posix module.
 *
 * When the dataset uses the basic file mode the element's backing file is
 * opened right away. On success the posix implementations of the element
 * operations are installed on the element.
 *
 * @param[in] dataset  dataset the element belongs to
 * @param[in] element  element being opened
 *
 * @returns HIO_SUCCESS, or the error reported while opening the backing file
 *          (the element object is released in that case)
 */
static int builtin_posix_module_element_open (hio_dataset_t dataset, hio_element_t element) {
  hio_context_t context = hioi_object_context (&dataset->ds_object);
  const char *action;

  if (HIO_FILE_MODE_BASIC == dataset->ds_fmode) {
    int open_rc = builtin_posix_module_element_open_basic ((builtin_posix_module_t *) dataset->ds_module,
                                                           (builtin_posix_module_dataset_t *) dataset,
                                                           element);
    if (HIO_SUCCESS != open_rc) {
      /* the element is unusable without its backing file */
      hioi_object_release (&element->e_object);
      return open_rc;
    }
  }

  action = (dataset->ds_flags & HIO_FLAG_WRITE) ? "created" : "opened";
  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: %s element %p (identifier %s) for dataset %s",
            action, element, hioi_object_identifier(element), hioi_object_identifier(dataset));

  /* install the posix element operations */
  element->e_close            = builtin_posix_module_element_close;
  element->e_complete         = builtin_posix_module_element_complete;
  element->e_flush            = builtin_posix_module_element_flush;
  element->e_read_strided_nb  = builtin_posix_module_element_read_strided_nb;
  element->e_write_strided_nb = builtin_posix_module_element_write_strided_nb;

  return HIO_SUCCESS;
}
/**
 * Translate an element offset into a positioned block file (strided file mode).
 *
 * The element is split into blocks of ds_bs bytes which are distributed
 * round-robin over ds_fcount files. The file holding the requested block is
 * looked up in (or opened into) the dataset's table of open files and is
 * positioned at the translated offset.
 *
 * @param[in]     posix_module  module, forwarded to builtin_posix_open_file()
 * @param[in]     element       element being accessed
 * @param[in]     offset        offset within the element
 * @param[in,out] size          length of the access; reduced if the access would
 *                              run past the end of the block containing offset
 * @param[out]    file_out      file to use for the access
 *
 * @returns HIO_SUCCESS on success, HIO_ERR_OUT_OF_RESOURCE if the file path
 *          could not be allocated, or the error from builtin_posix_open_file()
 */
static int builtin_posix_element_translate_strided (builtin_posix_module_t *posix_module, hio_element_t element,
                                                    uint64_t offset, size_t *size, hio_file_t **file_out) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  size_t block_id, block_base, block_bound, block_offset, file_id, file_block;
  hio_context_t context = hioi_object_context (&element->e_object);
  hio_file_t *file;
  int32_t file_index;
  char *path;
  int rc;

  block_id = offset / posix_dataset->ds_bs;

  /* blocks are assigned to files round-robin */
  file_id = block_id % posix_dataset->ds_fcount;
  file_block = block_id / posix_dataset->ds_fcount;

  block_base = block_id * posix_dataset->ds_bs;
  block_bound = block_base + posix_dataset->ds_bs;
  block_offset = file_block * posix_dataset->ds_bs + offset - block_base;

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "builtin_posix_element_translate_strided: element: %s, offset: %"
            PRIu64 ", file_id: %lu, file_block: %lu, block_offset: %lu, block_size: %" PRIu64,
            hioi_object_identifier(element), offset, (unsigned long) file_id, (unsigned long) file_block,
            (unsigned long) block_offset, posix_dataset->ds_bs);

  /* never let a single access cross a block boundary */
  if (offset + *size > block_bound) {
    *size = block_bound - offset;
  }

  /* the file id selects the slot in the table of open files */
  file_index = file_id % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + file_index;

  if (file_id != file->f_bid || file->f_element != element) {
    /* the slot holds a different file (or nothing). the path is only needed
     * when a file has to be opened so it is built (and released) here. */
    rc = asprintf (&path, "%s/data/%s_block.%08lu", posix_dataset->base_path, hioi_object_identifier(element),
                   (unsigned long) file_id);
    if (0 > rc) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }

    if (file->f_bid >= 0) {
      POSIX_TRACE_CALL(posix_dataset, hioi_file_close (file), "file_close", file->f_bid, 0);
    }

    /* mark the slot as empty until the open succeeds */
    file->f_bid = -1;
    file->f_element = element;

    POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, file),
                     "file_open", file_id, 0);
    free (path);
    if (HIO_SUCCESS != rc) {
      return rc;
    }

    file->f_bid = file_id;
  }

  POSIX_TRACE_CALL(posix_dataset, hioi_file_seek (file, block_offset, SEEK_SET), "file_seek", file->f_bid,
                   block_offset);

  *file_out = file;

  return HIO_SUCCESS;
}
/**
 * Open the backing file of an element (basic file mode) and record its size.
 *
 * Datasets in HIO_SET_ELEMENT_UNIQUE mode use one file per element and rank;
 * otherwise a single file per element is used.
 *
 * @param[in] posix_module   module, forwarded to builtin_posix_open_file()
 * @param[in] posix_dataset  dataset providing the base path
 * @param[in] element        element whose file is opened
 *
 * @returns HIO_SUCCESS, HIO_ERR_OUT_OF_RESOURCE if the path could not be
 *          allocated, or the error from builtin_posix_open_file()
 */
static int builtin_posix_module_element_open_basic (builtin_posix_module_t *posix_module,
                                                    builtin_posix_module_dataset_t *posix_dataset,
                                                    hio_element_t element) {
  const char *name = hioi_object_identifier(element);
  const bool per_rank = (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode);
  char *file_path;
  int status;

  status = per_rank
    ? asprintf (&file_path, "%s/element_data.%s.%05d", posix_dataset->base_path, name, element->e_rank)
    : asprintf (&file_path, "%s/element_data.%s", posix_dataset->base_path, name);
  if (0 > status) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  status = builtin_posix_open_file (posix_module, posix_dataset, file_path, &element->e_file);
  free (file_path);

  if (HIO_SUCCESS == status) {
    /* the element size is the current length of the file */
    fseek (element->e_file.f_hndl, 0, SEEK_END);
    element->e_size = ftell (element->e_file.f_hndl);
    fseek (element->e_file.f_hndl, 0, SEEK_SET);
  }

  return status;
}
/**
 * Initialize the posix specific portion of a dataset.
 *
 * Builds the dataset's base path (<data root>/<context>.hio/<dataset>/<id>)
 * and marks every slot of the table of open files as unused.
 *
 * @param[in]     module         module providing the data root
 * @param[in,out] posix_dataset  dataset to initialize
 *
 * @returns HIO_SUCCESS, or HIO_ERR_OUT_OF_RESOURCE if the base path could not
 *          be allocated (base_path is set to NULL in that case)
 */
static int builtin_posix_module_dataset_init (struct hio_module_t *module,
                                              builtin_posix_module_dataset_t *posix_dataset) {
  hio_context_t context = hioi_object_context ((hio_object_t) posix_dataset);
  int rc;

  rc = asprintf (&posix_dataset->base_path, "%s/%s.hio/%s/%lu", module->data_root,
                 hioi_object_identifier(context), hioi_object_identifier (posix_dataset),
                 (unsigned long) posix_dataset->base.ds_id);
  if (0 > rc) {
    /* an assert is compiled out with NDEBUG and asprintf leaves the pointer
     * indeterminate on failure, so report the failure to the caller */
    posix_dataset->base_path = NULL;
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) {
    posix_dataset->files[i].f_bid = -1;
    posix_dataset->files[i].f_file.f_hndl = NULL;
  }

  return HIO_SUCCESS;
}
/**
 * Build the directory path of a dataset instance.
 *
 * The path has the form <data root>/<context>.hio/<name>/<set id>.
 *
 * @param[in]  module  module providing the data root and context
 * @param[out] path    newly allocated path string (allocated by asprintf)
 * @param[in]  name    dataset name
 * @param[in]  set_id  dataset identifier
 *
 * @returns HIO_SUCCESS, or the hio error matching errno if asprintf fails
 */
static int builtin_posix_dataset_path (struct hio_module_t *module, char **path, const char *name,
                                       uint64_t set_id) {
  hio_context_t ctx = module->context;

  if (0 > asprintf (path, "%s/%s.hio/%s/%lu", module->data_root, hioi_object_identifier(ctx), name,
                    (unsigned long) set_id)) {
    return hioi_err_errno (errno);
  }

  return HIO_SUCCESS;
}
/**
 * Append one event record to the dataset's trace file.
 *
 * Nothing is written when the dataset has no trace file. A record is a single
 * line of ':' separated fields: dataset name, an empty field, dataset id,
 * event name, the two event values, start time, stop time and the elapsed
 * time (stop - start).
 *
 * @param[in] posix_dataset  dataset owning the trace file
 * @param[in] event          event name
 * @param[in] value          first event value
 * @param[in] value2         second event value
 * @param[in] start          start time of the event
 * @param[in] stop           stop time of the event
 */
static void builtin_posix_trace (builtin_posix_module_dataset_t *posix_dataset, const char *event,
                                 int64_t value, uint64_t value2, uint64_t start, uint64_t stop) {
  FILE *trace_fh = posix_dataset->ds_trace_fh;

  if (NULL != trace_fh) {
    const uint64_t elapsed = stop - start;

    fprintf (trace_fh, "%s::%" PRId64 ":%s:%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%"
             PRIu64 "\n", hioi_object_identifier (&posix_dataset->base), posix_dataset->base.ds_id,
             event, value, value2, start, stop, elapsed);
  }
}
/**
 * Initialize the posix specific portion of a dataset.
 *
 * Builds the dataset's base path, resets the table of open files and
 * registers the file mode (and, for non-basic modes, block size)
 * configuration variables.
 *
 * @param[in]     module         module providing the data root
 * @param[in,out] posix_dataset  dataset to initialize
 *
 * @returns HIO_SUCCESS, or HIO_ERR_OUT_OF_RESOURCE if the base path could not
 *          be allocated (base_path is set to NULL in that case)
 */
static int builtin_posix_module_dataset_init (struct hio_module_t *module,
                                              builtin_posix_module_dataset_t *posix_dataset) {
  hio_context_t context = hioi_object_context ((hio_object_t) posix_dataset);
  int rc;

  rc = asprintf (&posix_dataset->base_path, "%s/%s.hio/%s/%lu", module->data_root,
                 hioi_object_identifier(context), hioi_object_identifier (posix_dataset),
                 (unsigned long) posix_dataset->base.ds_id);
  if (0 > rc) {
    /* an assert is compiled out with NDEBUG and asprintf leaves the pointer
     * indeterminate on failure, so report the failure to the caller */
    posix_dataset->base_path = NULL;
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  /* initialize posix dataset specific data */
  for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) {
    posix_dataset->files[i].f_bid = -1;
    posix_dataset->files[i].f_hndl = NULL;
    posix_dataset->files[i].f_fd = -1;
  }

  /* default to strided output mode */
  posix_dataset->ds_fmode = HIO_FILE_MODE_STRIDED;
  hioi_config_add (context, &posix_dataset->base.ds_object, &posix_dataset->ds_fmode,
                   "dataset_file_mode", HIO_CONFIG_TYPE_INT32, &hioi_dataset_file_modes,
                   "Modes for writing dataset files. Valid values: (0: basic, 1: file_per_node, 2: strided)", 0);

  if (HIO_FILE_MODE_STRIDED == posix_dataset->ds_fmode && HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
    /* strided mode only applies to shared datasets */
    posix_dataset->ds_fmode = HIO_FILE_MODE_BASIC;
  }

  if (HIO_FILE_MODE_BASIC != posix_dataset->ds_fmode) {
    /* 8 MiB default block size */
    posix_dataset->ds_bs = 1ul << 23;
    hioi_config_add (context, &posix_dataset->base.ds_object, &posix_dataset->ds_bs,
                     "dataset_block_size", HIO_CONFIG_TYPE_INT64, NULL,
                     "Block size to use when writing in optimized mode (default: 8M)", 0);
  }

  return HIO_SUCCESS;
}
/**
 * Open the backing file of an element (basic file mode) and record its size.
 *
 * The file is looked for under <base path>/data first. If it is not readable
 * there the older naming scheme (directly under the base path) is used.
 *
 * @param[in] posix_module   module, forwarded to builtin_posix_open_file()
 * @param[in] posix_dataset  dataset providing the base path
 * @param[in] element        element whose file is opened
 *
 * @returns HIO_SUCCESS, HIO_ERR_OUT_OF_RESOURCE if a path could not be
 *          allocated, or the error from builtin_posix_open_file()
 */
static int builtin_posix_module_element_open_basic (builtin_posix_module_t *posix_module,
                                                    builtin_posix_module_dataset_t *posix_dataset,
                                                    hio_element_t element) {
  const char *element_name = hioi_object_identifier(element);
  char *path;
  int rc;

  if (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
    rc = asprintf (&path, "%s/data/element_data.%s.%08d", posix_dataset->base_path, element_name,
                   element->e_rank);
  } else {
    rc = asprintf (&path, "%s/data/element_data.%s", posix_dataset->base_path, element_name);
  }

  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  if (access (path, R_OK)) {
    /* fall back on old naming scheme. the first path must be released before
     * asprintf overwrites the pointer or it is leaked. */
    free (path);

    if (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
      rc = asprintf (&path, "%s/element_data.%s.%08d", posix_dataset->base_path, element_name,
                     element->e_rank);
    } else {
      rc = asprintf (&path, "%s/element_data.%s", posix_dataset->base_path, element_name);
    }

    if (0 > rc) {
      /* same failure (and same code) as the first allocation above */
      return HIO_ERR_OUT_OF_RESOURCE;
    }
  }

  POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, &element->e_file),
                   "file_open", 0, 0);
  free (path);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  /* the element size is the current length of the file */
#if BUILTIN_POSIX_USE_STDIO
  fseek (element->e_file.f_hndl, 0, SEEK_END);
  element->e_size = ftell (element->e_file.f_hndl);
  fseek (element->e_file.f_hndl, 0, SEEK_SET);
#else
  element->e_size = lseek (element->e_file.f_fd, 0, SEEK_END);
  lseek (element->e_file.f_fd, 0, SEEK_SET);
#endif

  return HIO_SUCCESS;
}
/**
 * Set the value of a configuration variable on an object.
 *
 * The value is always recorded in the object's key-value store. If the
 * variable is already registered on the object it is also updated in place,
 * unless it is read-only.
 *
 * @param[in] object    object to configure
 * @param[in] variable  name of the configuration variable
 * @param[in] value     new value, as a string
 *
 * @returns HIO_SUCCESS, HIO_ERR_BAD_PARAM if any argument is NULL,
 *          HIO_ERR_PERM if the variable is read-only, or the result of
 *          hioi_config_set_value_internal()
 */
int hio_config_set_value (hio_object_t object, const char *variable, const char *value) {
  int status = HIO_SUCCESS;

  if (NULL == object || NULL == variable || NULL == value) {
    return HIO_ERR_BAD_PARAM;
  }

  hioi_object_lock (object);

  /* always push the value into the object's key-value store. if the
   * configuration parameter has not been registered yet it will be read from
   * this key-value store after the file store is checked. */
  hioi_config_list_kv_push (&object->config_set, hioi_object_identifier (object), object->type,
                            variable, value);

  const int slot = hioi_var_lookup (&object->configuration, variable);
  if (0 <= slot) {
    /* the variable is registered: update it now */
    hio_var_t *entry = object->configuration.vars + slot;

    if (entry->var_flags & HIO_VAR_FLAG_READONLY) {
      hioi_err_push (HIO_ERR_PERM, object, "could not set read-only parameter: %s", variable);
      status = HIO_ERR_PERM;
    } else {
      status = hioi_config_set_value_internal (hioi_object_context(object), entry, value);
    }
  }

  hioi_object_unlock (object);

  return status;
}
/**
 * Open (or create) a dataset with the posix module.
 *
 * Rank 0 either loads the existing top-level manifest or creates the dataset
 * directories and serializes a new manifest. The result is then shared with
 * the other processes through hioi_dataset_scatter().
 *
 * @param[in]     module   posix module
 * @param[in,out] dataset  dataset to open
 *
 * @returns HIO_SUCCESS on success or an hio error code. The dataset's base
 *          path is released on every failure after it has been allocated.
 */
static int builtin_posix_module_dataset_open (struct hio_module_t *module, hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  unsigned char *manifest = NULL;
  size_t manifest_size = 0;
  hio_fs_attr_t *fs_attr;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;
  char *path = NULL;

  start = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix:dataset_open: opening dataset %s:%lu mpi: %d flags: 0x%x mode: 0x%x",
            hioi_object_identifier (dataset), (unsigned long) dataset->ds_id, hioi_context_using_mpi (context),
            dataset->ds_flags, dataset->ds_mode);

  rc = builtin_posix_module_dataset_init (module, posix_dataset);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  fs_attr = &posix_dataset->base.ds_fsattr;

  rc = hioi_fs_query (context, module->data_root, fs_attr);
  if (HIO_SUCCESS != rc) {
    hioi_err_push (rc, &context->c_object, "posix: error querying the filesystem");
    /* the base path was allocated by dataset_init and is released like on
     * the other error paths of this function */
    free (posix_dataset->base_path);
    return rc;
  }

  if (fs_attr->fs_flags & HIO_FS_SUPPORTS_STRIPING) {
    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_scount,
                     "stripe_count", HIO_CONFIG_TYPE_UINT32, NULL, "Stripe count for all dataset "
                     "data files", 0);

    if (fs_attr->fs_scount > fs_attr->fs_smax_count) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe count %u exceeds the available resources. "
                "adjusting to maximum %u", fs_attr->fs_scount, fs_attr->fs_smax_count);
      fs_attr->fs_scount = fs_attr->fs_smax_count;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_ssize,
                     "stripe_size", HIO_CONFIG_TYPE_UINT64, NULL, "Stripe size for all dataset "
                     "data files", 0);

    /* ensure the stripe size is a multiple of the stripe unit. a zero stripe
     * unit would be a division by zero so no rounding is done in that case. */
    if (0 != fs_attr->fs_sunit) {
      fs_attr->fs_ssize = fs_attr->fs_sunit * ((fs_attr->fs_ssize + fs_attr->fs_sunit - 1) / fs_attr->fs_sunit);
    }

    if (fs_attr->fs_ssize > fs_attr->fs_smax_size) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe size %" PRIu64 " exceeds the maximum %"
                PRIu64 ". ", fs_attr->fs_ssize, fs_attr->fs_smax_size);
      fs_attr->fs_ssize = fs_attr->fs_smax_size;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_raid_level,
                     "raid_level", HIO_CONFIG_TYPE_UINT64, NULL, "RAID level for dataset "
                     "data files. Keep in mind that some filesystems only support 1/2 RAID "
                     "levels", 0);

    if (HIO_FILE_MODE_OPTIMIZED == dataset->ds_fmode) {
      fs_attr->fs_scount = 1;
      fs_attr->fs_ssize = dataset->ds_bs;
      fs_attr->fs_use_group_locking = true;
    }
  }

  do {
    /* only rank 0 touches the filesystem here. every rank (including a failed
     * rank 0) still takes part in the scatter below. */
    if (0 != context->c_rank) {
      break;
    }

    if (dataset->ds_flags & HIO_FLAG_TRUNC) {
      /* blow away the existing dataset */
      (void) builtin_posix_module_dataset_unlink (module, hioi_object_identifier(dataset), dataset->ds_id);

      /* ensure we take the create path later */
      dataset->ds_flags |= HIO_FLAG_CREAT;
    }

    if (!(dataset->ds_flags & HIO_FLAG_CREAT)) {
      /* load manifest. the manifest data will be shared with other processes in hioi_dataset_scatter */
      rc = asprintf (&path, "%s/manifest.json.bz2", posix_dataset->base_path);
      if (0 > rc) {
        rc = HIO_ERR_OUT_OF_RESOURCE;
        break;
      }

      if (access (path, F_OK)) {
        /* no compressed manifest. look for an uncompressed one */
        free (path);
        rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path);
        if (0 > rc) {
          rc = HIO_ERR_OUT_OF_RESOURCE;
          break;
        }

        if (access (path, F_OK)) {
          hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: could not find top-level manifest");
          free (path);
          rc = HIO_ERR_NOT_FOUND;
          break;
        }
      }

      rc = hioi_manifest_read (path, &manifest, &manifest_size);
      free (path);
    } else {
      rc = builtin_posix_create_dataset_dirs (posix_module, posix_dataset);
      if (HIO_SUCCESS != rc) {
        break;
      }

      rc = hioi_manifest_serialize (dataset, &manifest, &manifest_size, true);
    }
  } while (0);

  /* share dataset information will all processes in the communication domain */
  rc = hioi_dataset_scatter (dataset, manifest, manifest_size, rc);

  /* the serialized manifest is not needed after the scatter, whether or not
   * the scatter succeeded */
  free (manifest);

  if (HIO_SUCCESS != rc) {
    free (posix_dataset->base_path);
    return rc;
  }

  if (HIO_FILE_MODE_OPTIMIZED == dataset->ds_fmode) {
    if (HIO_SET_ELEMENT_UNIQUE == dataset->ds_mode || 2 > context->c_size || NULL == dataset->ds_shared_control) {
      posix_dataset->base.ds_fmode = HIO_FILE_MODE_BASIC;
      /* NTH: no optimized mode for N->N yet */
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: optimized file mode requested but not supported in this "
                "dataset mode. falling back to basic file mode");
    }
  }

  dataset->ds_module = module;
  dataset->ds_close = builtin_posix_module_dataset_close;
  dataset->ds_element_open = builtin_posix_module_element_open;
  dataset->ds_process_reqs = builtin_posix_module_process_reqs;

  pthread_mutex_init (&posix_dataset->lock, NULL);

  /* record the open time */
  gettimeofday (&dataset->ds_otime, NULL);

  stop = hioi_gettime ();

  /* the dataset id and the elapsed time are 64-bit values (see the PRIu64
   * conversions above), so they are printed as such */
  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully %s posix dataset %s:%" PRIu64
            " on data root %s. open time %" PRIu64 " usec", (dataset->ds_flags & HIO_FLAG_CREAT) ? "created" : "opened",
            hioi_object_identifier(dataset), (uint64_t) dataset->ds_id, module->data_root, stop - start);

  return HIO_SUCCESS;
}
/**
 * Translate an element offset into a positioned data file (optimized mode).
 *
 * If the offset is already known to the element the file recorded for it is
 * used. Otherwise, when writing, space is reserved in this process' data file
 * and a new segment is added to the element. When reading, an unknown offset
 * is an error.
 *
 * @param[in]     posix_module  module, forwarded to builtin_posix_open_file()
 * @param[in]     element       element being accessed
 * @param[in]     offset        offset within the element
 * @param[in,out] size          length of the access; may be modified by the
 *                              offset translation or the space reservation
 * @param[out]    file_out      file to use for the access
 * @param[in]     reading       true if the access is a read
 *
 * @returns HIO_SUCCESS, HIO_ERR_OUT_OF_RESOURCE if a path could not be
 *          allocated, the translation error when reading an unknown offset,
 *          or the error from builtin_posix_open_file()
 */
static int builtin_posix_element_translate_opt (builtin_posix_module_t *posix_module, hio_element_t element,
                                                off_t offset, size_t *size, hio_file_t **file_out, bool reading) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  builtin_posix_file_t *file;
  uint64_t file_offset;
  int file_index;
  char *path;
  int rc;

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "translating element %s offset %ld size %lu",
            hioi_object_identifier (&element->e_object), (long) offset, (unsigned long) *size);

  rc = hioi_element_translate_offset (element, offset, &file_index, &file_offset, size);
  if (HIO_SUCCESS != rc) {
    if (reading) {
      hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset not found");
      /* not found */
      return rc;
    }

    if (hioi_context_using_mpi (context)) {
      rc = asprintf (&path, "%s/data.%x", posix_dataset->base_path,
                     posix_dataset->base.ds_shared_control->s_master);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    } else {
      rc = asprintf (&path, "%s/data", posix_dataset->base_path);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    }

    file_offset = builtin_posix_reserve (posix_dataset, size);

    /* the dataset file list stores the file name without the directory */
    /* NOTE(review): the result of hioi_dataset_add_file() is used as an index
     * without a check. confirm it can not be negative. */
    file_index = hioi_dataset_add_file (&posix_dataset->base, strrchr (path, '/') + 1);
    hioi_element_add_segment (element, file_index, file_offset, offset, *size);
  } else {
    hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset found in file @ index %d, offset %lu, size %lu",
              file_index, (unsigned long) file_offset, (unsigned long) *size);
    rc = asprintf (&path, "%s/%s", posix_dataset->base_path, posix_dataset->base.ds_flist[file_index].f_name);
    if (0 > rc) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }
  }

  /* the file index selects the slot in the table of open files */
  int internal_index = file_index % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + internal_index;

  /* f_bid records the index of the file held by the slot, so it has to be
   * compared against the file index. comparing against the slot number would
   * treat a different file that maps to the same slot as already open. */
  if (file_index != file->f_bid) {
    if (NULL != file->f_file.f_hndl) {
      fclose (file->f_file.f_hndl);
      file->f_file.f_hndl = NULL;
      file->f_bid = -1;
    }

    rc = builtin_posix_open_file (posix_module, posix_dataset, path, &file->f_file);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = file_index;
  }

  free (path);

  /* skip the seek if the file is already at the requested offset */
  if (file_offset != file->f_file.f_offset) {
    fseek (file->f_file.f_hndl, file_offset, SEEK_SET);
    file->f_file.f_offset = file_offset;
  }

  *file_out = &file->f_file;

  return HIO_SUCCESS;
}
/**
 * List the headers of all instances of a dataset found under the data root.
 *
 * Rank 0 scans <data root>/<context>.hio/<name> and reads the manifest header
 * (manifest.json.bz2, then manifest.json) of every entry whose name does not
 * start with '.'. Entries without a readable manifest are skipped. When MPI
 * is in use the result is broadcast to all ranks.
 *
 * @param[in]  module   posix module
 * @param[in]  name     dataset name
 * @param[out] headers  newly allocated array of headers (NULL if none found)
 * @param[out] count    number of entries in headers
 *
 * @returns HIO_SUCCESS (a missing or empty directory yields an empty list)
 */
static int builtin_posix_module_dataset_list (struct hio_module_t *module, const char *name,
                                              hio_dataset_header_t **headers, int *count) {
  hio_context_t context = module->context;
  int num_set_ids = 0, set_id_index = 0;
  int rc = HIO_SUCCESS;
  struct dirent *dp;
  char *path = NULL;
  DIR *dir = NULL;

  *headers = NULL;
  *count = 0;

  do {
    if (0 != context->c_rank) {
      break;
    }

    rc = asprintf (&path, "%s/%s.hio/%s", module->data_root, hioi_object_identifier(context), name);
    assert (0 <= rc);

    dir = opendir (path);
    if (NULL == dir) {
      num_set_ids = 0;
      break;
    }

    /* first pass: count the candidate entries */
    while (NULL != (dp = readdir (dir))) {
      if (dp->d_name[0] != '.') {
        num_set_ids++;
      }
    }

    if (0 == num_set_ids) {
      /* nothing to read. calloc (0, ...) may legitimately return NULL so it
       * must not be followed by the assert below. */
      break;
    }

    *headers = (hio_dataset_header_t *) calloc (num_set_ids, sizeof (**headers));
    assert (NULL != *headers);

    rewinddir (dir);

    /* second pass: read the manifest header of each entry */
    while (NULL != (dp = readdir (dir))) {
      if ('.' == dp->d_name[0]) {
        continue;
      }

      if (set_id_index >= num_set_ids) {
        /* entries were added since the first pass. do not write past the end
         * of the headers array. */
        break;
      }

      char *manifest_path;
      rc = asprintf (&manifest_path, "%s/%s/manifest.json.bz2", path, dp->d_name);
      assert (0 <= rc);

      rc = hioi_manifest_read_header (context, headers[0] + set_id_index, manifest_path);
      if (HIO_SUCCESS == rc) {
        ++set_id_index;
      } else {
        /* no compressed manifest. try an uncompressed one */
        free (manifest_path);
        rc = asprintf (&manifest_path, "%s/%s/manifest.json", path, dp->d_name);
        assert (0 <= rc);

        rc = hioi_manifest_read_header (context, headers[0] + set_id_index, manifest_path);
        if (HIO_SUCCESS == rc) {
          ++set_id_index;
        } else {
          /* skip dataset */
          hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_list: could not read manifest at path: %s. rc: %d",
                    manifest_path, rc);
        }
      }

      free (manifest_path);
    }

    /* only entries with a readable manifest are reported */
    num_set_ids = set_id_index;
  } while (0);

#if HIO_USE_MPI
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (&num_set_ids, 1, MPI_INT, 0, context->c_comm);
  }
#endif

  if (0 == context->c_rank) {
    /* opendir may have failed. closedir must not be called on NULL. */
    if (NULL != dir) {
      closedir (dir);
    }
    free (path);
  }

  if (0 == num_set_ids) {
    free (*headers);
    *headers = NULL;

    return HIO_SUCCESS;
  }

  if (0 != context->c_rank) {
    /* receive buffer for the broadcast below */
    *headers = (hio_dataset_header_t *) calloc (num_set_ids, sizeof (**headers));
    assert (NULL != *headers);
  }

#if HIO_USE_MPI
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (*headers, sizeof (**headers) * num_set_ids, MPI_BYTE, 0, context->c_comm);
  }
#endif

  *count = num_set_ids;

  return HIO_SUCCESS;
}
/**
 * Translate an element offset into a positioned block file (one file per
 * element block).
 *
 * Block N of an element is stored in the file <element>_block.<N> under the
 * dataset's base path. When the dataset is open for writing the file is added
 * to the dataset's file list and a segment is added to the element.
 *
 * @param[in]     posix_module  module, forwarded to builtin_posix_open_file()
 * @param[in]     element       element being accessed
 * @param[in]     offset        offset within the element
 * @param[in,out] size          length of the access; reduced if the access would
 *                              run past the end of the block containing offset
 * @param[out]    file_out      file to use for the access
 *
 * @returns HIO_SUCCESS, HIO_ERR_OUT_OF_RESOURCE if a path could not be
 *          allocated, or the error from builtin_posix_open_file()
 */
static int builtin_posix_element_translate_opt_old (builtin_posix_module_t *posix_module, hio_element_t element,
                                                    off_t offset, size_t *size, hio_file_t **file_out) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  const bool writing = !!(HIO_FLAG_WRITE & posix_dataset->base.ds_flags);
  size_t block_id, block_base, block_bound, block_offset;
  builtin_posix_file_t *file;
  int32_t file_index;
  int manifest_index = -1;
  char *path;
  int rc;

  block_id = offset / posix_dataset->base.ds_bs;
  block_base = block_id * posix_dataset->base.ds_bs;
  block_bound = block_base + posix_dataset->base.ds_bs;
  block_offset = offset - block_base;

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "builtin_posix_element_translate: element: %s, offset: %lu, block_id: %lu, "
            "block_offset: %lu, block_size: %lu", hioi_object_identifier(element), (unsigned long) offset,
            (unsigned long) block_id, (unsigned long) block_offset, (unsigned long) posix_dataset->base.ds_bs);

  /* never let a single access cross a block boundary */
  if (offset + *size > block_bound) {
    *size = block_bound - offset;
  }

  rc = asprintf (&path, "%s_block.%lu", hioi_object_identifier(element), (unsigned long) block_id);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  if (writing) {
    /* the dataset file list stores the file name without the directory */
    manifest_index = hioi_dataset_add_file (&posix_dataset->base, path);
  }

  /* prepend the dataset's base path */
  char *tmp = path;
  rc = asprintf (&path, "%s/%s", posix_dataset->base_path, tmp);
  free (tmp);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  /* use crc as a hash to pick a file index to use */
  file_index = hioi_crc32 ((uint8_t *) path, strlen (path)) % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + file_index;

  if (block_id != file->f_bid || file->f_element != element) {
    if (file->f_file.f_hndl != NULL) {
      fclose (file->f_file.f_hndl);
      file->f_file.f_hndl = NULL;
      file->f_bid = -1;
    }

    file->f_element = element;

    rc = builtin_posix_open_file (posix_module, posix_dataset, path, &file->f_file);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = block_id;
  }

  /* the path is not needed once the file is open. it was leaked before. */
  free (path);

  /* skip the seek if the file is already at the requested offset */
  if (block_offset != file->f_file.f_offset) {
    fseek (file->f_file.f_hndl, block_offset, SEEK_SET);
    file->f_file.f_offset = block_offset;
  }

  if (writing) {
    hioi_element_add_segment (element, manifest_index, block_offset, offset, *size);
  }

  *file_out = &file->f_file;

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_open (struct hio_module_t *module, hio_dataset_t dataset) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset; builtin_posix_module_t *posix_module = (builtin_posix_module_t *) module; hio_context_t context = hioi_object_context ((hio_object_t) dataset); unsigned char *manifest = NULL; size_t manifest_size = 0; uint64_t start, stop; int rc = HIO_SUCCESS; char *path = NULL; start = hioi_gettime (); hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix:dataset_open: opening dataset %s:%lu mpi: %d flags: 0x%x mode: 0x%x", hioi_object_identifier (dataset), (unsigned long) dataset->ds_id, hioi_context_using_mpi (context), dataset->ds_flags, dataset->ds_mode); rc = builtin_posix_module_dataset_init (module, posix_dataset); if (HIO_SUCCESS != rc) { return rc; } rc = builtin_posix_module_setup_striping (context, module, dataset); if (HIO_SUCCESS != rc) { return rc; } if (HIO_FILE_MODE_STRIDED == posix_dataset->ds_fmode) { hioi_config_add (context, &dataset->ds_object, &posix_dataset->ds_fcount, "dataset_file_count", HIO_CONFIG_TYPE_UINT64, NULL, "Number of files to use " "in strided file mode", 0); } else if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { posix_dataset->ds_use_bzip = true; hioi_config_add (context, &dataset->ds_object, &posix_dataset->ds_use_bzip, "dataset_use_bzip", HIO_CONFIG_TYPE_BOOL, NULL, "Use bzip2 compression for dataset manifests", 0); } if (dataset->ds_flags & HIO_FLAG_TRUNC) { /* blow away the existing dataset */ if (0 == context->c_rank) { (void) builtin_posix_module_dataset_unlink (module, hioi_object_identifier(dataset), dataset->ds_id); } } if (!(dataset->ds_flags & HIO_FLAG_CREAT)) { if (0 == context->c_rank) { /* load manifest. 
the manifest data will be shared with other processes in hioi_dataset_scatter */ rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path); assert (0 < rc); if (access (path, F_OK)) { /* this should never happen on a valid dataset */ free (path); hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: could not find top-level manifest %s", path); rc = HIO_ERR_NOT_FOUND; } else { rc = HIO_SUCCESS; } } /* read the manifest if it exists */ if (HIO_SUCCESS == rc) { hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: loading manifest header from %s...", path); rc = hioi_manifest_read (path, &manifest, &manifest_size); free (path); path = NULL; } } else if (0 == context->c_rank) { rc = builtin_posix_create_dataset_dirs (posix_module, posix_dataset); if (HIO_SUCCESS == rc) { /* serialize the manifest to send to remote ranks */ rc = hioi_manifest_serialize (dataset, &manifest, &manifest_size, false, false); } } #if HIO_MPI_HAVE(1) /* share dataset header will all processes in the communication domain */ rc = hioi_dataset_scatter_comm (dataset, context->c_comm, manifest, manifest_size, rc); #endif free (manifest); if (HIO_SUCCESS != rc) { free (posix_dataset->base_path); return rc; } if (context->c_enable_tracing) { char *path; rc = asprintf (&path, "%s/trace/trace.%d", posix_dataset->base_path, context->c_rank); if (rc > 0) { posix_dataset->ds_trace_fh = fopen (path, "a"); free (path); } builtin_posix_trace (posix_dataset, "trace_begin", 0, 0, 0, 0); } #if HIO_MPI_HAVE(3) if (!(dataset->ds_flags & HIO_FLAG_CREAT) && HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { rc = bultin_posix_scatter_data (posix_dataset); if (HIO_SUCCESS != rc) { free (posix_dataset->base_path); return rc; } } /* if possible set up a shared memory window for this dataset */ POSIX_TRACE_CALL(posix_dataset, hioi_dataset_shared_init (dataset, 1), "shared_init", 0, 0); if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { if (2 > context->c_size || NULL == 
dataset->ds_shared_control) { /* no point in using optimized mode in this case */ posix_dataset->ds_fmode = HIO_FILE_MODE_BASIC; hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: optimized file mode requested but not supported in this " "dataset mode. falling back to basic file mode, path: %s", posix_dataset->base_path); } else if (HIO_SET_ELEMENT_SHARED == dataset->ds_mode) { POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_generate_map (dataset), "generate_map", 0, 0); if (HIO_SUCCESS != rc) { free (posix_dataset->base_path); return rc; } } } /* NTH: if requested more code is needed to load an optimized dataset with an older MPI */ #endif /* HIO_MPI_HAVE(3) */ dataset->ds_module = module; dataset->ds_close = builtin_posix_module_dataset_close; dataset->ds_element_open = builtin_posix_module_element_open; dataset->ds_process_reqs = builtin_posix_module_process_reqs; /* record the open time */ gettimeofday (&dataset->ds_otime, NULL); stop = hioi_gettime (); hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully %s posix dataset " "%s:%" PRIu64 " on data root %s. open time %" PRIu64 " usec", (dataset->ds_flags & HIO_FLAG_CREAT) ? "created" : "opened", hioi_object_identifier(dataset), dataset->ds_id, module->data_root, stop - start); builtin_posix_trace (posix_dataset, "open", 0, 0, start, stop); return HIO_SUCCESS; }
static int builtin_posix_module_dataset_close (hio_dataset_t dataset) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset; hio_context_t context = hioi_object_context ((hio_object_t) dataset); hio_module_t *module = dataset->ds_module; unsigned char *manifest = NULL; uint64_t start, stop; int rc = HIO_SUCCESS; size_t manifest_size; start = hioi_gettime (); for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) { if (posix_dataset->files[i].f_bid >= 0) { POSIX_TRACE_CALL(posix_dataset, hioi_file_close (posix_dataset->files + i), "file_close", posix_dataset->files[i].f_bid, 0); } } #if HIO_MPI_HAVE(3) /* release the shared state if it was allocated */ (void) hioi_dataset_shared_fini (dataset); /* release the dataset map if one was allocated */ (void) hioi_dataset_map_release (dataset); #endif if (dataset->ds_flags & HIO_FLAG_WRITE) { char *path; /* write manifest header */ POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_gather_manifest (dataset, &manifest, &manifest_size, false, true), "gather_manifest", 0, 0); if (HIO_SUCCESS != rc) { dataset->ds_status = rc; } if (0 == context->c_rank) { rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path); if (0 > rc) { /* out of memory. 
not much we can do now */ return hioi_err_errno (errno); } rc = hioi_manifest_save (dataset, manifest, manifest_size, path); free (manifest); free (path); if (HIO_SUCCESS != rc) { hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest"); } } #if HIO_MPI_HAVE(3) if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { /* optimized mode requires a data manifest to describe how the data landed on the filesystem */ POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_gather_manifest_comm (dataset, context->c_shared_comm, &manifest, &manifest_size, posix_dataset->ds_use_bzip, false), "gather_manifest", 0, 0); if (HIO_SUCCESS != rc) { dataset->ds_status = rc; } if (NULL != manifest) { rc = asprintf (&path, "%s/manifest.%x.json%s", posix_dataset->base_path, context->c_rank, posix_dataset->ds_use_bzip ? ".bz2" : ""); if (0 > rc) { return hioi_err_errno (errno); } rc = hioi_manifest_save (dataset, manifest, manifest_size, path); free (manifest); free (path); if (HIO_SUCCESS != rc) { hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest"); } } } #endif } #if HIO_MPI_HAVE(1) /* ensure all ranks have closed the dataset before continuing */ if (hioi_context_using_mpi (context)) { MPI_Allreduce (MPI_IN_PLACE, &rc, 1, MPI_INT, MPI_MIN, context->c_comm); } #endif free (posix_dataset->base_path); stop = hioi_gettime (); builtin_posix_trace (posix_dataset, "close", 0, 0, start, stop); hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully closed posix dataset " "%s:%" PRIu64 " on data root %s. close time %" PRIu64 " usec", hioi_object_identifier(dataset), dataset->ds_id, module->data_root, stop - start); builtin_posix_trace (posix_dataset, "trace_end", 0, 0, 0, 0); if (posix_dataset->ds_trace_fh) { fclose (posix_dataset->ds_trace_fh); } return rc; }
static int builtin_posix_element_translate_opt (builtin_posix_module_t *posix_module, hio_element_t element, uint64_t offset, size_t *size, hio_file_t **file_out, bool reading) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element); hio_context_t context = hioi_object_context (&element->e_object); hio_file_t *file; uint64_t file_offset; int file_index = 0; char *path; int rc; hioi_log (context, HIO_VERBOSE_DEBUG_MED, "translating element %s offset %" PRIu64 " size %lu", hioi_object_identifier (&element->e_object), offset, *size); POSIX_TRACE_CALL(posix_dataset, rc = hioi_element_translate_offset (element, offset, &file_index, &file_offset, size), "translate_offset", offset, *size); #if HIO_MPI_HAVE(3) if (HIO_SUCCESS != rc && reading) { POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_map_translate_offset (element, offset, &file_index, &file_offset, size), "map_translate_offset", offset, *size); } #endif if (HIO_SUCCESS != rc) { if (reading) { hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset %" PRIu64 " not found", offset); /* not found */ return rc; } file_offset = builtin_posix_reserve (posix_dataset, size); if (hioi_context_using_mpi (context)) { file_index = posix_dataset->base.ds_shared_control->s_master; } else { file_index = 0; } rc = asprintf (&path, "%s/data/data.%x", posix_dataset->base_path, file_index); if (0 > rc) { return HIO_ERR_OUT_OF_RESOURCE; } hioi_element_add_segment (element, file_index, file_offset, offset, *size); } else { hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset found in file @ rank %d, offset %" PRIu64 ", size %lu", file_index, file_offset, *size); rc = asprintf (&path, "%s/data/data.%x", posix_dataset->base_path, file_index); if (0 > rc) { return HIO_ERR_OUT_OF_RESOURCE; } if (access (path, R_OK)) { free (path); rc = asprintf (&path, "%s/data.%x", posix_dataset->base_path, file_index); if (0 > rc) { return HIO_ERR_OUT_OF_RESOURCE; } } } /* use crc as a hash to pick a 
file index to use */ int internal_index = file_index % HIO_POSIX_MAX_OPEN_FILES; file = posix_dataset->files + internal_index; if (file_index != file->f_bid) { if (file->f_bid >= 0) { POSIX_TRACE_CALL(posix_dataset, hioi_file_close (file), "file_close", file->f_bid, 0); } file->f_bid = -1; POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, file), "file_open", file_index, 0); if (HIO_SUCCESS != rc) { free (path); return rc; } file->f_bid = file_index; } free (path); POSIX_TRACE_CALL(posix_dataset, hioi_file_seek (file, file_offset, SEEK_SET), "file_seek", file->f_bid, file_offset); *file_out = file; return HIO_SUCCESS; }
/**
 * Close a dataset opened with the posix module.
 *
 * Closes every file still held in the table of open files and, if the dataset
 * was open for writing, gathers the dataset manifest and writes it (on rank
 * 0) to manifest.json (with a .bz2 suffix when bzip is in use) under the
 * dataset's base path.
 *
 * @param[in,out] dataset  dataset to close
 *
 * @returns HIO_SUCCESS or the error from gathering/writing the manifest
 *          (reduced over all ranks with MPI_MIN when MPI is in use)
 */
static int builtin_posix_module_dataset_close (hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) dataset->ds_module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  hio_module_t *module = dataset->ds_module;
  /* initialized so a failed gather never leaves them indeterminate */
  unsigned char *manifest = NULL;
  size_t manifest_size = 0;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;

  start = hioi_gettime ();

  for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) {
    if (posix_dataset->files[i].f_file.f_hndl != NULL) {
      fclose (posix_dataset->files[i].f_file.f_hndl);
      posix_dataset->files[i].f_file.f_hndl = NULL;
    }
  }

  if (dataset->ds_flags & HIO_FLAG_WRITE) {
    rc = hioi_dataset_gather_manifest (dataset, &manifest, &manifest_size, dataset->ds_use_bzip);
    if (HIO_SUCCESS != rc) {
      dataset->ds_status = rc;
    }

    /* without a manifest there is nothing to write. the gather status is
     * kept in rc in that case. */
    if (0 == context->c_rank && NULL != manifest) {
      char *path;

      rc = asprintf (&path, "%s/manifest.json%s", posix_dataset->base_path,
                     dataset->ds_use_bzip ? ".bz2" : "");
      if (0 < rc) {
        int saved_errno = 0;
        /* truncate so a shorter manifest does not leave the tail of an older
         * one in the file */
        int fd = open (path, O_CREAT | O_WRONLY | O_TRUNC, posix_module->access_mode);

        if (0 > fd) {
          saved_errno = errno;
        } else {
          const unsigned char *cursor = manifest;
          size_t remaining = manifest_size;

          /* write may transfer fewer bytes than requested */
          while (remaining > 0) {
            ssize_t written = write (fd, cursor, remaining);
            if (0 > written) {
              if (EINTR == errno) {
                continue;
              }
              saved_errno = errno;
              break;
            }

            if (0 == written) {
              /* no progress. report an I/O error instead of spinning. */
              saved_errno = EIO;
              break;
            }

            cursor += written;
            remaining -= (size_t) written;
          }

          if (0 != close (fd) && 0 == saved_errno) {
            saved_errno = errno;
          }
        }

        /* errno is captured at the failing call since later calls may
         * overwrite it */
        rc = (0 == saved_errno) ? HIO_SUCCESS : hioi_err_errno (saved_errno);
        free (path);
        if (HIO_SUCCESS != rc) {
          hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest");
        }
      } else {
        rc = HIO_ERR_OUT_OF_RESOURCE;
      }

      free (manifest);
      manifest = NULL;
    }
  }

#if HIO_USE_MPI
  /* ensure all ranks have closed the dataset before continuing */
  if (hioi_context_using_mpi (context)) {
    MPI_Allreduce (MPI_IN_PLACE, &rc, 1, MPI_INT, MPI_MIN, context->c_comm);
  }
#endif

  free (posix_dataset->base_path);

  pthread_mutex_destroy (&posix_dataset->lock);

  stop = hioi_gettime ();

  /* the message used to claim it came from dataset_open. the 64-bit values
   * are converted to the types the conversions expect. */
  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_close: successfully closed posix dataset %s:%llu on data root %s. "
            "close time %lu usec", hioi_object_identifier(dataset), (unsigned long long) dataset->ds_id,
            module->data_root, (unsigned long) (stop - start));

  return rc;
}