/**
 * Create the top-level directory of a dataset.
 *
 * Only rank 0 touches the filesystem; every other rank returns HIO_SUCCESS
 * without doing anything.
 *
 * @param[in] posix_module   module supplying the context and the access mode
 * @param[in] posix_dataset  dataset whose base_path is created
 *
 * @returns HIO_SUCCESS when the directory was created. Otherwise the value
 *          hioi_err_errno() produces for the errno left behind by hio_mkpath().
 *          An already existing directory (EEXIST) is returned as an error but
 *          is not pushed onto the error stack.
 */
static int builtin_posix_create_dataset_dirs (builtin_posix_module_t *posix_module, builtin_posix_module_dataset_t *posix_dataset) {
  mode_t access_mode = posix_module->access_mode;
  hio_context_t context = posix_module->base.context;
  int rc, saved_errno;

  if (context->c_rank > 0) {
    return HIO_SUCCESS;
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix: creating dataset directory @ %s", posix_dataset->base_path);

  rc = hio_mkpath (context, posix_dataset->base_path, access_mode);
  /* capture errno right away: the reporting calls below allocate and log and
   * may therefore overwrite it before it is converted to a return code */
  saved_errno = errno;
  if (0 > rc || EEXIST == saved_errno) {
    if (EEXIST != saved_errno) {
      hioi_err_push (hioi_err_errno (saved_errno), &context->c_object, "posix: error creating context directory: %s",
                     posix_dataset->base_path);
    }

    return hioi_err_errno (saved_errno);
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: successfully created dataset directories");

  return HIO_SUCCESS;
}
/**
 * Translate an element offset into an open file and file position (strided file mode).
 *
 * Blocks of ds_bs bytes are distributed round-robin over ds_fcount files. The
 * file holding the block that contains @p offset is opened, unless it is
 * already cached in its slot of posix_dataset->files, and is then positioned
 * at the matching offset.
 *
 * @param[in]     posix_module  posix module
 * @param[in]     element       element being accessed
 * @param[in]     offset        offset within the element
 * @param[in,out] size          requested size; clipped so the request ends at the block boundary
 * @param[out]    file_out      file positioned at the translated offset
 *
 * @returns HIO_SUCCESS on success, HIO_ERR_OUT_OF_RESOURCE if the file name
 *          could not be allocated, or the error from builtin_posix_open_file().
 */
static int builtin_posix_element_translate_strided (builtin_posix_module_t *posix_module, hio_element_t element, uint64_t offset,
                                                    size_t *size, hio_file_t **file_out) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  size_t block_id, block_base, block_bound, block_offset, file_id, file_block;
  hio_context_t context = hioi_object_context (&element->e_object);
  hio_file_t *file;
  int32_t file_index;
  char *path;
  int rc;

  block_id = offset / posix_dataset->ds_bs;

  file_id = block_id % posix_dataset->ds_fcount;
  file_block = block_id / posix_dataset->ds_fcount;

  block_base = block_id * posix_dataset->ds_bs;
  block_bound = block_base + posix_dataset->ds_bs;
  block_offset = file_block * posix_dataset->ds_bs + offset - block_base;

  /* the second %lu is labelled file_block, so pass file_block (not file_id a second time) */
  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "builtin_posix_element_translate_strided: element: %s, offset: %"
            PRIu64 ", file_id: %lu, file_block: %lu, block_offset: %lu, block_size: %" PRIu64,
            hioi_object_identifier(element), offset, (unsigned long) file_id, (unsigned long) file_block,
            (unsigned long) block_offset, posix_dataset->ds_bs);

  if (offset + *size > block_bound) {
    *size = block_bound - offset;
  }

  rc = asprintf (&path, "%s/data/%s_block.%08lu", posix_dataset->base_path, hioi_object_identifier(element),
                 (unsigned long) file_id);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  /* pick the open-file slot from the file id (simple modulo hash) */
  file_index = file_id % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + file_index;

  if (file_id != file->f_bid || file->f_element != element) {
    if (file->f_bid >= 0) {
      POSIX_TRACE_CALL(posix_dataset, hioi_file_close (file), "file_close", file->f_bid, 0);
    }
    file->f_bid = -1;

    file->f_element = element;

    POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, file), "file_open",
                     file_id, 0);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = file_id;
  }

  /* the name was only needed to open the file; release it on every path */
  free (path);

  POSIX_TRACE_CALL(posix_dataset, hioi_file_seek (file, block_offset, SEEK_SET), "file_seek", file->f_bid, block_offset);

  *file_out = file;

  return HIO_SUCCESS;
}
/**
 * Open (or create) an element of a posix dataset.
 *
 * In basic file mode the element's backing file is opened through
 * builtin_posix_module_element_open_basic(); if that fails the element object
 * is released and the error is returned. On success the posix implementations
 * of the element operations are installed on the element.
 *
 * @param[in] dataset  dataset the element belongs to
 * @param[in] element  element to open
 *
 * @returns HIO_SUCCESS, or the error from builtin_posix_module_element_open_basic()
 */
static int builtin_posix_module_element_open (hio_dataset_t dataset, hio_element_t element) {
  builtin_posix_module_t *module = (builtin_posix_module_t *) dataset->ds_module;
  builtin_posix_module_dataset_t *pds = (builtin_posix_module_dataset_t *) dataset;
  hio_context_t ctx = hioi_object_context (&dataset->ds_object);
  const char *action;

  if (HIO_FILE_MODE_BASIC == dataset->ds_fmode) {
    int open_rc = builtin_posix_module_element_open_basic (module, pds, element);

    if (HIO_SUCCESS != open_rc) {
      hioi_object_release (&element->e_object);
      return open_rc;
    }
  }

  action = (HIO_FLAG_WRITE & dataset->ds_flags) ? "created" : "opened";
  hioi_log (ctx, HIO_VERBOSE_DEBUG_LOW, "posix: %s element %p (identifier %s) for dataset %s",
            action, element, hioi_object_identifier(element), hioi_object_identifier(dataset));

  /* install the posix element operations */
  element->e_write_strided_nb = builtin_posix_module_element_write_strided_nb;
  element->e_read_strided_nb = builtin_posix_module_element_read_strided_nb;
  element->e_flush = builtin_posix_module_element_flush;
  element->e_complete = builtin_posix_module_element_complete;
  element->e_close = builtin_posix_module_element_close;

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_unlink (struct hio_module_t *module, const char *name, int64_t set_id) { struct stat statinfo; char *path = NULL; int rc; if (module->context->c_rank) { return HIO_ERR_NOT_AVAILABLE; } rc = builtin_posix_dataset_path (module, &path, name, set_id); if (HIO_SUCCESS != rc) { return rc; } if (stat (path, &statinfo)) { free (path); return hioi_err_errno (errno); } hioi_log (module->context, HIO_VERBOSE_DEBUG_LOW, "posix: unlinking existing dataset %s::%llu", name, set_id); /* use tree walk depth-first to remove all of the files for this dataset */ rc = nftw (path, builtin_posix_unlink_cb, 32, FTW_DEPTH | FTW_PHYS); free (path); if (0 > rc) { hioi_err_push (hioi_err_errno (errno), &module->context->c_object, "posix: could not unlink dataset. errno: %d", errno); return hioi_err_errno (errno); } return HIO_SUCCESS; }
/**
 * Open a file on a DataWarp filesystem.
 *
 * When DW_SUPER_MAGIC is defined the file is opened directly and, if O_CREAT
 * was requested and the open succeeded, the stripe size/count from @p fs_attr
 * are applied (a failure to do so is only logged). If the open fails with
 * EEXIST the file is re-opened without the creation flags. Without
 * DW_SUPER_MAGIC the call is forwarded to hioi_fs_open_posix().
 *
 * @param[in] context  context used for logging
 * @param[in] path     file to open
 * @param[in] fs_attr  filesystem attributes (stripe size and count)
 * @param[in] flags    open(2) flags
 * @param[in] mode     permissions used when the file is created
 *
 * @returns a file descriptor on success, otherwise the hioi_err_errno()
 *          translation of the open(2) failure
 */
static int hioi_fs_open_datawarp (hio_context_t context, const char *path, hio_fs_attr_t *fs_attr, int flags, int mode) {
#if defined(DW_SUPER_MAGIC)
  int rc, fd;

  fd = open (path, flags, mode);
  if (-1 == fd && EEXIST == errno) {
    /* the file already exists: open it as-is. O_EXCL has to be dropped
     * together with O_CREAT because POSIX leaves O_EXCL without O_CREAT
     * undefined. */
    flags &= ~(O_CREAT | O_EXCL);
    fd = open (path, flags);
    if (fd < 0) {
      return hioi_err_errno (errno);
    }

    return fd;
  }

  if (fd >= 0) {
    if (flags & O_CREAT) {
      rc = dw_set_stripe_configuration (fd, fs_attr->fs_ssize, fs_attr->fs_scount);
      if (0 != rc) {
        /* striping is best-effort; keep the descriptor */
        hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "datawarp: could not set file striping parameters: "
                  "errno = %d", -rc);
      }
    }

    return fd;
  }

  return hioi_err_errno (errno);
#else
  return hioi_fs_open_posix (context, path, fs_attr, flags, mode);
#endif
}
/**
 * Look up one environment variable and, if it is set, apply it to @p var.
 *
 * @param[in]  context   context used for logging and for setting the value
 * @param[in]  var       variable to set
 * @param[in]  env_name  name of the environment variable to look up
 * @param[out] set_rc    result of hioi_config_set_value_internal(); only written when the variable is set
 *
 * @returns non-zero if the environment variable exists, 0 otherwise
 */
static int hioi_config_apply_env (hio_context_t context, hio_var_t *var, const char *env_name, int *set_rc) {
  char *string_value;

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "Looking for variable %s", env_name);

  string_value = getenv (env_name);
  if (NULL == string_value) {
    return 0;
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from ENV %s", var->var_name, string_value, env_name);
  *set_rc = hioi_config_set_value_internal (context, var, string_value);

  return 1;
}

/**
 * Set a configuration variable from the environment.
 *
 * Candidate names are tried from most to least specific and the first one
 * found wins:
 *   1. <prefix>dataset_<context id>_<object id>_<var>   (dataset objects only)
 *   2. <prefix>dataset_<object id>_<var>                (dataset objects only)
 *   3. <prefix>context_<context id>_<var>
 *   4. <prefix><var>
 * where <prefix> is hio_config_env_prefix.
 *
 * @param[in] context  context the object belongs to
 * @param[in] object   object that owns the variable
 * @param[in] var      variable to set
 *
 * @returns the result of hioi_config_set_value_internal() for the first
 *          matching environment variable, or HIO_SUCCESS if none is set
 */
static int hioi_config_set_from_env (hio_context_t context, hio_object_t object, hio_var_t *var) {
  char env_name[256];
  int rc;

  if (HIO_OBJECT_TYPE_DATASET == object->type) {
    /* check for dataset specific variables */
    snprintf (env_name, sizeof (env_name), "%sdataset_%s_%s_%s", hio_config_env_prefix, context->c_object.identifier,
              object->identifier, var->var_name);
    if (hioi_config_apply_env (context, var, env_name, &rc)) {
      return rc;
    }

    snprintf (env_name, sizeof (env_name), "%sdataset_%s_%s", hio_config_env_prefix, object->identifier, var->var_name);
    if (hioi_config_apply_env (context, var, env_name, &rc)) {
      return rc;
    }
  }

  snprintf (env_name, sizeof (env_name), "%scontext_%s_%s", hio_config_env_prefix, context->c_object.identifier,
            var->var_name);
  if (hioi_config_apply_env (context, var, env_name, &rc)) {
    return rc;
  }

  snprintf (env_name, sizeof (env_name), "%s%s", hio_config_env_prefix, var->var_name);
  if (hioi_config_apply_env (context, var, env_name, &rc)) {
    return rc;
  }

  return HIO_SUCCESS;
}
/**
 * Finalize a posix module.
 *
 * Logs the data root being torn down, then frees the data root string and the
 * module structure itself. The module must not be used after this call.
 *
 * @param[in] module  module to release
 *
 * @returns HIO_SUCCESS
 */
static int builtin_posix_module_fini (struct hio_module_t *module) {
  char *root = module->data_root;

  hioi_log (module->context, HIO_VERBOSE_DEBUG_LOW, "posix: finalizing module for data root %s", root);

  free (root);
  free (module);

  return HIO_SUCCESS;
}
/**
 * Create a directory and any missing parent directories (like `mkdir -p`).
 *
 * Each intermediate component is created if access() reports it missing; the
 * final component is always passed to mkdir().
 *
 * errno is part of the result. On return it holds the errno of the last
 * mkdir(): 0 if the final directory was newly created, EEXIST if it already
 * existed, or the failure reason when HIO_ERROR is returned. It is saved and
 * restored around logging and free() so that those calls can not disturb it.
 *
 * @param[in] context      context used for logging
 * @param[in] path         directory path to create
 * @param[in] access_mode  permissions for newly created directories
 *
 * @returns HIO_SUCCESS if the directory exists on return,
 *          HIO_ERR_OUT_OF_RESOURCE if the path could not be duplicated,
 *          HIO_ERROR if a component could not be created
 */
int hio_mkpath (hio_context_t context, const char *path, mode_t access_mode) {
  char *tmp = strdup (path);
  int rc, saved_errno;

  if (NULL == tmp) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  for (char *sep = strchr (tmp, '/') ; sep ; sep = strchr (sep + 1, '/')) {
    if (sep == tmp) {
      /* leading '/' of an absolute path: nothing to create */
      continue;
    }

    /* temporarily terminate the string at this separator */
    *sep = '\0';
    errno = 0;
    if (access (tmp, F_OK)) {
      hioi_log (context, HIO_VERBOSE_DEBUG_MED, "creating directory %s with permissions 0%o", tmp, access_mode);
      rc = mkdir (tmp, access_mode);
      /* EEXIST is fine: the directory may have appeared since the access() check */
      if (0 != rc && (EEXIST != errno)) {
        saved_errno = errno;
        hioi_log (context, HIO_VERBOSE_WARN, "could not create directory %s. errno: %d", tmp, saved_errno);
        free (tmp);
        errno = saved_errno;
        return HIO_ERROR;
      }
    } else {
      errno = EEXIST;
    }

    *sep = '/';
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "creating directory %s with permissions 0%o", tmp, access_mode);

  /* reset errno immediately before mkdir(): a successful mkdir() does not
   * clear it and the logging above may have set it */
  errno = 0;
  rc = mkdir (tmp, access_mode);
  saved_errno = errno;

  free (tmp);

  /* hand mkdir's errno back to the caller untouched by free() */
  errno = saved_errno;

  return (rc && saved_errno != EEXIST) ? HIO_ERROR : HIO_SUCCESS;
}
/**
 * Search a key/value list (as read from a configuration file) for a value
 * for a variable and apply the first match.
 *
 * @param[in] list    key/value list to search
 * @param[in] object  object that owns the variable
 * @param[in] var     variable to set
 *
 * @returns the result of hioi_config_set_value_internal() for the first
 *          matching entry, or HIO_SUCCESS if no entry matches
 *
 * An entry matches when its object type is HIO_OBJECT_TYPE_ANY or equals the
 * object's type, its object identifier is NULL or equals the object's
 * identifier, and its key equals the variable name.
 *
 * This function currently does a linear search of the list. In the future
 * this should be replaced with a hash table or similar structure.
 */
static int hioi_config_set_from_kv_list (hio_config_kv_list_t *list, hio_object_t object, hio_var_t *var) {
  hio_context_t context = hioi_object_context (object);

  for (int idx = 0 ; idx < list->kv_list_count ; ++idx) {
    hio_config_kv_t *entry = &list->kv_list[idx];

    /* the entry must apply to this type of object */
    if (HIO_OBJECT_TYPE_ANY != entry->object_type && object->type != entry->object_type) {
      continue;
    }

    /* an entry without an identifier applies to every object of the type */
    if (NULL != entry->object_identifier && 0 != strcmp (object->identifier, entry->object_identifier)) {
      continue;
    }

    if (0 != strcmp (var->var_name, entry->key)) {
      continue;
    }

    hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from file", var->var_name, entry->value);

    return hioi_config_set_value_internal (context, var, entry->value);
  }

  return HIO_SUCCESS;
}
/**
 * Format an error message and push it onto an error stack.
 *
 * If @p object is non-NULL the message is logged at HIO_VERBOSE_ERROR and
 * pushed onto the stack of the object's context (under the context object
 * lock). Otherwise it is pushed onto the global stack protected by
 * hio_error_stack_mutex.
 *
 * The message is silently dropped if memory can not be allocated or if the
 * formatted message is empty.
 *
 * @param[in] hrc     error code to record with the message
 * @param[in] object  object the error is associated with (may be NULL)
 * @param[in] format  printf-style format string followed by its arguments
 */
void hioi_err_push (int hrc, hio_object_t object, char *format, ...) {
  hio_context_t context = object ? hioi_object_context (object) : NULL;
  hio_error_stack_item_t *new_item;
  va_list vargs;
  int rc;

  new_item = calloc (1, sizeof (hio_error_stack_item_t));
  if (NULL == new_item) {
    /* not much can be done here. we are just plain OOM. */
    return;
  }

  va_start (vargs, format);
  rc = vasprintf (&new_item->error_string, format, vargs);
  va_end (vargs);

  if (0 >= rc) {
    /* a negative result means no usable string was produced (its contents
     * are unspecified, so it must not be freed). a result of 0 is an empty
     * but allocated string that has to be released to avoid a leak. */
    if (0 == rc) {
      free (new_item->error_string);
    }

    free (new_item);
    return;
  }

  if (context) {
    hioi_log (context, HIO_VERBOSE_ERROR, "%s", new_item->error_string);
  }

  new_item->hrc = hrc;

  /* push the error message onto the stack */
  if (NULL == context) {
    pthread_mutex_lock (&hio_error_stack_mutex);
    new_item->next = hio_error_stack_head;
    hio_error_stack_head = new_item;
    pthread_mutex_unlock (&hio_error_stack_mutex);
  } else {
    hioi_object_lock (&context->c_object);
    new_item->next = (hio_error_stack_item_t *) context->c_estack;
    context->c_estack = (void *) new_item;
    hioi_object_unlock (&context->c_object);
  }
}
/**
 * Create a posix module for a data root, if the data root is usable.
 *
 * Data roots starting with "datawarp" or "dw" (case-insensitive) are
 * declined. An optional "posix:" prefix is skipped before the path is checked
 * with access(). The module's access mode is derived from the process umask
 * (0777 with the umask bits cleared).
 *
 * @param[in]  context         context the module will belong to
 * @param[in]  data_root       data root to query
 * @param[in]  next_data_root  following data root (not used by this component)
 * @param[out] module          new module; only set on success
 *
 * @returns HIO_SUCCESS on success, HIO_ERR_NOT_AVAILABLE for datawarp roots,
 *          HIO_ERR_OUT_OF_RESOURCE on allocation failure, or the
 *          hioi_err_errno() translation of the access() failure
 */
static int builtin_posix_component_query (hio_context_t context, const char *data_root, const char *next_data_root,
                                          hio_module_t **module) {
  builtin_posix_module_t *new_module;

  if (0 == strncasecmp("datawarp", data_root, 8) || 0 == strncasecmp("dw", data_root, 2)) {
    return HIO_ERR_NOT_AVAILABLE;
  }

  if (0 == strncasecmp("posix:", data_root, 6)) {
    /* skip posix: */
    data_root += 6;
  }

  if (access (data_root, F_OK)) {
    /* hioi_err_push() allocates and logs, so take a copy of errno first */
    int saved_errno = errno;

    hioi_err_push (hioi_err_errno (saved_errno), &context->c_object, "posix: data root %s does not exist or can not be accessed",
                   data_root);
    return hioi_err_errno (saved_errno);
  }

  new_module = calloc (1, sizeof (builtin_posix_module_t));
  if (NULL == new_module) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  memcpy (new_module, &builtin_posix_module_template, sizeof (builtin_posix_module_template));

  new_module->base.data_root = strdup (data_root);
  if (NULL == new_module->base.data_root) {
    /* do not hand out a module without a data root */
    free (new_module);
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  new_module->base.context = context;

  /* get the current umask. umask() can only be read by setting it, so set it
   * to 0 and immediately restore the previous value */
  new_module->access_mode = umask (0);
  umask (new_module->access_mode);

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: created module for data root %s. using umask %o",
            data_root, new_module->access_mode);

  /* turn the umask into the permission bits used for new files/directories */
  new_module->access_mode ^= 0777;

  *module = &new_module->base;

  return HIO_SUCCESS;
}
/**
 * Write @p count blocks of @p size bytes from a (possibly strided) buffer to an element.
 *
 * The data lands contiguously in the element starting at @p offset. In the
 * source buffer consecutive blocks are separated by @p stride bytes; a stride
 * of 0 means the buffer is contiguous and is written as a single block. Each
 * block may be split over several files by builtin_posix_element_translate().
 * Writing stops at the first translation error or short write.
 *
 * On success the element size, the dataset write time and the written byte
 * count are updated. On failure the error is also stored in ds_status.
 *
 * @param[in] posix_module  posix module
 * @param[in] element       element to write to
 * @param[in] offset        offset in the element of the first byte
 * @param[in] ptr           source buffer
 * @param[in] count         number of blocks
 * @param[in] size          size of each block in bytes
 * @param[in] stride        gap in bytes between blocks in the source buffer
 *
 * @returns the number of bytes written, 0 if there is nothing to write, or a
 *          negative error code: the translation error, or the
 *          hioi_err_errno() translation of errno when nothing was written
 */
static ssize_t builtin_posix_module_element_write_strided_internal (builtin_posix_module_t *posix_module, hio_element_t element,
                                                                    off_t offset, const void *ptr, size_t count, size_t size,
                                                                    size_t stride) {
  hio_dataset_t dataset = hioi_element_dataset (element);
  /* offset is advanced while writing; remember where the write began */
  const off_t first_offset = offset;
  size_t bytes_written = 0, ret;
  hio_file_t *file = NULL;
  uint64_t stop, start;
  int rc = HIO_SUCCESS;

  assert (dataset->ds_flags & HIO_FLAG_WRITE);

  if (!(count * size)) {
    return 0;
  }

  if (0 == stride) {
    /* contiguous buffer: treat it as one large block */
    size *= count;
    count = 1;
  }

  start = hioi_gettime ();

  errno = 0;

  for (size_t i = 0 ; i < count ; ++i) {
    size_t req = size, actual;

    do {
      actual = req;

      rc = builtin_posix_element_translate (posix_module, element, offset, &actual, &file, false);
      if (HIO_SUCCESS != rc) {
        break;
      }

      /* only meaningful after a successful translation */
      assert (file);

      ret = fwrite (ptr, 1, actual, file->f_hndl);
      if (ret > 0) {
        bytes_written += ret;
        file->f_offset += ret;
      }

      if (ret < actual) {
        /* short write */
        break;
      }

      req -= actual;
      offset += actual;
      ptr = (const char *) ptr + actual;
    } while (req);

    if (HIO_SUCCESS != rc || req) {
      break;
    }

    /* skip the gap to the next block in the source buffer */
    ptr = (const char *) ptr + stride;
  }

  if (0 == bytes_written || HIO_SUCCESS != rc) {
    if (0 == bytes_written) {
      rc = hioi_err_errno (errno);
    }

    dataset->ds_status = rc;
    return rc;
  }

  /* the written region is [first_offset, first_offset + bytes_written). using
   * the advanced offset here would count the written bytes twice. */
  if (first_offset + bytes_written > element->e_size) {
    element->e_size = first_offset + bytes_written;
  }

  stop = hioi_gettime ();

  dataset->ds_stat.s_wtime += stop - start;
  dataset->ds_stat.s_bwritten += bytes_written;

  hioi_log (hioi_object_context (&element->e_object), HIO_VERBOSE_DEBUG_LOW, "posix: finished write. bytes written: "
            "%lu, time: %llu usec", (unsigned long) bytes_written, (unsigned long long) (stop - start));

  return bytes_written;
}
/**
 * Translate an element offset into an open file and file position (optimized file mode).
 *
 * The element's segment list is consulted through
 * hioi_element_translate_offset(). If the offset is not mapped yet and the
 * caller is writing, space is reserved in this process' data file, the file
 * is registered with the dataset and a new segment is added to the element.
 * The backing file is then opened, unless it is already cached in its slot of
 * posix_dataset->files, and positioned at the translated offset.
 *
 * @param[in]     posix_module  posix module
 * @param[in]     element       element being accessed
 * @param[in]     offset        offset within the element
 * @param[in,out] size          requested size; may be reduced by the translation
 * @param[out]    file_out      file positioned at the translated offset
 * @param[in]     reading       true for reads: an unmapped offset is then an error
 *
 * @returns HIO_SUCCESS on success, the error from
 *          hioi_element_translate_offset() for an unmapped read,
 *          HIO_ERR_OUT_OF_RESOURCE if the path can not be allocated, or the
 *          error from builtin_posix_open_file()
 */
static int builtin_posix_element_translate_opt (builtin_posix_module_t *posix_module, hio_element_t element, off_t offset,
                                                size_t *size, hio_file_t **file_out, bool reading) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  builtin_posix_file_t *file;
  uint64_t file_offset;
  int file_index;
  char *path;
  int rc;

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "translating element %s offset %ld size %lu",
            hioi_object_identifier (&element->e_object), (long) offset, (unsigned long) *size);

  rc = hioi_element_translate_offset (element, offset, &file_index, &file_offset, size);
  if (HIO_SUCCESS != rc) {
    if (reading) {
      hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset not found");
      /* not found */
      return rc;
    }

    if (hioi_context_using_mpi (context)) {
      rc = asprintf (&path, "%s/data.%x", posix_dataset->base_path, posix_dataset->base.ds_shared_control->s_master);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    } else {
      rc = asprintf (&path, "%s/data", posix_dataset->base_path);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    }

    /* claim space in the data file and record where this piece of the element lives */
    file_offset = builtin_posix_reserve (posix_dataset, size);
    file_index = hioi_dataset_add_file (&posix_dataset->base, strrchr (path, '/') + 1);
    hioi_element_add_segment (element, file_index, file_offset, offset, *size);
  } else {
    hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset found in file @ index %d, offset %lu, size %lu", file_index,
              (unsigned long) file_offset, (unsigned long) *size);
    rc = asprintf (&path, "%s/%s", posix_dataset->base_path, posix_dataset->base.ds_flist[file_index].f_name);
    if (0 > rc) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }
  }

  /* pick the open-file slot from the file index (simple modulo hash) */
  int internal_index = file_index % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + internal_index;

  /* f_bid records the index of the file held by the slot, so it has to be
   * compared against file_index. comparing against the slot number would
   * reuse the wrong file whenever two file indices share a slot. */
  if (file_index != file->f_bid) {
    if (NULL != file->f_file.f_hndl) {
      fclose (file->f_file.f_hndl);
      file->f_file.f_hndl = NULL;
      file->f_bid = -1;
    }

    rc = builtin_posix_open_file (posix_module, posix_dataset, path, &file->f_file);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = file_index;
  }

  free (path);

  if (file_offset != file->f_file.f_offset) {
    fseek (file->f_file.f_hndl, file_offset, SEEK_SET);
    file->f_file.f_offset = file_offset;
  }

  *file_out = &file->f_file;

  return HIO_SUCCESS;
}
/**
 * Translate an element offset into an open file and file position (one file per block).
 *
 * Every block of ds_bs bytes of an element is stored in its own file named
 * "<element id>_block.<block id>" under the dataset's base path. When the
 * dataset is open for writing the file is registered with the dataset and a
 * segment describing the request is added to the element.
 *
 * @param[in]     posix_module  posix module
 * @param[in]     element       element being accessed
 * @param[in]     offset        offset within the element
 * @param[in,out] size          requested size; clipped so the request ends at the block boundary
 * @param[out]    file_out      file positioned at the translated offset
 *
 * @returns HIO_SUCCESS on success, HIO_ERR_OUT_OF_RESOURCE if a path can not
 *          be allocated, or the error from builtin_posix_open_file()
 */
static int builtin_posix_element_translate_opt_old (builtin_posix_module_t *posix_module, hio_element_t element, off_t offset,
                                                    size_t *size, hio_file_t **file_out) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  size_t block_id, block_base, block_bound, block_offset;
  builtin_posix_file_t *file;
  int32_t file_index;
  /* index of the block file in the dataset's file list (write mode only) */
  int dataset_file_index = 0;
  char *path;
  int rc;

  block_id = offset / posix_dataset->base.ds_bs;
  block_base = block_id * posix_dataset->base.ds_bs;
  block_bound = block_base + posix_dataset->base.ds_bs;
  block_offset = offset - block_base;

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "builtin_posix_element_translate: element: %s, offset: %lu, block_id: %lu, "
            "block_offset: %lu, block_size: %lu", hioi_object_identifier(element), (unsigned long) offset,
            (unsigned long) block_id, (unsigned long) block_offset, (unsigned long) posix_dataset->base.ds_bs);

  if (offset + *size > block_bound) {
    *size = block_bound - offset;
  }

  rc = asprintf (&path, "%s_block.%lu", hioi_object_identifier(element), (unsigned long) block_id);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  if (HIO_FLAG_WRITE & posix_dataset->base.ds_flags) {
    dataset_file_index = hioi_dataset_add_file (&posix_dataset->base, path);
  }

  /* prepend the dataset directory to get the full path of the block file */
  char *tmp = path;
  rc = asprintf (&path, "%s/%s", posix_dataset->base_path, tmp);
  free (tmp);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  /* use crc as a hash to pick a file index to use */
  file_index = hioi_crc32 ((uint8_t *) path, strlen (path)) % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + file_index;

  if (block_id != file->f_bid || file->f_element != element) {
    if (file->f_file.f_hndl != NULL) {
      fclose (file->f_file.f_hndl);
      file->f_file.f_hndl = NULL;
      file->f_bid = -1;
    }

    file->f_element = element;

    rc = builtin_posix_open_file (posix_module, posix_dataset, path, &file->f_file);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = block_id;
  }

  /* the path is only needed for hashing and opening; release it on every path */
  free (path);

  if (block_offset != file->f_file.f_offset) {
    fseek (file->f_file.f_hndl, block_offset, SEEK_SET);
    file->f_file.f_offset = block_offset;
  }

  if (HIO_FLAG_WRITE & posix_dataset->base.ds_flags) {
    hioi_element_add_segment (element, dataset_file_index, block_offset, offset, *size);
  }

  *file_out = &file->f_file;

  return HIO_SUCCESS;
}
/**
 * Read the data manifests of a dataset and share them with the other processes.
 *
 * In HIO_SET_ELEMENT_UNIQUE mode only the manifest named after this rank is
 * read; otherwise the list of manifest ids comes from
 * builtin_posix_module_dataset_manifest_list(). For each id the bzip2
 * compressed manifest is preferred and the plain JSON manifest is the
 * fallback; ids without a manifest file are skipped. All manifests found are
 * merged and the result, together with the status, is handed to
 * hioi_dataset_scatter_unique() or hioi_dataset_scatter_comm().
 *
 * @param[in] posix_dataset  dataset being opened
 *
 * @returns the result of the scatter call, or the error from
 *          builtin_posix_module_dataset_manifest_list()
 */
static int bultin_posix_scatter_data (builtin_posix_module_dataset_t *posix_dataset) {
  hio_context_t context = hioi_object_context ((hio_object_t) posix_dataset);
  size_t manifest_size = 0, manifest_id_count = 0;
  unsigned char *manifest = NULL;
  int rc = HIO_SUCCESS;
  int *manifest_ids = NULL;
  int own_id;
  char *path = NULL;

  if (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
    /* only read the manifest this rank wrote. a single id does not need a
     * heap allocation (and therefore can not fail to allocate). */
    own_id = context->c_rank;
    manifest_ids = &own_id;
    manifest_id_count = 1;
  } else {
    rc = builtin_posix_module_dataset_manifest_list (posix_dataset, &manifest_ids, &manifest_id_count);
    if (HIO_SUCCESS != rc) {
      return rc;
    }
  }

  for (size_t i = 0 ; i < manifest_id_count ; ++i) {
    if (-1 == manifest_ids[i]) {
      /* nothing more to do */
      break;
    }

    hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: reading manifest data from id %x\n",
              manifest_ids[i]);

    /* when writing the manifest in optimized mode each IO manager writes its own manifest. try
     * to open the manifest. if a manifest does not exist then it is likely this rank did not
     * write a manifest. IO managers will distribute the manifest data to the appropriate ranks
     * in hioi_dataset_scatter(). */
    rc = asprintf (&path, "%s/manifest.%x.json.bz2", posix_dataset->base_path, manifest_ids[i]);
    assert (0 < rc);
    if (access (path, F_OK)) {
      free (path);
      /* Check for a non-bzip'd manifest file. */
      rc = asprintf (&path, "%s/manifest.%x.json", posix_dataset->base_path, manifest_ids[i]);
      assert (0 < rc);
      if (access (path, F_OK)) {
        /* no manifest found. this might be a non-optimized file format or this rank may not be an
         * IO master rank. */
        free (path);
        path = NULL;
      }
    }

    /* rc still holds asprintf's byte count at this point. a missing manifest
     * is not an error, so make sure that count is never mistaken for a status
     * by the scatter call below. */
    rc = HIO_SUCCESS;

    if (path) {
      unsigned char *tmp = NULL;
      size_t tmp_size = 0;

      /* read the manifest if it exists */
      rc = hioi_manifest_read (path, &tmp, &tmp_size);
      if (HIO_SUCCESS == rc) {
        rc = hioi_manifest_merge_data2 (&manifest, &manifest_size, tmp, tmp_size);
        free (tmp);
      }

      free (path);

      if (HIO_SUCCESS != rc) {
        break;
      }
    }
  }

  /* share dataset information with all processes on this node */
  if (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
    rc = hioi_dataset_scatter_unique (&posix_dataset->base, manifest, manifest_size, rc);
  } else {
    rc = hioi_dataset_scatter_comm (&posix_dataset->base, context->c_shared_comm, manifest, manifest_size, rc);
  }

  /* the id list is only heap allocated when it came from the manifest list */
  if (manifest_ids != &own_id) {
    free (manifest_ids);
  }

  free (manifest);

  return rc;
}
/**
 * Open (or create) a dataset on a posix data root.
 *
 * Steps:
 *  1. initialize the posix dataset and query the filesystem of the data root
 *  2. if the filesystem supports striping, register the stripe_count,
 *     stripe_size and raid_level configuration variables and clamp the
 *     requested values to what the filesystem allows
 *  3. on rank 0: unlink an existing dataset if HIO_FLAG_TRUNC is set, then
 *     either read the top-level manifest (open) or create the dataset
 *     directories and serialize a fresh manifest (create)
 *  4. scatter the manifest and the status to all processes
 *  5. fall back to basic file mode where optimized mode is not supported and
 *     install the posix dataset operations
 *
 * @param[in] module   posix module
 * @param[in] dataset  dataset to open
 *
 * @returns HIO_SUCCESS on success, otherwise the first error encountered
 *          (HIO_ERR_NOT_FOUND when no top-level manifest exists)
 */
static int builtin_posix_module_dataset_open (struct hio_module_t *module, hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  unsigned char *manifest = NULL;
  size_t manifest_size = 0;
  hio_fs_attr_t *fs_attr;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;
  char *path = NULL;

  start = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix:dataset_open: opening dataset %s:%lu mpi: %d flags: 0x%x mode: 0x%x",
            hioi_object_identifier (dataset), (unsigned long) dataset->ds_id, hioi_context_using_mpi (context),
            dataset->ds_flags, dataset->ds_mode);

  rc = builtin_posix_module_dataset_init (module, posix_dataset);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  fs_attr = &posix_dataset->base.ds_fsattr;

  rc = hioi_fs_query (context, module->data_root, fs_attr);
  if (HIO_SUCCESS != rc) {
    hioi_err_push (rc, &context->c_object, "posix: error querying the filesystem");
    return rc;
  }

  if (fs_attr->fs_flags & HIO_FS_SUPPORTS_STRIPING) {
    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_scount,
                     "stripe_count", HIO_CONFIG_TYPE_UINT32, NULL, "Stripe count for all dataset "
                     "data files", 0);

    if (fs_attr->fs_scount > fs_attr->fs_smax_count) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe count %u exceeds the available resources. "
                "adjusting to maximum %u", fs_attr->fs_scount, fs_attr->fs_smax_count);
      fs_attr->fs_scount = fs_attr->fs_smax_count;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_ssize,
                     "stripe_size", HIO_CONFIG_TYPE_UINT64, NULL, "Stripe size for all dataset "
                     "data files", 0);

    /* ensure the stripe size is a multiple of the stripe unit */
    fs_attr->fs_ssize = fs_attr->fs_sunit * ((fs_attr->fs_ssize + fs_attr->fs_sunit - 1) / fs_attr->fs_sunit);

    if (fs_attr->fs_ssize > fs_attr->fs_smax_size) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe size %" PRIu64 " exceeds the maximum %"
                PRIu64 ". ", fs_attr->fs_ssize, fs_attr->fs_smax_size);
      fs_attr->fs_ssize = fs_attr->fs_smax_size;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_raid_level,
                     "raid_level", HIO_CONFIG_TYPE_UINT64, NULL, "RAID level for dataset "
                     "data files. Keep in mind that some filesystems only support 1/2 RAID "
                     "levels", 0);

    if (HIO_FILE_MODE_OPTIMIZED == dataset->ds_fmode) {
      fs_attr->fs_scount = 1;
      fs_attr->fs_ssize = dataset->ds_bs;
      fs_attr->fs_use_group_locking = true;
    }
  }

  /* rank 0 prepares the manifest; all other ranks go straight to the scatter */
  do {
    if (0 != context->c_rank) {
      break;
    }

    if (dataset->ds_flags & HIO_FLAG_TRUNC) {
      /* blow away the existing dataset */
      (void) builtin_posix_module_dataset_unlink (module, hioi_object_identifier(dataset), dataset->ds_id);

      /* ensure we take the create path later */
      dataset->ds_flags |= HIO_FLAG_CREAT;
    }

    if (!(dataset->ds_flags & HIO_FLAG_CREAT)) {
      /* load manifest. the manifest data will be shared with other processes in hioi_dataset_scatter */
      rc = asprintf (&path, "%s/manifest.json.bz2", posix_dataset->base_path);
      assert (0 < rc);
      if (access (path, F_OK)) {
        free (path);
        rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path);
        assert (0 < rc);
        if (access (path, F_OK)) {
          hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: could not find top-level manifest");
          /* do not leak the second candidate path */
          free (path);
          path = NULL;
          rc = HIO_ERR_NOT_FOUND;
          break;
        }
      }

      rc = hioi_manifest_read (path, &manifest, &manifest_size);
      free (path);
      path = NULL;
    } else {
      rc = builtin_posix_create_dataset_dirs (posix_module, posix_dataset);
      if (HIO_SUCCESS != rc) {
        break;
      }

      rc = hioi_manifest_serialize (dataset, &manifest, &manifest_size, true);
    }
  } while (0);

  /* share dataset information will all processes in the communication domain */
  rc = hioi_dataset_scatter (dataset, manifest, manifest_size, rc);

  /* the serialized manifest is no longer needed, whether or not the scatter succeeded */
  free (manifest);

  if (HIO_SUCCESS != rc) {
    free (posix_dataset->base_path);
    return rc;
  }

  if (HIO_FILE_MODE_OPTIMIZED == dataset->ds_fmode) {
    if (HIO_SET_ELEMENT_UNIQUE == dataset->ds_mode || 2 > context->c_size || NULL == dataset->ds_shared_control) {
      posix_dataset->base.ds_fmode = HIO_FILE_MODE_BASIC;
      /* NTH: no optimized mode for N->N yet */
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: optimized file mode requested but not supported in this "
                "dataset mode. falling back to basic file mode");
    }
  }

  dataset->ds_module = module;
  dataset->ds_close = builtin_posix_module_dataset_close;
  dataset->ds_element_open = builtin_posix_module_element_open;
  dataset->ds_process_reqs = builtin_posix_module_process_reqs;

  pthread_mutex_init (&posix_dataset->lock, NULL);

  /* record the open time */
  gettimeofday (&dataset->ds_otime, NULL);

  stop = hioi_gettime ();

  /* casts make the arguments match the %llu / %lu conversions */
  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully %s posix dataset %s:%llu on data root %s. "
            "open time %lu usec", (dataset->ds_flags & HIO_FLAG_CREAT) ? "created" : "opened",
            hioi_object_identifier(dataset), (unsigned long long) dataset->ds_id, module->data_root,
            (unsigned long) (stop - start));

  return HIO_SUCCESS;
}
int hioi_fs_query (hio_context_t context, const char *path, hio_fs_attr_t *fs_attr) { struct statfs fsinfo; char tmp[4096]; int rc; if (NULL == path) { return HIO_ERR_BAD_PARAM; } do { if (0 != context->c_rank) { break; } if (NULL == realpath (path, tmp)) { fs_attr->fs_type = hioi_err_errno (errno); break; } /* get general filesystem data */ rc = statfs (tmp, &fsinfo); if (0 > rc) { hioi_log(context, HIO_VERBOSE_DEBUG_LOW, "statfs path:%s rc:%d errno:%d(%s)", tmp, rc, errno, strerror(errno)); fs_attr->fs_type = hioi_err_errno (errno); break; } memset (fs_attr, 0, sizeof (*fs_attr)); fs_attr->fs_bavail = fsinfo.f_bavail; fs_attr->fs_btotal = fsinfo.f_blocks; fs_attr->fs_bsize = fsinfo.f_bsize; /* set some reasonable defaults for striping parameters */ fs_attr->fs_scount = 1; fs_attr->fs_ssize = fs_attr->fs_bsize; /* get filesytem specific data */ switch (fsinfo.f_type) { #if defined(LL_SUPER_MAGIC) case LL_SUPER_MAGIC: hioi_fs_query_lustre (tmp, fs_attr); break; #endif #if defined(GPFS_SUPER_MAGIC) case GPFS_SUPER_MAGIC: /* gpfs */ break; #endif #if defined(PAN_FS_CLIENT_MAGIC) case PAN_FS_CLIENT_MAGIC: /* panfs */ break; #endif #if HIO_USE_DATAWARP case DW_SUPER_MAGIC: hioi_fs_query_datawarp (tmp, fs_attr); break; #endif } hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "filesystem query: path: %s, type: %d, flags: 0x%x, block size: %" PRIu64 " block count: %" PRIu64 " blocks free: %" PRIu64 " stripe count: %" PRIu32 " stripe max count: %" PRIu32 " stripe unit: %" PRIu64 " stripe size: %" PRIu64 " stripe max size: %" PRIu64, tmp, fs_attr->fs_type, fs_attr->fs_flags, fs_attr->fs_bsize, fs_attr->fs_btotal, fs_attr->fs_bavail, fs_attr->fs_scount, fs_attr->fs_smax_count, fs_attr->fs_sunit, fs_attr->fs_ssize, fs_attr->fs_smax_size); } while (0); #if HIO_MPI_HAVE(1) if (hioi_context_using_mpi (context)) { MPI_Bcast (fs_attr, sizeof (*fs_attr), MPI_BYTE, 0, context->c_comm); } #endif if (0 > fs_attr->fs_type) { return fs_attr->fs_type; } fs_attr->fs_open = 
hio_fs_open_fns[fs_attr->fs_type]; /* if this assert is hit the above array needs to be updated */ assert (NULL != fs_attr->fs_open); return HIO_SUCCESS; }
/**
 * Create the directory structure of a dataset.
 *
 * Creates "<base_path>/data" (and any missing parents), applies the dataset's
 * striping parameters to it when the filesystem supports striping (a failure
 * to do so is only logged) and, if tracing is enabled on the context, creates
 * "<base_path>/trace". Only rank 0 touches the filesystem; every other rank
 * returns HIO_SUCCESS without doing anything.
 *
 * @param[in] posix_module   module supplying the context and the access mode
 * @param[in] posix_dataset  dataset whose directories are created
 *
 * @returns HIO_SUCCESS when the directories were created. Otherwise the value
 *          hioi_err_errno() produces for the failing call. An already
 *          existing directory (EEXIST) is returned as an error but is not
 *          pushed onto the error stack.
 */
static int builtin_posix_create_dataset_dirs (builtin_posix_module_t *posix_module, builtin_posix_module_dataset_t *posix_dataset) {
  mode_t access_mode = posix_module->access_mode;
  hio_context_t context = posix_module->base.context;
  int rc, saved_errno;
  char *path;

  if (context->c_rank > 0) {
    return HIO_SUCCESS;
  }

  /* create the data directory*/
  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix: creating dataset directory @ %s", posix_dataset->base_path);

  rc = asprintf (&path, "%s/data", posix_dataset->base_path);
  if (0 > rc) {
    return hioi_err_errno (errno);
  }

  rc = hio_mkpath (context, path, access_mode);
  /* capture errno right away: hioi_err_push() and free() may overwrite it */
  saved_errno = errno;
  if (0 > rc || EEXIST == saved_errno) {
    if (EEXIST != saved_errno) {
      hioi_err_push (hioi_err_errno (saved_errno), &context->c_object, "posix: error creating context directory: %s",
                     path);
    }

    free (path);

    return hioi_err_errno (saved_errno);
  }

  /* set striping parameters on the directory */
  if (posix_dataset->base.ds_fsattr.fs_flags & HIO_FS_SUPPORTS_STRIPING) {
    rc = hioi_fs_set_stripe (path, &posix_dataset->base.ds_fsattr);
    if (HIO_SUCCESS != rc) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: could not set file system striping on %s", path);
    }
  }

  free (path);

  /* create trace directory if requested */
  if (context->c_enable_tracing) {
    rc = asprintf (&path, "%s/trace", posix_dataset->base_path);
    if (0 > rc) {
      return hioi_err_errno (errno);
    }

    rc = hio_mkpath (context, path, access_mode);
    saved_errno = errno;
    if (0 > rc || EEXIST == saved_errno) {
      if (EEXIST != saved_errno) {
        hioi_err_push (hioi_err_errno (saved_errno), &context->c_object, "posix: error creating context directory: %s",
                       path);
      }

      free (path);

      return hioi_err_errno (saved_errno);
    }

    /* the trace path is not needed beyond this point */
    free (path);
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: successfully created dataset directories %s", posix_dataset->base_path);

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_close (hio_dataset_t dataset) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset; hio_context_t context = hioi_object_context ((hio_object_t) dataset); hio_module_t *module = dataset->ds_module; unsigned char *manifest = NULL; uint64_t start, stop; int rc = HIO_SUCCESS; size_t manifest_size; start = hioi_gettime (); for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) { if (posix_dataset->files[i].f_bid >= 0) { POSIX_TRACE_CALL(posix_dataset, hioi_file_close (posix_dataset->files + i), "file_close", posix_dataset->files[i].f_bid, 0); } } #if HIO_MPI_HAVE(3) /* release the shared state if it was allocated */ (void) hioi_dataset_shared_fini (dataset); /* release the dataset map if one was allocated */ (void) hioi_dataset_map_release (dataset); #endif if (dataset->ds_flags & HIO_FLAG_WRITE) { char *path; /* write manifest header */ POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_gather_manifest (dataset, &manifest, &manifest_size, false, true), "gather_manifest", 0, 0); if (HIO_SUCCESS != rc) { dataset->ds_status = rc; } if (0 == context->c_rank) { rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path); if (0 > rc) { /* out of memory. 
not much we can do now */ return hioi_err_errno (errno); } rc = hioi_manifest_save (dataset, manifest, manifest_size, path); free (manifest); free (path); if (HIO_SUCCESS != rc) { hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest"); } } #if HIO_MPI_HAVE(3) if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { /* optimized mode requires a data manifest to describe how the data landed on the filesystem */ POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_gather_manifest_comm (dataset, context->c_shared_comm, &manifest, &manifest_size, posix_dataset->ds_use_bzip, false), "gather_manifest", 0, 0); if (HIO_SUCCESS != rc) { dataset->ds_status = rc; } if (NULL != manifest) { rc = asprintf (&path, "%s/manifest.%x.json%s", posix_dataset->base_path, context->c_rank, posix_dataset->ds_use_bzip ? ".bz2" : ""); if (0 > rc) { return hioi_err_errno (errno); } rc = hioi_manifest_save (dataset, manifest, manifest_size, path); free (manifest); free (path); if (HIO_SUCCESS != rc) { hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest"); } } } #endif } #if HIO_MPI_HAVE(1) /* ensure all ranks have closed the dataset before continuing */ if (hioi_context_using_mpi (context)) { MPI_Allreduce (MPI_IN_PLACE, &rc, 1, MPI_INT, MPI_MIN, context->c_comm); } #endif free (posix_dataset->base_path); stop = hioi_gettime (); builtin_posix_trace (posix_dataset, "close", 0, 0, start, stop); hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully closed posix dataset " "%s:%" PRIu64 " on data root %s. close time %" PRIu64 " usec", hioi_object_identifier(dataset), dataset->ds_id, module->data_root, stop - start); builtin_posix_trace (posix_dataset, "trace_end", 0, 0, 0, 0); if (posix_dataset->ds_trace_fh) { fclose (posix_dataset->ds_trace_fh); } return rc; }
static int builtin_posix_module_dataset_open (struct hio_module_t *module, hio_dataset_t dataset) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset; builtin_posix_module_t *posix_module = (builtin_posix_module_t *) module; hio_context_t context = hioi_object_context ((hio_object_t) dataset); unsigned char *manifest = NULL; size_t manifest_size = 0; uint64_t start, stop; int rc = HIO_SUCCESS; char *path = NULL; start = hioi_gettime (); hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix:dataset_open: opening dataset %s:%lu mpi: %d flags: 0x%x mode: 0x%x", hioi_object_identifier (dataset), (unsigned long) dataset->ds_id, hioi_context_using_mpi (context), dataset->ds_flags, dataset->ds_mode); rc = builtin_posix_module_dataset_init (module, posix_dataset); if (HIO_SUCCESS != rc) { return rc; } rc = builtin_posix_module_setup_striping (context, module, dataset); if (HIO_SUCCESS != rc) { return rc; } if (HIO_FILE_MODE_STRIDED == posix_dataset->ds_fmode) { hioi_config_add (context, &dataset->ds_object, &posix_dataset->ds_fcount, "dataset_file_count", HIO_CONFIG_TYPE_UINT64, NULL, "Number of files to use " "in strided file mode", 0); } else if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { posix_dataset->ds_use_bzip = true; hioi_config_add (context, &dataset->ds_object, &posix_dataset->ds_use_bzip, "dataset_use_bzip", HIO_CONFIG_TYPE_BOOL, NULL, "Use bzip2 compression for dataset manifests", 0); } if (dataset->ds_flags & HIO_FLAG_TRUNC) { /* blow away the existing dataset */ if (0 == context->c_rank) { (void) builtin_posix_module_dataset_unlink (module, hioi_object_identifier(dataset), dataset->ds_id); } } if (!(dataset->ds_flags & HIO_FLAG_CREAT)) { if (0 == context->c_rank) { /* load manifest. 
the manifest data will be shared with other processes in hioi_dataset_scatter */ rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path); assert (0 < rc); if (access (path, F_OK)) { /* this should never happen on a valid dataset */ free (path); hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: could not find top-level manifest %s", path); rc = HIO_ERR_NOT_FOUND; } else { rc = HIO_SUCCESS; } } /* read the manifest if it exists */ if (HIO_SUCCESS == rc) { hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: loading manifest header from %s...", path); rc = hioi_manifest_read (path, &manifest, &manifest_size); free (path); path = NULL; } } else if (0 == context->c_rank) { rc = builtin_posix_create_dataset_dirs (posix_module, posix_dataset); if (HIO_SUCCESS == rc) { /* serialize the manifest to send to remote ranks */ rc = hioi_manifest_serialize (dataset, &manifest, &manifest_size, false, false); } } #if HIO_MPI_HAVE(1) /* share dataset header will all processes in the communication domain */ rc = hioi_dataset_scatter_comm (dataset, context->c_comm, manifest, manifest_size, rc); #endif free (manifest); if (HIO_SUCCESS != rc) { free (posix_dataset->base_path); return rc; } if (context->c_enable_tracing) { char *path; rc = asprintf (&path, "%s/trace/trace.%d", posix_dataset->base_path, context->c_rank); if (rc > 0) { posix_dataset->ds_trace_fh = fopen (path, "a"); free (path); } builtin_posix_trace (posix_dataset, "trace_begin", 0, 0, 0, 0); } #if HIO_MPI_HAVE(3) if (!(dataset->ds_flags & HIO_FLAG_CREAT) && HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { rc = bultin_posix_scatter_data (posix_dataset); if (HIO_SUCCESS != rc) { free (posix_dataset->base_path); return rc; } } /* if possible set up a shared memory window for this dataset */ POSIX_TRACE_CALL(posix_dataset, hioi_dataset_shared_init (dataset, 1), "shared_init", 0, 0); if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { if (2 > context->c_size || NULL == 
dataset->ds_shared_control) { /* no point in using optimized mode in this case */ posix_dataset->ds_fmode = HIO_FILE_MODE_BASIC; hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: optimized file mode requested but not supported in this " "dataset mode. falling back to basic file mode, path: %s", posix_dataset->base_path); } else if (HIO_SET_ELEMENT_SHARED == dataset->ds_mode) { POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_generate_map (dataset), "generate_map", 0, 0); if (HIO_SUCCESS != rc) { free (posix_dataset->base_path); return rc; } } } /* NTH: if requested more code is needed to load an optimized dataset with an older MPI */ #endif /* HIO_MPI_HAVE(3) */ dataset->ds_module = module; dataset->ds_close = builtin_posix_module_dataset_close; dataset->ds_element_open = builtin_posix_module_element_open; dataset->ds_process_reqs = builtin_posix_module_process_reqs; /* record the open time */ gettimeofday (&dataset->ds_otime, NULL); stop = hioi_gettime (); hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully %s posix dataset " "%s:%" PRIu64 " on data root %s. open time %" PRIu64 " usec", (dataset->ds_flags & HIO_FLAG_CREAT) ? "created" : "opened", hioi_object_identifier(dataset), dataset->ds_id, module->data_root, stop - start); builtin_posix_trace (posix_dataset, "open", 0, 0, start, stop); return HIO_SUCCESS; }
/**
 * List the dataset headers stored under a dataset name.
 *
 * Rank 0 scans "<data_root>/<context id>.hio/<name>", skipping entries whose
 * name starts with '.', and tries to read a header from each entry's
 * "manifest.json.bz2" and then "manifest.json". With HIO_USE_MPI the number
 * of headers and the header array are broadcast from rank 0.
 *
 * @param[in]  module   module to search
 * @param[in]  name     dataset name
 * @param[out] headers  set to a calloc'd array of headers, or NULL if none were found
 * @param[out] count    set to the number of entries in *headers
 *
 * @returns HIO_SUCCESS
 */
static int builtin_posix_module_dataset_list (struct hio_module_t *module, const char *name,
                                              hio_dataset_header_t **headers, int *count) {
  hio_context_t context = module->context;
  int num_set_ids = 0, set_id_index = 0;
  int rc = HIO_SUCCESS;
  struct dirent *dp;
  char *path = NULL;
  DIR *dir = NULL;

  *headers = NULL;
  *count = 0;

  do {
    if (0 != context->c_rank) {
      break;
    }

    rc = asprintf (&path, "%s/%s.hio/%s", module->data_root, hioi_object_identifier(context), name);
    assert (0 <= rc);

    dir = opendir (path);
    if (NULL == dir) {
      num_set_ids = 0;
      break;
    }

    /* first pass: count the candidate entries */
    while (NULL != (dp = readdir (dir))) {
      if (dp->d_name[0] != '.') {
        num_set_ids++;
      }
    }

    if (0 == num_set_ids) {
      /* nothing to read. avoid a zero-sized calloc, which may legitimately return NULL */
      break;
    }

    *headers = (hio_dataset_header_t *) calloc (num_set_ids, sizeof (**headers));
    assert (NULL != *headers);

    rewinddir (dir);

    /* second pass: read headers. never read more entries than were allocated, even if
     * the directory gained entries since the first pass */
    while (set_id_index < num_set_ids && NULL != (dp = readdir (dir))) {
      char *manifest_path;

      if ('.' == dp->d_name[0]) {
        continue;
      }

      rc = asprintf (&manifest_path, "%s/%s/manifest.json.bz2", path, dp->d_name);
      assert (0 <= rc);

      rc = hioi_manifest_read_header (context, *headers + set_id_index, manifest_path);
      if (HIO_SUCCESS != rc) {
        /* fall back to the uncompressed manifest */
        free (manifest_path);
        rc = asprintf (&manifest_path, "%s/%s/manifest.json", path, dp->d_name);
        assert (0 <= rc);

        rc = hioi_manifest_read_header (context, *headers + set_id_index, manifest_path);
      }

      if (HIO_SUCCESS == rc) {
        ++set_id_index;
      } else {
        /* skip dataset */
        hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_list: could not read manifest at path: %s. rc: %d",
                  manifest_path, rc);
      }

      free (manifest_path);
    }

    num_set_ids = set_id_index;
  } while (0);

#if HIO_USE_MPI
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (&num_set_ids, 1, MPI_INT, 0, context->c_comm);
  }
#endif

  /* dir stays NULL on non-root ranks and when opendir failed; closedir must not see NULL */
  if (NULL != dir) {
    closedir (dir);
  }
  free (path);

  if (0 == num_set_ids) {
    free (*headers);
    *headers = NULL;

    return HIO_SUCCESS;
  }

  if (0 != context->c_rank) {
    *headers = (hio_dataset_header_t *) calloc (num_set_ids, sizeof (**headers));
    assert (NULL != *headers);
  }

#if HIO_USE_MPI
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (*headers, sizeof (**headers) * num_set_ids, MPI_BYTE, 0, context->c_comm);
  }
#endif

  *count = num_set_ids;

  return HIO_SUCCESS;
}
/**
 * Store a string-encoded value into a configuration variable.
 *
 * For variables with an enumerator the string must either match one of the
 * enumerator's names or convert to one of its integer values. The value is
 * then written to the variable's storage according to its type.
 *
 * @param[in]     context  context used for logging
 * @param[in,out] var      variable to update
 * @param[in]     strval   new value (NULL leaves the variable untouched)
 *
 * @returns HIO_SUCCESS on success (including a NULL strval),
 *          HIO_ERR_BAD_PARAM if the value is not valid for the variable's enumerator,
 *          HIO_ERR_OUT_OF_RESOURCE if a string value could not be duplicated.
 */
static int hioi_config_set_value_internal (hio_context_t context, hio_var_t *var, const char *strval) {
  uint64_t intval;

  if (NULL == strval) {
    /* empty value. nothing to do */
    return HIO_SUCCESS;
  }

  /* convert only after the NULL check so the helper never sees a NULL string */
  intval = hioi_string_to_int (strval);

  if (var->var_enum) {
    bool found = false;

    if ((uint64_t) -1 == intval) {
      /* the string did not convert to an integer: look it up by name */
      for (int i = 0 ; i < var->var_enum->count ; ++i) {
        if (0 == strcmp (var->var_enum->values[i].string_value, strval)) {
          intval = var->var_enum->values[i].value;
          found = true;
          break;
        }
      }
    } else {
      /* integer given: it must be one of the enumerator's values */
      for (int i = 0 ; i < var->var_enum->count ; ++i) {
        if (intval == var->var_enum->values[i].value) {
          found = true;
          break;
        }
      }
    }

    if (found) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting enumeration value to %" PRIu64, intval);
    } else {
      hioi_log (context, HIO_VERBOSE_WARN, "Invalid enumeration value provided for variable %s. Got %s",
                var->var_name, strval);
      return HIO_ERR_BAD_PARAM;
    }
  }

  switch (var->var_type) {
  case HIO_CONFIG_TYPE_BOOL:
    if (0 == strcmp (strval, "true") || 0 == strcmp (strval, "t") ||
        0 == strcmp (strval, "1")) {
      var->var_storage->boolval = true;
    } else if (0 == strcmp (strval, "false") || 0 == strcmp (strval, "f") ||
               0 == strcmp (strval, "0")) {
      var->var_storage->boolval = false;
    } else {
      var->var_storage->boolval = !!intval;
    }

    break;
  case HIO_CONFIG_TYPE_STRING: {
    /* duplicate before releasing the old value: strval may alias the stored string, and
     * on allocation failure the previous value is kept intact */
    char *copy = strdup (strval);
    if (NULL == copy) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }

    free (var->var_storage->strval);
    var->var_storage->strval = copy;

    break;
  }
  case HIO_CONFIG_TYPE_INT32:
    var->var_storage->int32val = (int32_t) (intval & 0xffffffff);
    break;
  case HIO_CONFIG_TYPE_UINT32:
    var->var_storage->uint32val = (uint32_t) (intval & 0xffffffffu);
    break;
  case HIO_CONFIG_TYPE_INT64:
    var->var_storage->int64val = (int64_t) intval;
    break;
  case HIO_CONFIG_TYPE_UINT64:
    var->var_storage->uint64val = intval;
    break;
  case HIO_CONFIG_TYPE_FLOAT:
    var->var_storage->floatval = strtof (strval, NULL);
    break;
  case HIO_CONFIG_TYPE_DOUBLE:
    var->var_storage->doubleval = strtod (strval, NULL);
    break;
  }

  return HIO_SUCCESS;
}
/**
 * Close a posix dataset.
 *
 * Closes every cached file handle, and for datasets opened with
 * HIO_FLAG_WRITE gathers the manifest and has rank 0 write it to
 * "<base_path>/manifest.json[.bz2]". With HIO_USE_MPI the return code is
 * reduced (MPI_MIN) across the context communicator. The dataset's base
 * path and lock are released before returning.
 *
 * @param[in] dataset  dataset being closed
 *
 * @returns the (reduced) status of the close
 */
static int builtin_posix_module_dataset_close (hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) dataset->ds_module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  hio_module_t *module = dataset->ds_module;
  unsigned char *manifest = NULL;
  size_t manifest_size = 0;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;

  start = hioi_gettime ();

  for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) {
    if (posix_dataset->files[i].f_file.f_hndl != NULL) {
      fclose (posix_dataset->files[i].f_file.f_hndl);
      posix_dataset->files[i].f_file.f_hndl = NULL;
    }
  }

  if (dataset->ds_flags & HIO_FLAG_WRITE) {
    rc = hioi_dataset_gather_manifest (dataset, &manifest, &manifest_size, dataset->ds_use_bzip);
    if (HIO_SUCCESS != rc) {
      dataset->ds_status = rc;
    }

    if (0 == context->c_rank) {
      char *path;

      rc = asprintf (&path, "%s/manifest.json%s", posix_dataset->base_path,
                     dataset->ds_use_bzip ? ".bz2" : "");
      if (0 < rc) {
        /* first error seen while writing the manifest (0 if none). captured right after
         * each failing call so later calls cannot clobber it */
        int saved_errno = 0;
        int fd;

        /* truncate so a shorter manifest does not leave stale bytes from an older one */
        fd = open (path, O_CREAT | O_WRONLY | O_TRUNC, posix_module->access_mode);
        if (0 <= fd) {
          const unsigned char *cursor = manifest;
          size_t remaining = (NULL != manifest) ? manifest_size : 0;

          /* write() may be interrupted or write fewer bytes than requested */
          while (remaining > 0) {
            ssize_t written = write (fd, cursor, remaining);
            if (written < 0) {
              if (EINTR == errno) {
                continue;
              }
              saved_errno = errno;
              break;
            }

            if (0 == written) {
              /* no forward progress. bail out rather than spin */
              saved_errno = EIO;
              break;
            }

            cursor += written;
            remaining -= (size_t) written;
          }

          if (0 != close (fd) && 0 == saved_errno) {
            saved_errno = errno;
          }
        } else {
          saved_errno = errno;
        }

        free (path);

        rc = hioi_err_errno (saved_errno);
        if (HIO_SUCCESS != rc) {
          hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest");
        }
      } else {
        rc = HIO_ERR_OUT_OF_RESOURCE;
      }
    }

    /* release the gathered manifest on every rank and on every path */
    free (manifest);
  }

#if HIO_USE_MPI
  /* ensure all ranks have closed the dataset before continuing */
  if (hioi_context_using_mpi (context)) {
    MPI_Allreduce (MPI_IN_PLACE, &rc, 1, MPI_INT, MPI_MIN, context->c_comm);
  }
#endif

  free (posix_dataset->base_path);

  pthread_mutex_destroy (&posix_dataset->lock);

  stop = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully closed posix dataset %s:%" PRIu64 " on data root %s. "
            "close time %" PRIu64 " usec", hioi_object_identifier(dataset), dataset->ds_id, module->data_root, stop - start);

  return rc;
}
static int builtin_posix_element_translate_opt (builtin_posix_module_t *posix_module, hio_element_t element, uint64_t offset, size_t *size, hio_file_t **file_out, bool reading) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element); hio_context_t context = hioi_object_context (&element->e_object); hio_file_t *file; uint64_t file_offset; int file_index = 0; char *path; int rc; hioi_log (context, HIO_VERBOSE_DEBUG_MED, "translating element %s offset %" PRIu64 " size %lu", hioi_object_identifier (&element->e_object), offset, *size); POSIX_TRACE_CALL(posix_dataset, rc = hioi_element_translate_offset (element, offset, &file_index, &file_offset, size), "translate_offset", offset, *size); #if HIO_MPI_HAVE(3) if (HIO_SUCCESS != rc && reading) { POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_map_translate_offset (element, offset, &file_index, &file_offset, size), "map_translate_offset", offset, *size); } #endif if (HIO_SUCCESS != rc) { if (reading) { hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset %" PRIu64 " not found", offset); /* not found */ return rc; } file_offset = builtin_posix_reserve (posix_dataset, size); if (hioi_context_using_mpi (context)) { file_index = posix_dataset->base.ds_shared_control->s_master; } else { file_index = 0; } rc = asprintf (&path, "%s/data/data.%x", posix_dataset->base_path, file_index); if (0 > rc) { return HIO_ERR_OUT_OF_RESOURCE; } hioi_element_add_segment (element, file_index, file_offset, offset, *size); } else { hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset found in file @ rank %d, offset %" PRIu64 ", size %lu", file_index, file_offset, *size); rc = asprintf (&path, "%s/data/data.%x", posix_dataset->base_path, file_index); if (0 > rc) { return HIO_ERR_OUT_OF_RESOURCE; } if (access (path, R_OK)) { free (path); rc = asprintf (&path, "%s/data.%x", posix_dataset->base_path, file_index); if (0 > rc) { return HIO_ERR_OUT_OF_RESOURCE; } } } /* use crc as a hash to pick a 
file index to use */ int internal_index = file_index % HIO_POSIX_MAX_OPEN_FILES; file = posix_dataset->files + internal_index; if (file_index != file->f_bid) { if (file->f_bid >= 0) { POSIX_TRACE_CALL(posix_dataset, hioi_file_close (file), "file_close", file->f_bid, 0); } file->f_bid = -1; POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, file), "file_open", file_index, 0); if (HIO_SUCCESS != rc) { free (path); return rc; } file->f_bid = file_index; } free (path); POSIX_TRACE_CALL(posix_dataset, hioi_file_seek (file, file_offset, SEEK_SET), "file_seek", file->f_bid, file_offset); *file_out = file; return HIO_SUCCESS; }
static int builtin_posix_module_setup_striping (hio_context_t context, struct hio_module_t *module, hio_dataset_t dataset) { builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset; hio_fs_attr_t *fs_attr = &dataset->ds_fsattr; int rc; /* query the filesystem for current striping parameters */ rc = hioi_fs_query (context, module->data_root, fs_attr); if (HIO_SUCCESS != rc) { hioi_err_push (rc, &context->c_object, "posix: error querying the filesystem"); return rc; } /* for now do not use stripe exclusivity in any path */ posix_dataset->my_stripe = 0; /* set default stripe count */ fs_attr->fs_scount = 1; posix_dataset->ds_fcount = 1; if (fs_attr->fs_flags & HIO_FS_SUPPORTS_STRIPING) { if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) { /* pick a reasonable default stripe size */ fs_attr->fs_ssize = 1 << 24; /* use group locking if available as we guarantee stripe exclusivity in optimized mode */ fs_attr->fs_use_group_locking = true; #if HIO_MPI_HAVE(3) /* if group locking is not available then each rank should attempt to write to * a different stripe to maximize the available IO bandwidth */ fs_attr->fs_scount = min(context->c_shared_size, fs_attr->fs_smax_count); #endif } else if (HIO_FILE_MODE_STRIDED == posix_dataset->ds_fmode) { /* pick a reasonable default stripe size */ fs_attr->fs_ssize = posix_dataset->ds_bs; posix_dataset->ds_fcount = fs_attr->fs_smax_count * 32; fs_attr->fs_scount = 16; if (context->c_size < posix_dataset->ds_fcount) { posix_dataset->ds_fcount = context->c_size; } } else if (HIO_SET_ELEMENT_UNIQUE != dataset->ds_mode) { /* set defaults striping count */ fs_attr->fs_ssize = 1 << 20; fs_attr->fs_scount = max (1, (unsigned) ((float) fs_attr->fs_smax_count * 0.9)); } else { fs_attr->fs_ssize = 1 << 20; } hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_scount, "stripe_count", HIO_CONFIG_TYPE_UINT32, NULL, "Stripe count for all dataset " "data files", 0); hioi_config_add (context, 
&dataset->ds_object, &fs_attr->fs_ssize, "stripe_size", HIO_CONFIG_TYPE_UINT64, NULL, "Stripe size for all dataset " "data files", 0); if (fs_attr->fs_flags & HIO_FS_SUPPORTS_RAID) { hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_raid_level, "raid_level", HIO_CONFIG_TYPE_UINT64, NULL, "RAID level for dataset " "data files. Keep in mind that some filesystems only support 1/2 RAID " "levels", 0); } /* ensure stripe count is sane */ if (fs_attr->fs_scount > fs_attr->fs_smax_count) { hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe count %u exceeds the available resources. " "adjusting to maximum %u", fs_attr->fs_scount, fs_attr->fs_smax_count); fs_attr->fs_scount = fs_attr->fs_smax_count; } /* ensure the stripe size is a multiple of the stripe unit */ fs_attr->fs_ssize = fs_attr->fs_sunit * ((fs_attr->fs_ssize + fs_attr->fs_sunit - 1) / fs_attr->fs_sunit); if (fs_attr->fs_ssize > fs_attr->fs_smax_size) { hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe size %" PRIu64 " exceeds the maximum %" PRIu64 ". ", fs_attr->fs_ssize, fs_attr->fs_smax_size); fs_attr->fs_ssize = fs_attr->fs_smax_size; } if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode && posix_dataset->ds_bs < fs_attr->fs_ssize) { posix_dataset->ds_bs = fs_attr->fs_ssize; } } return HIO_SUCCESS; }