static int builtin_posix_create_dataset_dirs (builtin_posix_module_t *posix_module, builtin_posix_module_dataset_t *posix_dataset) {
  mode_t access_mode = posix_module->access_mode;
  hio_context_t context = posix_module->base.context;
  int rc;

  if (context->c_rank > 0) {
    return HIO_SUCCESS;
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix: creating dataset directory @ %s", posix_dataset->base_path);

  rc = hio_mkpath (context, posix_dataset->base_path, access_mode);
  if (0 > rc || EEXIST == errno) {
    if (EEXIST != errno) {
      hioi_err_push (hioi_err_errno (errno), &context->c_object, "posix: error creating context directory: %s",
                    posix_dataset->base_path);
    }

    return hioi_err_errno (errno);
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: successfully created dataset directories");

  return HIO_SUCCESS;
}
static int builtin_posix_element_translate_strided (builtin_posix_module_t *posix_module, hio_element_t element,
                                                    uint64_t offset, size_t *size, hio_file_t **file_out) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  size_t block_id, block_base, block_bound, block_offset, file_id, file_block;
  hio_context_t context = hioi_object_context (&element->e_object);
  hio_file_t *file;
  int32_t file_index;
  char *path;
  int rc;

  block_id = offset / posix_dataset->ds_bs;

  file_id = block_id % posix_dataset->ds_fcount;
  file_block = block_id / posix_dataset->ds_fcount;

  block_base = block_id * posix_dataset->ds_bs;
  block_bound = block_base + posix_dataset->ds_bs;
  block_offset = file_block * posix_dataset->ds_bs + offset - block_base;

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "builtin_posix_element_translate_strided: element: %s, offset: %"
            PRIu64 ", file_id: %lu, file_block: %lu, block_offset: %lu, block_size: %" PRIu64,
            hioi_object_identifier(element), offset, file_id, file_id, block_offset, posix_dataset->ds_bs);

  if (offset + *size > block_bound) {
    *size = block_bound - offset;
  }

  rc = asprintf (&path, "%s/data/%s_block.%08lu", posix_dataset->base_path, hioi_object_identifier(element),
                 (unsigned long) file_id);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  /* use crc as a hash to pick a file index to use */
  file_index = file_id % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + file_index;

  if (file_id != file->f_bid || file->f_element != element) {
    if (file->f_bid >= 0) {
      POSIX_TRACE_CALL(posix_dataset, hioi_file_close (file), "file_close", file->f_bid, 0);
    }
    file->f_bid = -1;

    file->f_element = element;

    POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, file),
                     "file_open", file_id, 0);
    if (HIO_SUCCESS != rc) {
      return rc;
    }

    file->f_bid = file_id;
  }

  POSIX_TRACE_CALL(posix_dataset, hioi_file_seek (file, block_offset, SEEK_SET), "file_seek", file->f_bid, block_offset);

  *file_out = file;

  return HIO_SUCCESS;
}
static int builtin_posix_module_element_open (hio_dataset_t dataset, hio_element_t element) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) dataset->ds_module;
  hio_context_t context = hioi_object_context (&dataset->ds_object);
  int rc;

  if (HIO_FILE_MODE_BASIC == dataset->ds_fmode) {
    rc = builtin_posix_module_element_open_basic (posix_module, posix_dataset, element);
    if (HIO_SUCCESS != rc) {
      hioi_object_release (&element->e_object);
      return rc;
    }
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: %s element %p (identifier %s) for dataset %s",
	    (HIO_FLAG_WRITE & dataset->ds_flags) ? "created" : "opened", element,
            hioi_object_identifier(element), hioi_object_identifier(dataset));

  element->e_write_strided_nb = builtin_posix_module_element_write_strided_nb;
  element->e_read_strided_nb = builtin_posix_module_element_read_strided_nb;
  element->e_flush = builtin_posix_module_element_flush;
  element->e_complete = builtin_posix_module_element_complete;
  element->e_close = builtin_posix_module_element_close;

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_unlink (struct hio_module_t *module, const char *name, int64_t set_id) {
  struct stat statinfo;
  char *path = NULL;
  int rc;

  if (module->context->c_rank) {
    return HIO_ERR_NOT_AVAILABLE;
  }

  rc = builtin_posix_dataset_path (module, &path, name, set_id);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  if (stat (path, &statinfo)) {
    free (path);
    return hioi_err_errno (errno);
  }

  hioi_log (module->context, HIO_VERBOSE_DEBUG_LOW, "posix: unlinking existing dataset %s::%llu",
            name, set_id);

  /* use tree walk depth-first to remove all of the files for this dataset */
  rc = nftw (path, builtin_posix_unlink_cb, 32, FTW_DEPTH | FTW_PHYS);
  free (path);
  if (0 > rc) {
    hioi_err_push (hioi_err_errno (errno), &module->context->c_object, "posix: could not unlink dataset. errno: %d",
                  errno);
    return hioi_err_errno (errno);
  }

  return HIO_SUCCESS;
}
Exemple #5
0
static int hioi_fs_open_datawarp (hio_context_t context, const char *path, hio_fs_attr_t *fs_attr,
				  int flags, int mode) {
#if defined(DW_SUPER_MAGIC)
  int rc, fd;

  fd = open (path, flags, mode);
  if (-1 == fd && EEXIST == errno) {
    flags &= ~O_CREAT;
    fd = open (path, flags);
    if (fd < 0) {
      return hioi_err_errno (errno);
    }

    return fd;
  }

  if (fd >= 0) {
    if (flags & O_CREAT) {
      rc = dw_set_stripe_configuration (fd, fs_attr->fs_ssize, fs_attr->fs_scount);
      if (0 != rc) {
	hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "datawarp: could not set file striping parameters: "
		  "errno = %d", -rc);
      }
    }

    return fd;
  }

  return hioi_err_errno (errno);
#else
  return hioi_fs_open_posix (context, path, fs_attr, flags, mode);
#endif
}
Exemple #6
0
static int hioi_config_set_from_env (hio_context_t context, hio_object_t object,
				     hio_var_t *var) {
  char *string_value;
  char env_name[256];

  if (HIO_OBJECT_TYPE_DATASET == object->type) {
    /* check for dataset specific variables */
    snprintf (env_name, 256, "%sdataset_%s_%s_%s", hio_config_env_prefix, context->c_object.identifier,
              object->identifier, var->var_name);

    hioi_log (context, HIO_VERBOSE_DEBUG_MED, "Looking for variable %s", env_name);

    string_value = getenv (env_name);
    if (NULL != string_value) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from ENV %s",
                var->var_name, string_value, env_name);
      return hioi_config_set_value_internal (context, var, string_value);
    }

    snprintf (env_name, 256, "%sdataset_%s_%s", hio_config_env_prefix, object->identifier, var->var_name);

    hioi_log (context, HIO_VERBOSE_DEBUG_MED, "Looking for variable %s", env_name);

    string_value = getenv (env_name);
    if (NULL != string_value) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from ENV %s",
                var->var_name, string_value, env_name);
      return hioi_config_set_value_internal (context, var, string_value);
    }
  }

  snprintf (env_name, 256, "%scontext_%s_%s", hio_config_env_prefix, context->c_object.identifier,
            var->var_name);

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "Looking for variable %s", env_name);

  string_value = getenv (env_name);
  if (NULL != string_value) {
    hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from ENV %s",
              var->var_name, string_value, env_name);
    return hioi_config_set_value_internal (context, var, string_value);
  }

  snprintf (env_name, 256, "%s%s", hio_config_env_prefix, var->var_name);

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "Looking for variable %s", env_name);

  string_value = getenv (env_name);
  if (NULL != string_value) {
    hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from ENV %s",
              var->var_name, string_value, env_name);
    return hioi_config_set_value_internal (context, var, string_value);
  }

  return HIO_SUCCESS;
}
static int builtin_posix_module_fini (struct hio_module_t *module) {
  hioi_log (module->context, HIO_VERBOSE_DEBUG_LOW, "posix: finalizing module for data root %s",
	    module->data_root);

  free (module->data_root);
  free (module);

  return HIO_SUCCESS;
}
Exemple #8
0
int hio_mkpath (hio_context_t context, const char *path, mode_t access_mode) {
  char *tmp = strdup (path);
  int rc;

  if (NULL == tmp) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  for (char *sep = strchr (tmp, '/') ; sep ; sep = strchr (sep + 1, '/')) {
    if (sep == tmp) {
      continue;
    }

    *sep = '\0';
    errno = 0;

    if (access (tmp, F_OK)) {
      hioi_log (context, HIO_VERBOSE_DEBUG_MED, "creating directory %s with permissions 0%o", tmp, access_mode);

      rc = mkdir (tmp, access_mode);
      if (0 != rc && (EEXIST != errno)) {
        hioi_log (context, HIO_VERBOSE_WARN, "could not create directory %s. errno: %d", tmp, errno);
        free (tmp);
        return HIO_ERROR;
      }
    } else {
      errno = EEXIST;
    }

    *sep = '/';
  }

  errno = 0;
  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "creating directory %s with permissions 0%o", tmp, access_mode);
  rc = mkdir (tmp, access_mode);
  free (tmp);
  return (rc && errno != EEXIST) ? HIO_ERROR : HIO_SUCCESS;
}
Exemple #9
0
/**
 * Search for a matching value in the configuration file.
 *
 * @param[in] context  context to search
 * @param[in] object   associated object
 * @param[in] var      variable to set
 *
 * This function currently does a linear search of the configuration
 * file. In the future this should be replaced with a hash table or
 * similar structure.
 */
static int hioi_config_set_from_kv_list (hio_config_kv_list_t *list, hio_object_t object,
                                         hio_var_t *var) {
  hio_context_t context = hioi_object_context (object);

  for (int i = 0 ; i < list->kv_list_count ; ++i) {
    hio_config_kv_t *kv = list->kv_list + i;
    if ((HIO_OBJECT_TYPE_ANY == kv->object_type || object->type == kv->object_type) &&
        (NULL == kv->object_identifier || !strcmp (object->identifier, kv->object_identifier)) &&
        !strcmp (var->var_name, kv->key)) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting value for %s to %s from file",
                var->var_name, kv->value);
      return hioi_config_set_value_internal (context, var, kv->value);
    }
  }

  return HIO_SUCCESS;
}
Exemple #10
0
void hioi_err_push (int hrc, hio_object_t object, char *format, ...) {
  hio_context_t context = object ? hioi_object_context (object) : NULL;
  hio_error_stack_item_t *new_item;
  va_list vargs;
  int rc;

  new_item = calloc (1, sizeof (hio_error_stack_item_t));
  if (NULL == new_item) {
    /* not much can be done here. we are just plain OOM. */
    return;
  }

  va_start (vargs, format);

  rc = vasprintf (&new_item->error_string, format, vargs);

  va_end (vargs);

  if (0 >= rc) {
    /* couldn't allocate error string */
    free (new_item);
    return;
  }

  if (context) {
    hioi_log (context, HIO_VERBOSE_ERROR, "%s", new_item->error_string);
  }

  new_item->hrc = hrc;

  /* push the error message onto the stack */
  if (NULL == context) {
    pthread_mutex_lock (&hio_error_stack_mutex);
    new_item->next = hio_error_stack_head;
    hio_error_stack_head = new_item;
    pthread_mutex_unlock (&hio_error_stack_mutex);
  } else {
    hioi_object_lock (&context->c_object);
    new_item->next = (hio_error_stack_item_t *) context->c_estack;
    context->c_estack = (void *) new_item;
    hioi_object_unlock (&context->c_object);
   }
}
static int builtin_posix_component_query (hio_context_t context, const char *data_root,
					  const char *next_data_root, hio_module_t **module) {
  builtin_posix_module_t *new_module;

  if (0 == strncasecmp("datawarp", data_root, 8) || 0 == strncasecmp("dw", data_root, 2)) {
    return HIO_ERR_NOT_AVAILABLE;
  }

  if (0 == strncasecmp("posix:", data_root, 6)) {
    /* skip posix: */
    data_root += 6;
  }

  if (access (data_root, F_OK)) {
    hioi_err_push (hioi_err_errno (errno), &context->c_object, "posix: data root %s does not exist or can not be accessed",
                  data_root);
    return hioi_err_errno (errno);
  }

  new_module = calloc (1, sizeof (builtin_posix_module_t));
  if (NULL == new_module) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  memcpy (new_module, &builtin_posix_module_template, sizeof (builtin_posix_module_template));

  new_module->base.data_root = strdup (data_root);
  new_module->base.context = context;

  /* get the current umask */
  new_module->access_mode = umask (0);
  umask (new_module->access_mode);

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: created module for data root %s. using umask %o",
	    data_root, new_module->access_mode);

  new_module->access_mode ^= 0777;
  *module = &new_module->base;

  return HIO_SUCCESS;
}
static ssize_t builtin_posix_module_element_write_strided_internal (builtin_posix_module_t *posix_module, hio_element_t element,
                                                                    off_t offset, const void *ptr, size_t count, size_t size,
                                                                    size_t stride) {
  hio_dataset_t dataset = hioi_element_dataset (element);
  size_t bytes_written = 0, ret;
  hio_file_t *file;
  uint64_t stop, start;
  int rc;

  assert (dataset->ds_flags & HIO_FLAG_WRITE);

  if (!(count * size)) {
    return 0;
  }

  if (0 == stride) {
    size *= count;
    count = 1;
  }

  start = hioi_gettime ();

  errno = 0;

  for (size_t i = 0 ; i < count ; ++i) {
    size_t req = size, actual;

    do {
      actual = req;

      rc = builtin_posix_element_translate (posix_module, element, offset, &actual,
                                            &file, false);
      assert (file);
      if (HIO_SUCCESS != rc) {
        break;
      }

      ret = fwrite (ptr, 1, actual, file->f_hndl);
      if (ret > 0) {
        bytes_written += ret;
        file->f_offset += ret;
      }

      if (ret < actual) {
        /* short write */
        break;
      }

      req -= actual;
      offset += actual;
      ptr = (void *) ((intptr_t) ptr + actual);
    } while (req);

    if (HIO_SUCCESS != rc || req) {
      break;
    }

    ptr = (void *) ((intptr_t) ptr + stride);
  }

  if (0 == bytes_written || HIO_SUCCESS != rc) {
    if (0 == bytes_written) {
      rc = hioi_err_errno (errno);
    }

    dataset->ds_status = rc;
    return rc;
  }

  if (offset + bytes_written > element->e_size) {
    element->e_size = offset + bytes_written;
  }

  stop = hioi_gettime ();

  dataset->ds_stat.s_wtime += stop - start;

  if (0 < bytes_written) {
    dataset->ds_stat.s_bwritten += bytes_written;
  }

  hioi_log (hioi_object_context (&element->e_object), HIO_VERBOSE_DEBUG_LOW, "posix: finished write. bytes written: "
            "%lu, time: %llu usec", bytes_written, stop - start);

  return bytes_written;
}
static int builtin_posix_element_translate_opt (builtin_posix_module_t *posix_module, hio_element_t element, off_t offset,
                                                size_t *size, hio_file_t **file_out, bool reading) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  builtin_posix_file_t *file;
  uint64_t file_offset;
  int file_index;
  char *path;
  int rc;

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "translating element %s offset %ld size %lu",
            hioi_object_identifier (&element->e_object), offset, *size);
  rc = hioi_element_translate_offset (element, offset, &file_index, &file_offset, size);
  if (HIO_SUCCESS != rc) {
    if (reading) {
      hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset not found");
      /* not found */
      return rc;
    }

    if (hioi_context_using_mpi (context)) {
      rc = asprintf (&path, "%s/data.%x", posix_dataset->base_path, posix_dataset->base.ds_shared_control->s_master);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    } else {
      rc = asprintf (&path, "%s/data", posix_dataset->base_path);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    }

    file_offset = builtin_posix_reserve (posix_dataset, size);

    file_index = hioi_dataset_add_file (&posix_dataset->base, strrchr (path, '/') + 1);
    hioi_element_add_segment (element, file_index, file_offset, offset, *size);
  } else {
    hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset found in file @ index %d, offset %lu, size %lu", file_index,
              file_offset, *size);
    rc = asprintf (&path, "%s/%s", posix_dataset->base_path, posix_dataset->base.ds_flist[file_index].f_name);
    if (0 > rc) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }
  }

  /* use crc as a hash to pick a file index to use */
  int internal_index = file_index % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + internal_index;

  if (internal_index != file->f_bid) {
    if (NULL != file->f_file.f_hndl) {
      fclose (file->f_file.f_hndl);
      file->f_file.f_hndl = NULL;
      file->f_bid = -1;
    }

    rc = builtin_posix_open_file (posix_module, posix_dataset, path, &file->f_file);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = file_index;
  }

  free (path);

  if (file_offset != file->f_file.f_offset) {
    fseek (file->f_file.f_hndl, file_offset, SEEK_SET);
    file->f_file.f_offset = file_offset;
  }

  *file_out = &file->f_file;

  return HIO_SUCCESS;
}
static int builtin_posix_element_translate_opt_old (builtin_posix_module_t *posix_module, hio_element_t element, off_t offset,
                                                    size_t *size, hio_file_t **file_out) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  size_t block_id, block_base, block_bound, block_offset;
  builtin_posix_file_t *file;
  int32_t file_index;
  char *path;
  int rc, foo;

  block_id = offset / posix_dataset->base.ds_bs;
  block_base = block_id * posix_dataset->base.ds_bs;
  block_bound = block_base + posix_dataset->base.ds_bs;
  block_offset = offset - block_base;

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "builtin_posix_element_translate: element: %s, offset: %lu, block_id: %lu, "
            "block_offset: %lu, block_size: %lu", hioi_object_identifier(element), (unsigned long) offset,
            block_id, block_offset, posix_dataset->base.ds_bs);

  if (offset + *size > block_bound) {
    *size = block_bound - offset;
  }

  rc = asprintf (&path, "%s_block.%lu", hioi_object_identifier(element), (unsigned long) block_id);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  if (HIO_FLAG_WRITE & posix_dataset->base.ds_flags) {
    foo = hioi_dataset_add_file (&posix_dataset->base, path);
  }
  char *tmp = path;
  rc = asprintf (&path, "%s/%s", posix_dataset->base_path, tmp);
  free (tmp);
  if (0 > rc) {
    return HIO_ERR_OUT_OF_RESOURCE;
  }

  /* use crc as a hash to pick a file index to use */
  file_index = hioi_crc32 ((uint8_t *) path, strlen (path)) % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + file_index;

  if (block_id != file->f_bid || file->f_element != element) {
    if (file->f_file.f_hndl != NULL) {
      fclose (file->f_file.f_hndl);
      file->f_file.f_hndl = NULL;
      file->f_bid = -1;
    }

    file->f_element = element;

    rc = builtin_posix_open_file (posix_module, posix_dataset, path, &file->f_file);
    if (HIO_SUCCESS != rc) {
      return rc;
    }

    file->f_bid = block_id;
  }

  if (block_offset != file->f_file.f_offset) {
    fseek (file->f_file.f_hndl, block_offset, SEEK_SET);
    file->f_file.f_offset = block_offset;
  }

  if (HIO_FLAG_WRITE & posix_dataset->base.ds_flags) {
    hioi_element_add_segment (element, foo, block_offset, offset, *size);
  }

  *file_out = &file->f_file;

  return HIO_SUCCESS;
}
static int bultin_posix_scatter_data (builtin_posix_module_dataset_t *posix_dataset) {
  hio_context_t context = hioi_object_context ((hio_object_t) posix_dataset);
  size_t manifest_size = 0, manifest_id_count = 0;
  unsigned char *manifest = NULL;
  int rc = HIO_SUCCESS;
  int *manifest_ids;
  char *path;

  if (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
    /* only read the manifest this rank wrote */
    manifest_id_count = 1;
    manifest_ids = malloc (sizeof (*manifest_ids));
    manifest_ids[0] = context->c_rank;
  } else {
    rc = builtin_posix_module_dataset_manifest_list (posix_dataset, &manifest_ids, &manifest_id_count);
    if (HIO_SUCCESS != rc) {
      return rc;
    }
  }

  for (size_t i = 0 ; i < manifest_id_count ; ++i) {
    if (-1 == manifest_ids[i]) {
      /* nothing more to do */
      break;
    }

    hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: reading manifest data from id %x\n",
              manifest_ids[i]);

    /* when writing the manifest in optimized mode each IO manager writes its own manifest. try
     * to open the manifest. if a manifest does not exist then it is likely this rank did not
     * write a manifest. IO managers will distribute the manifest data to the appropriate ranks
     * in hioi_dataset_scatter(). */
    rc = asprintf (&path, "%s/manifest.%x.json.bz2", posix_dataset->base_path, manifest_ids[i]);
    assert (0 < rc);

    if (access (path, F_OK)) {
      free (path);
      /* Check for a non-bzip'd manifest file. */
      rc = asprintf (&path, "%s/manifest.%x.json", posix_dataset->base_path, manifest_ids[i]);
      assert (0 < rc);
      if (access (path, F_OK)) {
        /* no manifest found. this might be a non-optimized file format or this rank may not be an
         * IO master rank. */
        free (path);
        path = NULL;
      }
    }

    if (path) {
      unsigned char *tmp = NULL;
      size_t tmp_size = 0;
      /* read the manifest if it exists */
      rc = hioi_manifest_read (path, &tmp, &tmp_size);
      if (HIO_SUCCESS == rc) {
        rc = hioi_manifest_merge_data2 (&manifest, &manifest_size, tmp, tmp_size);
        free (tmp);
      }

      free (path);

      if (HIO_SUCCESS != rc) {
        break;
      }
    }
  }

  /* share dataset information with all processes on this node */
  if (HIO_SET_ELEMENT_UNIQUE == posix_dataset->base.ds_mode) {
    rc = hioi_dataset_scatter_unique (&posix_dataset->base, manifest, manifest_size, rc);
  } else {
    rc = hioi_dataset_scatter_comm (&posix_dataset->base, context->c_shared_comm, manifest, manifest_size, rc);
  }

  free (manifest_ids);
  free (manifest);

  return rc;
}
static int builtin_posix_module_dataset_open (struct hio_module_t *module, hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  unsigned char *manifest = NULL;
  size_t manifest_size = 0;
  hio_fs_attr_t *fs_attr;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;
  char *path = NULL;

  start = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix:dataset_open: opening dataset %s:%lu mpi: %d flags: 0x%x mode: 0x%x",
	    hioi_object_identifier (dataset), (unsigned long) dataset->ds_id, hioi_context_using_mpi (context),
            dataset->ds_flags, dataset->ds_mode);

  rc = builtin_posix_module_dataset_init (module, posix_dataset);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  fs_attr = &posix_dataset->base.ds_fsattr;

  rc = hioi_fs_query (context, module->data_root, fs_attr);
  if (HIO_SUCCESS != rc) {
    hioi_err_push (rc, &context->c_object, "posix: error querying the filesystem");
    return rc;
  }

  if (fs_attr->fs_flags & HIO_FS_SUPPORTS_STRIPING) {
    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_scount,
                     "stripe_count", HIO_CONFIG_TYPE_UINT32, NULL, "Stripe count for all dataset "
                     "data files", 0);

    if (fs_attr->fs_scount > fs_attr->fs_smax_count) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe count %u exceeds the available resources. "
                "adjusting to maximum %u", fs_attr->fs_scount, fs_attr->fs_smax_count);
      fs_attr->fs_scount = fs_attr->fs_smax_count;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_ssize,
                     "stripe_size", HIO_CONFIG_TYPE_UINT64, NULL, "Stripe size for all dataset "
                     "data files", 0);

    /* ensure the stripe size is a multiple of the stripe unit */
    fs_attr->fs_ssize = fs_attr->fs_sunit * ((fs_attr->fs_ssize + fs_attr->fs_sunit - 1) / fs_attr->fs_sunit);
    if (fs_attr->fs_ssize > fs_attr->fs_smax_size) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe size %" PRIu64 " exceeds the maximum %"
                PRIu64 ". ", fs_attr->fs_ssize, fs_attr->fs_smax_size);
      fs_attr->fs_ssize = fs_attr->fs_smax_size;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_raid_level,
                     "raid_level", HIO_CONFIG_TYPE_UINT64, NULL, "RAID level for dataset "
                     "data files. Keep in mind that some filesystems only support 1/2 RAID "
                     "levels", 0);

    if (HIO_FILE_MODE_OPTIMIZED == dataset->ds_fmode) {
      fs_attr->fs_scount = 1;
      fs_attr->fs_ssize = dataset->ds_bs;
      fs_attr->fs_use_group_locking = true;
    }
  }

  do {
    if (0 != context->c_rank) {
      break;
    }

    if (dataset->ds_flags & HIO_FLAG_TRUNC) {
      /* blow away the existing dataset */
      (void) builtin_posix_module_dataset_unlink (module, hioi_object_identifier(dataset),
                                                  dataset->ds_id);

      /* ensure we take the create path later */
      dataset->ds_flags |= HIO_FLAG_CREAT;
    }

    if (!(dataset->ds_flags & HIO_FLAG_CREAT)) {
      /* load manifest. the manifest data will be shared with other processes in hioi_dataset_scatter */
      rc = asprintf (&path, "%s/manifest.json.bz2", posix_dataset->base_path);
      assert (0 < rc);

      if (access (path, F_OK)) {
        free (path);
        rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path);
        assert (0 < rc);
        if (access (path, F_OK)) {
          hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: could not find top-level manifest");
          rc = HIO_ERR_NOT_FOUND;
          break;
        }
      }

      rc = hioi_manifest_read (path, &manifest, &manifest_size);
      free (path);
    } else {
      rc = builtin_posix_create_dataset_dirs (posix_module, posix_dataset);
      if (HIO_SUCCESS != rc) {
        break;
      }

      rc = hioi_manifest_serialize (dataset, &manifest, &manifest_size, true);
    }
  } while (0);

  /* share dataset information will all processes in the communication domain */
  rc = hioi_dataset_scatter (dataset, manifest, manifest_size, rc);
  if (HIO_SUCCESS != rc) {
    free (posix_dataset->base_path);
    return rc;
  }

  free (manifest);

  if (HIO_FILE_MODE_OPTIMIZED == dataset->ds_fmode) {
    if (HIO_SET_ELEMENT_UNIQUE == dataset->ds_mode || 2 > context->c_size || NULL == dataset->ds_shared_control) {
      posix_dataset->base.ds_fmode = HIO_FILE_MODE_BASIC;
      /* NTH: no optimized mode for N->N yet */
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: optimized file mode requested but not supported in this "
                "dataset mode. falling back to basic file mode");
    }
  }

  dataset->ds_module = module;
  dataset->ds_close = builtin_posix_module_dataset_close;
  dataset->ds_element_open = builtin_posix_module_element_open;
  dataset->ds_process_reqs = builtin_posix_module_process_reqs;

  pthread_mutex_init (&posix_dataset->lock, NULL);

  /* record the open time */
  gettimeofday (&dataset->ds_otime, NULL);

  stop = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully %s posix dataset %s:%llu on data root %s. "
            "open time %lu usec", (dataset->ds_flags & HIO_FLAG_CREAT) ? "created" : "opened", hioi_object_identifier(dataset),
            dataset->ds_id, module->data_root, stop - start);

  return HIO_SUCCESS;
}
Exemple #17
0
int hioi_fs_query (hio_context_t context, const char *path, hio_fs_attr_t *fs_attr) {
  struct statfs fsinfo;
  char tmp[4096];
  int rc;

  if (NULL == path) {
    return HIO_ERR_BAD_PARAM;
  }

  do {
    if (0 != context->c_rank) {
      break;
    }

    if (NULL == realpath (path, tmp)) {
      fs_attr->fs_type = hioi_err_errno (errno);
      break;
    }

    /* get general filesystem data */
    rc = statfs (tmp, &fsinfo);
    if (0 > rc) {
      hioi_log(context, HIO_VERBOSE_DEBUG_LOW, "statfs path:%s rc:%d errno:%d(%s)", tmp, rc, errno, strerror(errno));  
      fs_attr->fs_type = hioi_err_errno (errno);
      break;
    }

    memset (fs_attr, 0, sizeof (*fs_attr));

    fs_attr->fs_bavail  = fsinfo.f_bavail;
    fs_attr->fs_btotal  = fsinfo.f_blocks;
    fs_attr->fs_bsize   = fsinfo.f_bsize;

    /* set some reasonable defaults for striping parameters */
    fs_attr->fs_scount = 1;
    fs_attr->fs_ssize = fs_attr->fs_bsize;

    /* get filesytem specific data */
    switch (fsinfo.f_type) {
#if defined(LL_SUPER_MAGIC)
    case LL_SUPER_MAGIC:
      hioi_fs_query_lustre (tmp, fs_attr);
      break;
#endif

#if defined(GPFS_SUPER_MAGIC)
    case GPFS_SUPER_MAGIC:
      /* gpfs */
      break;
#endif

#if defined(PAN_FS_CLIENT_MAGIC)
    case PAN_FS_CLIENT_MAGIC:
      /* panfs */
      break;
#endif
#if HIO_USE_DATAWARP
    case DW_SUPER_MAGIC:
      hioi_fs_query_datawarp (tmp, fs_attr);
      break;
#endif
    }

    hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "filesystem query: path: %s, type: %d, flags: 0x%x, block size: %" PRIu64
              " block count: %" PRIu64 " blocks free: %" PRIu64 " stripe count: %" PRIu32 " stripe max count: %" PRIu32
              " stripe unit: %" PRIu64 " stripe size: %" PRIu64 " stripe max size: %" PRIu64, tmp, fs_attr->fs_type,
              fs_attr->fs_flags, fs_attr->fs_bsize, fs_attr->fs_btotal, fs_attr->fs_bavail, fs_attr->fs_scount,
              fs_attr->fs_smax_count, fs_attr->fs_sunit, fs_attr->fs_ssize, fs_attr->fs_smax_size);

  } while (0);

#if HIO_MPI_HAVE(1)
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (fs_attr, sizeof (*fs_attr), MPI_BYTE, 0, context->c_comm);
  }
#endif
  if (0 > fs_attr->fs_type) {
    return fs_attr->fs_type;
  }

  fs_attr->fs_open = hio_fs_open_fns[fs_attr->fs_type];
  /* if this assert is hit the above array needs to be updated */
  assert (NULL != fs_attr->fs_open);

  return HIO_SUCCESS;
}
static int builtin_posix_create_dataset_dirs (builtin_posix_module_t *posix_module, builtin_posix_module_dataset_t *posix_dataset) {
  mode_t access_mode = posix_module->access_mode;
  hio_context_t context = posix_module->base.context;
  char *path;
  int rc;

  if (context->c_rank > 0) {
    return HIO_SUCCESS;
  }

  /* create the data directory*/
  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix: creating dataset directory @ %s", posix_dataset->base_path);

  rc = asprintf (&path, "%s/data", posix_dataset->base_path);
  if (0 > rc) {
    return hioi_err_errno (errno);
  }

  rc = hio_mkpath (context, path, access_mode);
  if (0 > rc || EEXIST == errno) {
    if (EEXIST != errno) {
      hioi_err_push (hioi_err_errno (errno), &context->c_object, "posix: error creating context directory: %s",
                    path);
    }
    free (path);

    return hioi_err_errno (errno);
  }

  /* set striping parameters on the directory */
  if (posix_dataset->base.ds_fsattr.fs_flags & HIO_FS_SUPPORTS_STRIPING) {
    rc = hioi_fs_set_stripe (path, &posix_dataset->base.ds_fsattr);
    if (HIO_SUCCESS != rc) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: could not set file system striping on %s", path);
    }
  }

  free (path);

  /* create trace directory if requested */
  if (context->c_enable_tracing) {
    rc = asprintf (&path, "%s/trace", posix_dataset->base_path);
    if (0 > rc) {
      return hioi_err_errno (errno);
    }

    rc = hio_mkpath (context, path, access_mode);
    if (0 > rc || EEXIST == errno) {
      if (EEXIST != errno) {
        hioi_err_push (hioi_err_errno (errno), &context->c_object, "posix: error creating context directory: %s",
                       path);
      }
      free (path);

      return hioi_err_errno (errno);
    }
  }

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix: successfully created dataset directories %s", posix_dataset->base_path);

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_close (hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  hio_module_t *module = dataset->ds_module;
  unsigned char *manifest = NULL;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;
  size_t manifest_size;

  start = hioi_gettime ();

  for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) {
    if (posix_dataset->files[i].f_bid >= 0) {
      POSIX_TRACE_CALL(posix_dataset, hioi_file_close (posix_dataset->files + i), "file_close",
                       posix_dataset->files[i].f_bid, 0);
    }
  }

#if HIO_MPI_HAVE(3)
  /* release the shared state if it was allocated */
  (void) hioi_dataset_shared_fini (dataset);

  /* release the dataset map if one was allocated */
  (void) hioi_dataset_map_release (dataset);
#endif


  if (dataset->ds_flags & HIO_FLAG_WRITE) {
    char *path;

    /* write manifest header */
    POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_gather_manifest (dataset, &manifest, &manifest_size, false, true),
                     "gather_manifest", 0, 0);
    if (HIO_SUCCESS != rc) {
      dataset->ds_status = rc;
    }

    if (0 == context->c_rank) {
      rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path);
      if (0 > rc) {
        /* out of memory. not much we can do now */
        return hioi_err_errno (errno);
      }

      rc = hioi_manifest_save (dataset, manifest, manifest_size, path);
      free (manifest);
      free (path);
      if (HIO_SUCCESS != rc) {
        hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest");
      }
    }

#if HIO_MPI_HAVE(3)
    if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) {
      /* optimized mode requires a data manifest to describe how the data landed on the filesystem */
      POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_gather_manifest_comm (dataset, context->c_shared_comm, &manifest, &manifest_size,
                                                                              posix_dataset->ds_use_bzip, false),
                       "gather_manifest", 0, 0);
      if (HIO_SUCCESS != rc) {
        dataset->ds_status = rc;
      }

      if (NULL != manifest) {
        rc = asprintf (&path, "%s/manifest.%x.json%s", posix_dataset->base_path, context->c_rank,
                       posix_dataset->ds_use_bzip ? ".bz2" : "");
        if (0 > rc) {
          return hioi_err_errno (errno);
        }

        rc = hioi_manifest_save (dataset, manifest, manifest_size, path);
        free (manifest);
        free (path);
        if (HIO_SUCCESS != rc) {
          hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest");
        }
      }
    }
#endif
  }

#if HIO_MPI_HAVE(1)
  /* ensure all ranks have closed the dataset before continuing */
  if (hioi_context_using_mpi (context)) {
    MPI_Allreduce (MPI_IN_PLACE, &rc, 1, MPI_INT, MPI_MIN, context->c_comm);
  }
#endif

  free (posix_dataset->base_path);

  stop = hioi_gettime ();

  builtin_posix_trace (posix_dataset, "close", 0, 0, start, stop);

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully closed posix dataset "
            "%s:%" PRIu64 " on data root %s. close time %" PRIu64 " usec", hioi_object_identifier(dataset),
            dataset->ds_id, module->data_root, stop - start);

  builtin_posix_trace (posix_dataset, "trace_end", 0, 0, 0, 0);
  if (posix_dataset->ds_trace_fh) {
    fclose (posix_dataset->ds_trace_fh);
  }

  return rc;
}
static int builtin_posix_module_dataset_open (struct hio_module_t *module, hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  unsigned char *manifest = NULL;
  size_t manifest_size = 0;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;
  char *path = NULL;

  start = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "posix:dataset_open: opening dataset %s:%lu mpi: %d flags: 0x%x mode: 0x%x",
	    hioi_object_identifier (dataset), (unsigned long) dataset->ds_id, hioi_context_using_mpi (context),
            dataset->ds_flags, dataset->ds_mode);

  rc = builtin_posix_module_dataset_init (module, posix_dataset);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  rc = builtin_posix_module_setup_striping (context, module, dataset);
  if (HIO_SUCCESS != rc) {
    return rc;
  }

  if (HIO_FILE_MODE_STRIDED == posix_dataset->ds_fmode) {
    hioi_config_add (context, &dataset->ds_object, &posix_dataset->ds_fcount,
                     "dataset_file_count", HIO_CONFIG_TYPE_UINT64, NULL, "Number of files to use "
                     "in strided file mode", 0);
  } else if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) {
    posix_dataset->ds_use_bzip = true;
    hioi_config_add (context, &dataset->ds_object, &posix_dataset->ds_use_bzip,
                     "dataset_use_bzip", HIO_CONFIG_TYPE_BOOL, NULL,
                     "Use bzip2 compression for dataset manifests", 0);
  }

  if (dataset->ds_flags & HIO_FLAG_TRUNC) {
    /* blow away the existing dataset */
    if (0 == context->c_rank) {
      (void) builtin_posix_module_dataset_unlink (module, hioi_object_identifier(dataset),
                                                  dataset->ds_id);
    }
  }

  if (!(dataset->ds_flags & HIO_FLAG_CREAT)) {
    if (0 == context->c_rank) {
      /* load manifest. the manifest data will be shared with other processes in hioi_dataset_scatter */
      rc = asprintf (&path, "%s/manifest.json", posix_dataset->base_path);
      assert (0 < rc);
      if (access (path, F_OK)) {
        /* this should never happen on a valid dataset */
        free (path);
        hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: could not find top-level manifest %s", path);
        rc = HIO_ERR_NOT_FOUND;
      } else {
        rc = HIO_SUCCESS;
      }
    }

    /* read the manifest if it exists */
    if (HIO_SUCCESS == rc) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: loading manifest header from %s...", path);
      rc = hioi_manifest_read (path, &manifest, &manifest_size);
      free (path);
      path = NULL;
    }
  } else if (0 == context->c_rank) {
    rc = builtin_posix_create_dataset_dirs (posix_module, posix_dataset);
    if (HIO_SUCCESS == rc) {
      /* serialize the manifest to send to remote ranks */
      rc = hioi_manifest_serialize (dataset, &manifest, &manifest_size, false, false);
    }
  }

#if HIO_MPI_HAVE(1)
  /* share dataset header will all processes in the communication domain */
  rc = hioi_dataset_scatter_comm (dataset, context->c_comm, manifest, manifest_size, rc);
#endif
  free (manifest);
  if (HIO_SUCCESS != rc) {
    free (posix_dataset->base_path);
    return rc;
  }

  if (context->c_enable_tracing) {
    char *path;

    rc = asprintf (&path, "%s/trace/trace.%d", posix_dataset->base_path, context->c_rank);
    if (rc > 0) {
      posix_dataset->ds_trace_fh = fopen (path, "a");
      free (path);
    }

    builtin_posix_trace (posix_dataset, "trace_begin", 0, 0, 0, 0);
  }

#if HIO_MPI_HAVE(3)
  if (!(dataset->ds_flags & HIO_FLAG_CREAT) && HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) {
    rc = bultin_posix_scatter_data (posix_dataset);
    if (HIO_SUCCESS != rc) {
      free (posix_dataset->base_path);
      return rc;
    }
  }

  /* if possible set up a shared memory window for this dataset */
  POSIX_TRACE_CALL(posix_dataset, hioi_dataset_shared_init (dataset, 1), "shared_init", 0, 0);

  if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) {
    if (2 > context->c_size || NULL == dataset->ds_shared_control) {
      /* no point in using optimized mode in this case */
      posix_dataset->ds_fmode = HIO_FILE_MODE_BASIC;
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: optimized file mode requested but not supported in this "
                "dataset mode. falling back to basic file mode, path: %s", posix_dataset->base_path);
    } else if (HIO_SET_ELEMENT_SHARED == dataset->ds_mode) {
      POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_generate_map (dataset), "generate_map", 0, 0);
      if (HIO_SUCCESS != rc) {
        free (posix_dataset->base_path);
        return rc;
      }
    }
  }

  /* NTH: if requested more code is needed to load an optimized dataset with an older MPI */
#endif /* HIO_MPI_HAVE(3) */

  dataset->ds_module = module;
  dataset->ds_close = builtin_posix_module_dataset_close;
  dataset->ds_element_open = builtin_posix_module_element_open;
  dataset->ds_process_reqs = builtin_posix_module_process_reqs;

  /* record the open time */
  gettimeofday (&dataset->ds_otime, NULL);

  stop = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully %s posix dataset "
            "%s:%" PRIu64 " on data root %s. open time %" PRIu64 " usec",
            (dataset->ds_flags & HIO_FLAG_CREAT) ? "created" : "opened", hioi_object_identifier(dataset),
            dataset->ds_id, module->data_root, stop - start);

  builtin_posix_trace (posix_dataset, "open", 0, 0, start, stop);

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_list (struct hio_module_t *module, const char *name,
                                              hio_dataset_header_t **headers, int *count) {
  hio_context_t context = module->context;
  int num_set_ids = 0, set_id_index = 0;
  int rc = HIO_SUCCESS;
  struct dirent *dp;
  char *path = NULL;
  DIR *dir;

  *headers = NULL;
  *count = 0;

  do {
    if (0 != context->c_rank) {
      break;
    }

    rc = asprintf (&path, "%s/%s.hio/%s", module->data_root, hioi_object_identifier(context), name);
    assert (0 <= rc);

    dir = opendir (path);
    if (NULL == dir) {
      num_set_ids = 0;
      break;
    }

    while (NULL != (dp = readdir (dir))) {
      if (dp->d_name[0] != '.') {
        num_set_ids++;
      }
    }

    *headers = (hio_dataset_header_t *) calloc (num_set_ids, sizeof (**headers));
    assert (NULL != *headers);

    rewinddir (dir);

    while (NULL != (dp = readdir (dir))) {
      if ('.' == dp->d_name[0]) {
        continue;
      }

      char *manifest_path;

      rc = asprintf (&manifest_path, "%s/%s/manifest.json.bz2", path, dp->d_name);
      assert (0 <= rc);

      rc = hioi_manifest_read_header (context, headers[0] + set_id_index, manifest_path);
      if (HIO_SUCCESS == rc) {
        ++set_id_index;
      } else {
        free (manifest_path);
        rc = asprintf (&manifest_path, "%s/%s/manifest.json", path, dp->d_name);
        assert (0 <= rc);

        rc = hioi_manifest_read_header (context, headers[0] + set_id_index, manifest_path);
        if (HIO_SUCCESS == rc) {
          ++set_id_index;
        } else {
          /* skip dataset */
          hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_list: could not read manifest at path: %s. rc: %d",
                    manifest_path, rc);
        }
      }

      free (manifest_path);
    }

    num_set_ids = set_id_index;
  } while (0);

#if HIO_USE_MPI
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (&num_set_ids, 1, MPI_INT, 0, context->c_comm);
  }
#endif

  if (0 == context->c_rank) {
    closedir (dir);
    free (path);
  }

  if (0 == num_set_ids) {
    free (*headers);
    *headers = NULL;

    return HIO_SUCCESS;
  }

  if (0 != context->c_rank) {
    *headers = (hio_dataset_header_t *) calloc (num_set_ids, sizeof (**headers));
    assert (NULL != *headers);
  }

#if HIO_USE_MPI
  if (hioi_context_using_mpi (context)) {
    MPI_Bcast (*headers, sizeof (**headers) * num_set_ids, MPI_BYTE, 0, context->c_comm);
  }
#endif

  *count = num_set_ids;

  return HIO_SUCCESS;
}
Exemple #22
0
static int hioi_config_set_value_internal (hio_context_t context, hio_var_t *var, const char *strval) {
  uint64_t intval = hioi_string_to_int(strval);

  if (NULL == strval) {
    /* empty value. nothing to do */
    return HIO_SUCCESS;
  }

  if (var->var_enum) {
    bool found = false;

    if ((uint64_t) -1 == intval) {
      for (int i = 0 ; i < var->var_enum->count ; ++i) {
        if (0 == strcmp (var->var_enum->values[i].string_value, strval)) {
          intval = var->var_enum->values[i].value;
          found = true;
          break;
        }
      }
    } else {
      for (int i = 0 ; i < var->var_enum->count ; ++i) {
        if (intval == var->var_enum->values[i].value) {
          found = true;
          break;
        }
      }
    }

    if (found) {
      hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "Setting enumeration value to %" PRIu64, intval);
    } else {
      hioi_log (context, HIO_VERBOSE_WARN, "Invalid enumeration value provided for variable %s. Got %s",
                var->var_name, strval);
      return HIO_ERR_BAD_PARAM;
    }
  }

  switch (var->var_type) {
  case HIO_CONFIG_TYPE_BOOL:
    if (0 == strcmp (strval, "true") || 0 == strcmp (strval, "t") ||
        0 == strcmp (strval, "1")) {
      var->var_storage->boolval = true;
    } else if (0 == strcmp (strval, "false") || 0 == strcmp (strval, "f") ||
               0 == strcmp (strval, "0")) {
      var->var_storage->boolval = false;
    } else {
      var->var_storage->boolval = !!intval;
    }

    break;
  case HIO_CONFIG_TYPE_STRING:
    if (var->var_storage->strval) {
      free (var->var_storage->strval);
    }

    var->var_storage->strval = strdup (strval);

    break;
  case HIO_CONFIG_TYPE_INT32:
    var->var_storage->int32val = (int32_t) (intval & 0xffffffff);
    break;
  case HIO_CONFIG_TYPE_UINT32:
    var->var_storage->uint32val = (uint32_t) (intval & 0xffffffffu);
    break;
  case HIO_CONFIG_TYPE_INT64:
    var->var_storage->int64val = (int64_t) intval;
    break;
  case HIO_CONFIG_TYPE_UINT64:
    var->var_storage->uint64val = intval;
    break;
  case HIO_CONFIG_TYPE_FLOAT:
    var->var_storage->floatval = strtof (strval, NULL);
    break;
  case HIO_CONFIG_TYPE_DOUBLE:
    var->var_storage->doubleval = strtod (strval, NULL);
    break;
  }

  return HIO_SUCCESS;
}
static int builtin_posix_module_dataset_close (hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  builtin_posix_module_t *posix_module = (builtin_posix_module_t *) dataset->ds_module;
  hio_context_t context = hioi_object_context ((hio_object_t) dataset);
  hio_module_t *module = dataset->ds_module;
  unsigned char *manifest;
  uint64_t start, stop;
  int rc = HIO_SUCCESS;
  size_t manifest_size;

  start = hioi_gettime ();

  for (int i = 0 ; i < HIO_POSIX_MAX_OPEN_FILES ; ++i) {
    if (posix_dataset->files[i].f_file.f_hndl != NULL) {
      fclose (posix_dataset->files[i].f_file.f_hndl);
      posix_dataset->files[i].f_file.f_hndl = NULL;
    }
  }

  if (dataset->ds_flags & HIO_FLAG_WRITE) {
    rc = hioi_dataset_gather_manifest (dataset, &manifest, &manifest_size, dataset->ds_use_bzip);
    if (HIO_SUCCESS != rc) {
      dataset->ds_status = rc;
    }

    if (0 == context->c_rank) {
      char *path;

      rc = asprintf (&path, "%s/manifest.json%s", posix_dataset->base_path,
                     dataset->ds_use_bzip ? ".bz2" : "");
      if (0 < rc) {
        int fd;

        errno = 0;
        fd = open (path, O_CREAT | O_WRONLY, posix_module->access_mode);
        if (0 <= fd) {
          (void) write (fd, manifest, manifest_size);
          close (fd);
        }
        free (manifest);

        rc = hioi_err_errno (errno);

        free (path);
        if (HIO_SUCCESS != rc) {
          hioi_err_push (rc, &dataset->ds_object, "posix: error writing dataset manifest");
        }
      } else {
        rc = HIO_ERR_OUT_OF_RESOURCE;
      }
    }
  }

#if HIO_USE_MPI
  /* ensure all ranks have closed the dataset before continuing */
  if (hioi_context_using_mpi (context)) {
    MPI_Allreduce (MPI_IN_PLACE, &rc, 1, MPI_INT, MPI_MIN, context->c_comm);
  }
#endif

  free (posix_dataset->base_path);

  pthread_mutex_destroy (&posix_dataset->lock);

  stop = hioi_gettime ();

  hioi_log (context, HIO_VERBOSE_DEBUG_LOW, "posix:dataset_open: successfully closed posix dataset %s:%llu on data root %s. "
            "close time %lu usec", hioi_object_identifier(dataset), dataset->ds_id, module->data_root, stop - start);

  return rc;
}
static int builtin_posix_element_translate_opt (builtin_posix_module_t *posix_module, hio_element_t element,
                                                uint64_t offset, size_t *size, hio_file_t **file_out,
                                                bool reading) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) hioi_element_dataset (element);
  hio_context_t context = hioi_object_context (&element->e_object);
  hio_file_t *file;
  uint64_t file_offset;
  int file_index = 0;
  char *path;
  int rc;

  hioi_log (context, HIO_VERBOSE_DEBUG_MED, "translating element %s offset %" PRIu64 " size %lu",
            hioi_object_identifier (&element->e_object), offset, *size);
  POSIX_TRACE_CALL(posix_dataset, rc = hioi_element_translate_offset (element, offset, &file_index, &file_offset, size),
                   "translate_offset", offset, *size);
#if HIO_MPI_HAVE(3)
  if (HIO_SUCCESS != rc && reading) {
    POSIX_TRACE_CALL(posix_dataset, rc = hioi_dataset_map_translate_offset (element, offset, &file_index, &file_offset, size),
                     "map_translate_offset", offset, *size);
  }
#endif

  if (HIO_SUCCESS != rc) {
    if (reading) {
      hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset %" PRIu64 " not found", offset);
      /* not found */
      return rc;
    }

    file_offset = builtin_posix_reserve (posix_dataset, size);

    if (hioi_context_using_mpi (context)) {
      file_index = posix_dataset->base.ds_shared_control->s_master;
    } else {
      file_index = 0;
    }

    rc = asprintf (&path, "%s/data/data.%x", posix_dataset->base_path, file_index);
    if (0 > rc) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }

    hioi_element_add_segment (element, file_index, file_offset, offset, *size);
  } else {
    hioi_log (context, HIO_VERBOSE_DEBUG_MED, "offset found in file @ rank %d, offset %" PRIu64
              ", size %lu", file_index, file_offset, *size);
    rc = asprintf (&path, "%s/data/data.%x", posix_dataset->base_path, file_index);
    if (0 > rc) {
      return HIO_ERR_OUT_OF_RESOURCE;
    }

    if (access (path, R_OK)) {
      free (path);
      rc = asprintf (&path, "%s/data.%x", posix_dataset->base_path, file_index);
      if (0 > rc) {
        return HIO_ERR_OUT_OF_RESOURCE;
      }
    }
  }

  /* use crc as a hash to pick a file index to use */
  int internal_index = file_index % HIO_POSIX_MAX_OPEN_FILES;
  file = posix_dataset->files + internal_index;

  if (file_index != file->f_bid) {
    if (file->f_bid >= 0) {
      POSIX_TRACE_CALL(posix_dataset, hioi_file_close (file), "file_close", file->f_bid, 0);
    }

    file->f_bid = -1;

    POSIX_TRACE_CALL(posix_dataset, rc = builtin_posix_open_file (posix_module, posix_dataset, path, file),
                     "file_open", file_index, 0);
    if (HIO_SUCCESS != rc) {
      free (path);
      return rc;
    }

    file->f_bid = file_index;
  }

  free (path);

  POSIX_TRACE_CALL(posix_dataset, hioi_file_seek (file, file_offset, SEEK_SET), "file_seek", file->f_bid, file_offset);

  *file_out = file;

  return HIO_SUCCESS;
}
static int builtin_posix_module_setup_striping (hio_context_t context, struct hio_module_t *module, hio_dataset_t dataset) {
  builtin_posix_module_dataset_t *posix_dataset = (builtin_posix_module_dataset_t *) dataset;
  hio_fs_attr_t *fs_attr = &dataset->ds_fsattr;
  int rc;

  /* query the filesystem for current striping parameters */
  rc = hioi_fs_query (context, module->data_root, fs_attr);
  if (HIO_SUCCESS != rc) {
    hioi_err_push (rc, &context->c_object, "posix: error querying the filesystem");
    return rc;
  }

  /* for now do not use stripe exclusivity in any path */
  posix_dataset->my_stripe = 0;

  /* set default stripe count */
  fs_attr->fs_scount = 1;

  posix_dataset->ds_fcount = 1;

  if (fs_attr->fs_flags & HIO_FS_SUPPORTS_STRIPING) {
    if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode) {
      /* pick a reasonable default stripe size */
      fs_attr->fs_ssize = 1 << 24;

      /* use group locking if available as we guarantee stripe exclusivity in optimized mode */
      fs_attr->fs_use_group_locking = true;

#if HIO_MPI_HAVE(3)
      /* if group locking is not available then each rank should attempt to write to
       * a different stripe to maximize the available IO bandwidth */
      fs_attr->fs_scount = min(context->c_shared_size, fs_attr->fs_smax_count);
#endif
    } else if (HIO_FILE_MODE_STRIDED == posix_dataset->ds_fmode) {
      /* pick a reasonable default stripe size */
      fs_attr->fs_ssize = posix_dataset->ds_bs;

      posix_dataset->ds_fcount = fs_attr->fs_smax_count * 32;
      fs_attr->fs_scount = 16;
      if (context->c_size < posix_dataset->ds_fcount) {
        posix_dataset->ds_fcount = context->c_size;
      }
    } else if (HIO_SET_ELEMENT_UNIQUE != dataset->ds_mode) {
      /* set defaults striping count */
      fs_attr->fs_ssize = 1 << 20;
      fs_attr->fs_scount = max (1, (unsigned) ((float) fs_attr->fs_smax_count * 0.9));
    } else {
      fs_attr->fs_ssize = 1 << 20;
    }

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_scount,
                     "stripe_count", HIO_CONFIG_TYPE_UINT32, NULL, "Stripe count for all dataset "
                     "data files", 0);

    hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_ssize,
                     "stripe_size", HIO_CONFIG_TYPE_UINT64, NULL, "Stripe size for all dataset "
                     "data files", 0);

    if (fs_attr->fs_flags & HIO_FS_SUPPORTS_RAID) {
      hioi_config_add (context, &dataset->ds_object, &fs_attr->fs_raid_level,
                       "raid_level", HIO_CONFIG_TYPE_UINT64, NULL, "RAID level for dataset "
                       "data files. Keep in mind that some filesystems only support 1/2 RAID "
                       "levels", 0);
    }

    /* ensure stripe count is sane */
    if (fs_attr->fs_scount > fs_attr->fs_smax_count) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe count %u exceeds the available resources. "
                "adjusting to maximum %u", fs_attr->fs_scount, fs_attr->fs_smax_count);
      fs_attr->fs_scount = fs_attr->fs_smax_count;
    }

    /* ensure the stripe size is a multiple of the stripe unit */
    fs_attr->fs_ssize = fs_attr->fs_sunit * ((fs_attr->fs_ssize + fs_attr->fs_sunit - 1) / fs_attr->fs_sunit);
    if (fs_attr->fs_ssize > fs_attr->fs_smax_size) {
      hioi_log (context, HIO_VERBOSE_WARN, "posix:dataset_open: requested stripe size %" PRIu64 " exceeds the maximum %"
                PRIu64 ". ", fs_attr->fs_ssize, fs_attr->fs_smax_size);
      fs_attr->fs_ssize = fs_attr->fs_smax_size;
    }

    if (HIO_FILE_MODE_OPTIMIZED == posix_dataset->ds_fmode && posix_dataset->ds_bs < fs_attr->fs_ssize) {
      posix_dataset->ds_bs = fs_attr->fs_ssize;
    }
  }

  return HIO_SUCCESS;
}