Beispiel #1
0
/* searchs for name and returns a newly allocated hash of its value if set,
 * returns NULL if not found */
scr_hash* scr_param_get_hash(char* name)
{
  scr_hash* hash = NULL;
  scr_hash* value_hash = NULL;

  /* see if this parameter is one which is restricted from user */
  scr_hash* no_user = scr_hash_get(scr_no_user_hash, name);

  /* if parameter is set in environment, return that value */
  if (no_user == NULL && getenv(name) != NULL) {
    /* TODO: need to strdup here to be safe? */
    hash = scr_hash_new();
    scr_hash_set(hash, getenv(name), scr_hash_new());
    return hash;
  }

  /* otherwise, if parameter is set in user configuration file, return that value */
  value_hash = scr_hash_get(scr_user_hash, name);
  if (no_user == NULL && value_hash != NULL) {
    hash = scr_hash_new();
    scr_hash_merge(hash, value_hash);
    return hash;
  }

  /* otherwise, if parameter is set in system configuration file, return that value */
  value_hash = scr_hash_get(scr_system_hash, name);
  if (value_hash != NULL) {
    hash = scr_hash_new();
    scr_hash_merge(hash, value_hash);
    return hash;
  }

  /* parameter not found, return NULL */
  return NULL;
}
Beispiel #2
0
/* given a hash of files and a file name, check whether the named
 * file needs data transfered, if so, strdup its destination name
 * and set its position and filesize */
int need_transfer(scr_hash* files, char* src, char** dst, off_t* position, off_t* filesize)
{
  /* check that we got a hash of files and a file name */
  if (files == NULL || src == NULL) {
    return SCR_FAILURE;
  }

  /* lookup the specified file in the hash */
  scr_hash* file_hash = scr_hash_get(files, src);
  if (file_hash == NULL) {
    return SCR_FAILURE;
  }

  /* extract the values for file size, bytes written, and destination */
  unsigned long size, written;
  char* dest;
  if (scr_hash_util_get_bytecount(file_hash, SCR_TRANSFER_KEY_SIZE, &size) == SCR_SUCCESS &&
      scr_hash_util_get_bytecount(file_hash, SCR_TRANSFER_KEY_WRITTEN, &written) == SCR_SUCCESS &&
      scr_hash_util_get_str(file_hash, SCR_TRANSFER_KEY_DESTINATION, &dest) == SCR_SUCCESS)
  {
    /* if the bytes written value is less than the file size,
     * we've got a valid file */
    if (written < size) {
      /* got our file, fill in output parameters */
      *dst = strdup(dest);
      /* TODO: check for error */
      *position = (off_t) written;
      *filesize = (off_t) size;
      return SCR_SUCCESS;
    }
  }

  return SCR_FAILURE;
}
Beispiel #3
0
/* searchs for name and returns a character pointer to its value if set,
 * returns NULL if not found */
char* scr_param_get(char* name)
{
  char* value = NULL;

  /* see if this parameter is one which is restricted from user */
  scr_hash* no_user = scr_hash_get(scr_no_user_hash, name);

  /* if parameter is set in environment, return that value */
  if (no_user == NULL && getenv(name) != NULL) {
    /* TODO: need to strdup here to be safe? */
    return getenv(name);
  }

  /* otherwise, if parameter is set in user configuration file, return that value */
  value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (no_user == NULL && value != NULL) {
    return value;
  }

  /* otherwise, if parameter is set in system configuration file, return that value */
  value = scr_hash_elem_get_first_val(scr_system_hash, name);
  if (value != NULL) {
    return value;
  }

  /* parameter not found, return NULL */
  return NULL;
}
Beispiel #4
0
/* read config files and store contents */
int scr_param_init()
{
  /* allocate storage and read in config files if we haven't already */
  if (scr_param_ref_count == 0) {
    /* allocate hash object to hold names we cannot read from the
     * environment */
    scr_no_user_hash = scr_hash_new();
    scr_hash_set(scr_no_user_hash, "SCR_CNTL_BASE", scr_hash_new());

    /* allocate hash object to store values from user config file,
     * if specified */
    char* user_file = user_config_path();
    if (user_file != NULL) {
      scr_user_hash = scr_hash_new();
      scr_config_read(user_file, scr_user_hash);
    }
    scr_free(&user_file);

    /* allocate hash object to store values from system config file */
    scr_system_hash = scr_hash_new();
    scr_config_read(scr_config_file, scr_system_hash);

    /* initialize our hash to cache lookups to getenv */
    scr_env_hash = scr_hash_new();

    /* warn user if he set any parameters in his environment or user
     * config file which aren't permitted */
    scr_hash_elem* elem;
    for (elem = scr_hash_elem_first(scr_no_user_hash);
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the parameter name */
      char* key = scr_hash_elem_key(elem);

      char* env_val = getenv(key);
      scr_hash* env_hash = scr_hash_get(scr_user_hash, key);

      /* check whether this is set in the environment */
      if (env_val != NULL || env_hash != NULL) {
        scr_err("%s cannot be set in the environment or user configuration file, ignoring setting",
          key
        );
      }
    }
  }

  /* increment our reference count */
  scr_param_ref_count++;

  return SCR_SUCCESS;
}
Beispiel #5
0
/* searches for name and returns a character pointer to its value if set,
 * returns NULL if not found */
char* scr_param_get(char* name)
{
  char* value = NULL;

  /* see if this parameter is one which is restricted from user */
  scr_hash* no_user = scr_hash_get(scr_no_user_hash, name);

  /* if parameter is set in environment, return that value */
  if (no_user == NULL && getenv(name) != NULL) {
    /* we don't just return the getenv value directly because that causes
     * segfaults on some systems, so instead we add it to a hash and return
     * the pointer into the hash */

    /* try to lookup the value for this name in case we've already cached it */
    if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) {
      /* it's not in the hash yet, so add it */
      char* tmp_value = strdup(getenv(name));
      scr_hash_util_set_str(scr_env_hash, name, tmp_value);
      scr_free(&tmp_value);

      /* now issue our lookup again */
      if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) {
        /* it's an error if we don't find it this time */
        scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d",
          name, __FILE__, __LINE__
        );
      }
    }
    
    return value;
  }

  /* otherwise, if parameter is set in user configuration file,
   * return that value */
  value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (no_user == NULL && value != NULL) {
    return value;
  }

  /* otherwise, if parameter is set in system configuration file,
   * return that value */
  value = scr_hash_elem_get_first_val(scr_system_hash, name);
  if (value != NULL) {
    return value;
  }

  /* parameter not found, return NULL */
  return NULL;
}
Beispiel #6
0
/* since on a restart we may end up with more or fewer ranks on a node than the
 * previous run, rely on the master to read in and distribute the filemap to
 * other ranks on the node */
int scr_scatter_filemaps(scr_filemap* my_map)
{
  /* TODO: if the control directory is on a device shared by lots of procs,
   * we should read and distribute this data in a more scalable way */

  /* allocate empty send hash */
  scr_hash* send_hash = scr_hash_new();

  /* if i'm the master on this node, read in all filemaps */
  if (scr_storedesc_cntl->rank == 0) {
    /* create an empty filemap */
    scr_filemap* all_map = scr_filemap_new();

    /* read in the master map */
    scr_hash* hash = scr_hash_new();
    scr_hash_read_path(scr_master_map_file, hash);

    /* for each filemap listed in the master map */
    scr_hash_elem* elem;
    for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap"));
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the filename of this filemap */
      char* file = scr_hash_elem_key(elem);

      /* TODO MEMFS: mount storage for each filemap */

      /* read in the filemap */
      scr_filemap* tmp_map = scr_filemap_new();
      scr_path* path_file = scr_path_from_str(file);
      scr_filemap_read(path_file, tmp_map);
      scr_path_delete(&path_file);

      /* merge it with the all_map */
      scr_filemap_merge(all_map, tmp_map);

      /* delete filemap */
      scr_filemap_delete(&tmp_map);

      /* TODO: note that if we fail after unlinking this file but before
       * writing out the new file, we'll lose information */

      /* delete the file */
      scr_file_unlink(file);
    }

    /* free the hash object */
    scr_hash_delete(&hash);

    /* write out new local 0 filemap */
    if (scr_filemap_num_ranks(all_map) > 0) {
      scr_filemap_write(scr_map_file, all_map);
    }

    /* get global rank of each rank */
    int* ranks = (int*) SCR_MALLOC(scr_storedesc_cntl->ranks * sizeof(int));
    MPI_Gather(
      &scr_my_rank_world, 1, MPI_INT, ranks, 1, MPI_INT,
      0, scr_storedesc_cntl->comm
    );

    /* for each rank, send them their own file data if we have it */
    int i;
    for (i=0; i < scr_storedesc_cntl->ranks; i++) {
      int rank = ranks[i];
      if (scr_filemap_have_rank(all_map, rank)) {
        /* extract the filemap for this rank */
        scr_filemap* tmp_map = scr_filemap_extract_rank(all_map, rank);

        /* get a reference to the hash object that we'll send to this rank,
         * and merge this filemap into it */
        scr_hash* tmp_hash = scr_hash_getf(send_hash, "%d", i);
        if (tmp_hash == NULL) {
          /* if we don't find an existing entry in the send_hash,
           * create an empty hash and insert it */
          scr_hash* empty_hash = scr_hash_new();
          scr_hash_setf(send_hash, empty_hash, "%d", i);
          tmp_hash = empty_hash;
        }
        scr_hash_merge(tmp_hash, tmp_map);

        /* delete the filemap for this rank */
        scr_filemap_delete(&tmp_map);
      }
    }

    /* free our rank list */
    scr_free(&ranks);

    /* now just round robin the remainder across the set (load balancing) */
    int num;
    int* remaining_ranks = NULL;
    scr_filemap_list_ranks(all_map, &num, &remaining_ranks);

    int j = 0;
    while (j < num) {
      /* pick a rank in to send to */
      i = j % scr_storedesc_cntl->ranks;

      /* extract the filemap for this rank */
      scr_filemap* tmp_map = scr_filemap_extract_rank(all_map, remaining_ranks[j]);

      /* get a reference to the hash object that we'll send to this rank,
       * and merge this filemap into it */
      scr_hash* tmp_hash = scr_hash_getf(send_hash, "%d", i);
      if (tmp_hash == NULL) {
        /* if we don't find an existing entry in the send_hash,
         * create an empty hash and insert it */
        scr_hash* empty_hash = scr_hash_new();
        scr_hash_setf(send_hash, empty_hash, "%d", i);
        tmp_hash = empty_hash;
      }
      scr_hash_merge(tmp_hash, tmp_map);

      /* delete the filemap for this rank */
      scr_filemap_delete(&tmp_map);
      j++;
    }

    scr_free(&remaining_ranks);

    /* delete the filemap */
    scr_filemap_delete(&all_map);

    /* write out the new master filemap */
    hash = scr_hash_new();
    char file[SCR_MAX_FILENAME];
    for (i=0; i < scr_storedesc_cntl->ranks; i++) {
      sprintf(file, "%s/filemap_%d.scrinfo", scr_cntl_prefix, i);
      scr_hash_set_kv(hash, "Filemap", file);
    }
    scr_hash_write_path(scr_master_map_file, hash);
    scr_hash_delete(&hash);
  } else {
    /* send our global rank to the master */
    MPI_Gather(
      &scr_my_rank_world, 1, MPI_INT, NULL, 1, MPI_INT,
      0, scr_storedesc_cntl->comm
    );
  }

  /* receive our filemap from master */
  scr_hash* recv_hash = scr_hash_new();
  scr_hash_exchange(send_hash, recv_hash, scr_storedesc_cntl->comm);

  /* merge map sent from master into our map */
  scr_hash* map_from_master = scr_hash_getf(recv_hash, "%d", 0);
  if (map_from_master != NULL) {
    scr_hash_merge(my_map, map_from_master);
  }

  /* write out our local filemap */
  if (scr_filemap_num_ranks(my_map) > 0) {
    scr_filemap_write(scr_map_file, my_map);
  }

  /* free off our send and receive hashes */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  return SCR_SUCCESS;
}
Beispiel #7
0
/* remove any dataset ids from flush file which are not in cache,
 * and add any datasets in cache that are not in the flush file */
int scr_flush_file_rebuild(const scr_filemap* map)
{
  if (scr_my_rank_world == 0) {
    /* read the flush file */
    scr_hash* hash = scr_hash_new();
    scr_hash_read_path(scr_flush_file, hash);

    /* get ordered list of dataset ids in flush file */
    int flush_ndsets;
    int* flush_dsets;
    scr_hash* flush_dsets_hash = scr_hash_get(hash, SCR_FLUSH_KEY_DATASET);
    scr_hash_list_int(flush_dsets_hash, &flush_ndsets, &flush_dsets);

    /* get ordered list of dataset ids in cache */
    int cache_ndsets;
    int* cache_dsets;
    scr_filemap_list_datasets(map, &cache_ndsets, &cache_dsets);

    int flush_index = 0;
    int cache_index = 0;
    while (flush_index < flush_ndsets && cache_index < cache_ndsets) {
      /* get next smallest index from flush file and cache */
      int flush_dset = flush_dsets[flush_index];
      int cache_dset = cache_dsets[cache_index];

      if (flush_dset < cache_dset) {
        /* dataset exists in flush file but not in cache,
         * delete it from the flush file */
        scr_hash_unset_kv_int(hash, SCR_FLUSH_KEY_DATASET, flush_dset);
        flush_index++;
      } else if (cache_dset < flush_dset) {
        /* dataset exists in cache but not flush file,
         * add it to the flush file */
        scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset);
        scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
        cache_index++;
      } else {
        /* dataset exists in cache and the flush file,
         * ensure that it is listed as being in the cache */
        scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset);
        scr_hash_unset_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
        scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
        flush_index++;
        cache_index++;
      }
    }
    while (flush_index < flush_ndsets) {
      /* dataset exists in flush file but not in cache,
       * delete it from the flush file */
      int flush_dset = flush_dsets[flush_index];
      scr_hash_unset_kv_int(hash, SCR_FLUSH_KEY_DATASET, flush_dset);
      flush_index++;
    }
    while (cache_index < cache_ndsets) {
      /* dataset exists in cache but not flush file,
       * add it to the flush file */
      int cache_dset = cache_dsets[cache_index];
      scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset);
      scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
      cache_index++;
    }

    /* free our list of cache dataset ids */
    scr_free(&cache_dsets);

    /* free our list of flush file dataset ids */
    scr_free(&flush_dsets);

    /* write the hash back to the flush file */
    scr_hash_write_path(scr_flush_file, hash);

    /* delete the hash */
    scr_hash_delete(&hash);
  }
  return SCR_SUCCESS;
}
Beispiel #8
0
/* flush files specified in list, and record corresponding entries for summary file */
static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary)
{
  /* assume we will succeed in this flush */
  int rc = SCR_SUCCESS;

  /* flush each of my files and fill in summary data structure */
  scr_hash_elem* elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (elem = scr_hash_elem_first(files);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(elem);

    /* convert file to path and extract name of file */
    scr_path* path_name = scr_path_from_str(file);
    scr_path_basename(path_name);

    /* get the hash for this element */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get meta data for this file */
    scr_meta* meta = scr_hash_get(hash, SCR_KEY_META);

    /* if segments are defined, we flush the file to its containers,
     * otherwise we copy the file out as is */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* TODO: PRESERVE get original filename here */

      /* add this file to the summary file */
      char* name = scr_path_strdup(path_name);
      scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
      scr_free(&name);

// USERDEF fixme!
      /* flush the file to the containers listed in its segmenets */
      if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) {
        /* successfully flushed this file, record the filesize */
        unsigned long filesize = 0;
        if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
          scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
        }

        /* record the crc32 if one was computed */
        uLong crc = 0;
        if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
          scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
        }

        /* record segment information in summary file */
        scr_hash* segments_copy = scr_hash_new();
        scr_hash_merge(segments_copy, segments);
        scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy);
      } else {
        /* the flush failed */
        rc = SCR_FAILURE;

        /* explicitly mark file as incomplete */
        scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
      }
    } else {
      /* get directory to flush file to */
      char* dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) {
        /* create full path of destination file */
        scr_path* path_full = scr_path_from_str(dir);
        scr_path_append(path_full, path_name);

        /* get relative path to flushed file from SCR_PREFIX directory */
        scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full);
        if (! scr_path_is_null(path_relative)) {
          /* record the name of the file in the summary hash, and get reference to a hash for this file */
          char* name = scr_path_strdup(path_relative);
          scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
          scr_free(&name);

          /* flush the file and fill in the meta data for this file */
          if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) {
            /* successfully flushed this file, record the filesize */
            unsigned long filesize = 0;
            if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
              scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
            }

            /* record the crc32 if one was computed */
            uLong crc = 0;
            if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
              scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
            }
          } else {
            /* the flush failed */
            rc = SCR_FAILURE;

            /* explicitly mark incomplete files */
            scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
          }
        } else {
          scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d",
            dir, scr_prefix, __FILE__, __LINE__
          );
        }

        /* free relative and full paths */
        scr_path_delete(&path_relative);
        scr_path_delete(&path_full);
      } else {
        scr_abort(-1, "Failed to read directory to flush file to @ %s:%d",
          __FILE__, __LINE__
        );
      }
    }

    /* free the file name path */
    scr_path_delete(&path_name);
  }

  return rc;
}
Beispiel #9
0
/* given a hash of transfer file data, look for a file which needs to
 * be transfered. If src file is set, try to continue with that file,
 * otherwise, pick the first available file */
int find_file(scr_hash* hash, char** src, char** dst, off_t* position, off_t* filesize)
{
  int found_a_file = 0;

  scr_hash* files = scr_hash_get(hash, SCR_TRANSFER_KEY_FILES);
  if (files != NULL) {
    /* if we're given a file name, try to continue with that file */
    if (!found_a_file && src != NULL && *src != NULL) {
      /* src was set, so assume dst is also set, create a dummy dst
       * variable to hold the string which may be strdup'd in
       * need_transfer call */
      char* tmp_dst = NULL;
      if (need_transfer(files, *src, &tmp_dst, position, filesize) == SCR_SUCCESS) {
        /* can continue with the same file (position may have been
         * updated though) */
        found_a_file = 1;

        /* free the dummy */
        /* TODO: note that if destination has been updated, we're
         * ignoring that change */
        free(tmp_dst);
      } else {
        /* otherwise, this file no longer needs transfered,
         * so free the strings */
        clear_parameters(src, dst, position);
      }
    }

    /* if we still don't have a file, scan the hash and use the first
     * file we find */
    if (!found_a_file) {
      scr_hash_elem* elem;
      for (elem = scr_hash_elem_first(files);
           elem != NULL;
           elem = scr_hash_elem_next(elem))
      {
        /* get the filename */
        char* name = scr_hash_elem_key(elem);

        /* check whether this file needs transfered */
        if (name != NULL &&
            need_transfer(files, name, dst, position, filesize) == SCR_SUCCESS)
        {
          /* found a file, copy its name (the destination and postion
           * are set in need_transfer) */
          *src = strdup(name);
          found_a_file = 1;
          break;
        }
      }
    }
  }

  /* if we didn't find a file, set src and dst to NULL and set
   * position to 0 */
  if (!found_a_file) {
    clear_parameters(src, dst, position);
    return SCR_FAILURE;
  }

  return SCR_SUCCESS;
}
Beispiel #10
0
int main(int argc, char* argv[])
{
  int i, j;
  int index = 1;

  /* print usage if not enough arguments were given */
  if (argc < 2) {
    printf("Usage: scr_rebuild_xor <size> <root> <missing_xor_filename> <ordered_remaining_xor_filenames>\n");
    return 1;
  }

  /* TODO: want to pass this on command line? */
  /* get current working directory */
  char dsetdir[SCR_MAX_FILENAME];
  scr_getcwd(dsetdir, sizeof(dsetdir));

  /* create and reduce path for dataset */
  scr_path* path_dset = scr_path_from_str(dsetdir);
  scr_path_reduce(path_dset);

  /* allocate buffers */
  char* buffer_A = malloc(buffer_size * sizeof(char));
  char* buffer_B = malloc(buffer_size * sizeof(char));
  if (buffer_A == NULL || buffer_B == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the size of the XOR set */
  int xor_set_size = (int) strtol(argv[index++], (char **)NULL, 10);
  if (xor_set_size <= 0) {
    scr_err("Invalid XOR set size argument %s @ %s:%d",
      argv[index-1], __FILE__, __LINE__
    );
    return 1;
  }

  /* allocate memory for data structures based on the XOR set size */
  int*   num_files  = malloc(xor_set_size * sizeof(int));
  int*   offsets    = malloc(xor_set_size * sizeof(int));
  char** xor_files  = malloc(xor_set_size * sizeof(char*));
  int*   xor_fds    = malloc(xor_set_size * sizeof(int));
  scr_hash** xor_headers = malloc(xor_set_size * sizeof(scr_hash*));
  if (num_files == NULL || offsets == NULL || xor_files == NULL || xor_fds == NULL || xor_headers == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the rank of the missing process (the root) */
  int root = (int) strtol(argv[index++], (char **)NULL, 10);
  if (root < 0 || root >= xor_set_size) {
    scr_err("Invalid root argument %s @ %s:%d",
      argv[index-1], __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the missing xor filename */
  xor_files[0] = strdup(argv[index++]);
  if (xor_files[0] == NULL) {
    scr_err("Failed to dup XOR filename @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the xor filenames (expected to be in order of XOR segment number) */
  /* we order ranks so that root is index 0, the rank to the right of root is index 1, and so on */
  for (i=0; i < xor_set_size; i++) {
    xor_headers[i] = scr_hash_new();

    /* we'll get the XOR file name for root from the header stored in the XOR file of the partner */
    if (i == root) {
      continue;
    }

    /* adjust the index relative to root */
    j = i - root;
    if (j < 0) {
      j += xor_set_size;
    }

    /* copy the XOR file name */
    xor_files[j] = strdup(argv[index++]);
    if (xor_files[j] == NULL) {
      scr_err("Failed to dup XOR filename @ %s:%d",
        __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* open each of the xor files and read in the headers */
  for (i=1; i < xor_set_size; i++) {
    /* open each xor file for reading */
    xor_fds[i] = scr_open(xor_files[i], O_RDONLY);
    if (xor_fds[i] < 0) {
      scr_err("Opening xor segment file: scr_open(%s) errno=%d %s @ %s:%d",
        xor_files[i], errno, strerror(errno), __FILE__, __LINE__
      );
      return 1;
    }

    /* read the header from this xor file */
    if (scr_hash_read_fd(xor_files[i], xor_fds[i], xor_headers[i]) < 0) {
      scr_err("Failed to read XOR header from %s @ %s:%d",
        xor_files[i], __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* build header for missing XOR file */
  int partner_rank = -1;
  if (xor_set_size >= 2) {
    scr_hash_merge(xor_headers[0], xor_headers[1]);

    /* fetch our own file list from rank to our right */
    scr_hash* rhs_hash = scr_hash_get(xor_headers[1], SCR_KEY_COPY_XOR_PARTNER);
    scr_hash* current_hash = scr_hash_new();
    scr_hash_merge(current_hash, rhs_hash);
    scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT, current_hash);

    /* we are the partner to the rank to our left */
    scr_hash* lhs_hash = scr_hash_get(xor_headers[xor_set_size-1], SCR_KEY_COPY_XOR_CURRENT);
    scr_hash* partner_hash = scr_hash_new();
    scr_hash_merge(partner_hash, lhs_hash);
    scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_PARTNER, partner_hash);

    /* get global rank of partner */
    if (scr_hash_util_get_int(lhs_hash, SCR_KEY_COPY_XOR_RANK, &partner_rank) != SCR_SUCCESS) {
      scr_err("Failed to read partner rank from XOR file header in %s @ %s:%d",
        xor_files[xor_set_size-1], __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* get a pointer to the current hash for the missing rank */
  scr_hash* missing_current_hash = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT);

  /* read the rank */
  int my_rank = -1;
  if (scr_hash_util_get_int(missing_current_hash, SCR_KEY_COPY_XOR_RANK, &my_rank) != SCR_SUCCESS) {
    scr_err("Failed to read rank from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* get the dataset */
  scr_dataset* dataset = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_DATASET);

  /* read the dataset id */
  int dset_id = -1;
  if (scr_dataset_get_id(dataset, &dset_id) != SCR_SUCCESS) {
    scr_err("Failed to read dataset id from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* read the ranks */
  int num_ranks = -1;
  if (scr_hash_util_get_int(xor_headers[0], SCR_KEY_COPY_XOR_RANKS, &num_ranks) != SCR_SUCCESS) {
    scr_err("Failed to read ranks from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* get name of partner's fmap */
  scr_path* path_partner_map = scr_path_from_str(".scr");
  scr_path_append_strf(path_partner_map, "fmap.%d.scr", partner_rank);

  /* extract partner's flush descriptor */
  scr_hash* flushdesc = scr_hash_new();
  scr_filemap* partner_map = scr_filemap_new();
  scr_filemap_read(path_partner_map, partner_map);
  scr_filemap_get_flushdesc(partner_map, dset_id, partner_rank, flushdesc);
  scr_filemap_delete(&partner_map);

  /* delete partner map path */
  scr_path_delete(&path_partner_map);

  /* determine whether we should preserve user directories */
  int preserve_dirs = 0;
  scr_hash_util_get_int(flushdesc, SCR_SCAVENGE_KEY_PRESERVE, &preserve_dirs);

  /* read the chunk size */
  unsigned long chunk_size = 0;
  if (scr_hash_util_get_unsigned_long(xor_headers[0], SCR_KEY_COPY_XOR_CHUNK, &chunk_size) != SCR_SUCCESS) {
    scr_err("Failed to read chunk size from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* determine number of files each member wrote in XOR set */
  for (i=0; i < xor_set_size; i++) {
    /* record the number of files for this rank */
    scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT);
    if (scr_hash_util_get_int(current_hash, SCR_KEY_COPY_XOR_FILES, &num_files[i]) != SCR_SUCCESS) {
      scr_err("Failed to read number of files from %s @ %s:%d",
        xor_files[i], __FILE__, __LINE__
      );
      return 1;
    }
  }
  
  /* count the total number of files and set the offsets array */
  int total_num_files = 0;
  for (i=0; i < xor_set_size; i++) {
    offsets[i] = total_num_files;
    total_num_files += num_files[i];
  }

  /* allocate space for a file descriptor, file name pointer, and filesize for each user file */
  int* user_fds                 = (int*)           malloc(total_num_files * sizeof(int));
  char** user_files             = (char**)         malloc(total_num_files * sizeof(char*));
  char** user_rel_files         = (char**)         malloc(total_num_files * sizeof(char*));
  unsigned long* user_filesizes = (unsigned long*) malloc(total_num_files * sizeof(unsigned long));
  if (user_fds == NULL || user_files == NULL || user_rel_files == NULL || user_filesizes == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* get file name, file size, and open each of the user files that we have */
  for (i=0; i < xor_set_size; i++) {
    scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT);

    /* for each file belonging to this rank, get filename, filesize, and open file */
    for (j=0; j < num_files[i]; j++) {
      int offset = offsets[i] + j;

      /* get the meta data for this file */
      scr_meta* meta = scr_hash_get_kv_int(current_hash, SCR_KEY_COPY_XOR_FILE, j);
      if (meta == NULL) {
        scr_err("Failed to read meta data for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* record the filesize of this file */
      if (scr_meta_get_filesize(meta, &user_filesizes[offset]) != SCR_SUCCESS) {
        scr_err("Failed to read filesize field for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* get filename */
      char* origname;
      if (scr_meta_get_origname(meta, &origname) != SCR_SUCCESS) {
        scr_err("Failed to read original name for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* construct full path to user file */
      scr_path* path_user_full = scr_path_from_str(origname);
      if (preserve_dirs) {
        /* get original path of file */
        char* origpath;
        if (scr_meta_get_origpath(meta, &origpath) != SCR_SUCCESS) {
          scr_err("Failed to read original path for file %d in %s @ %s:%d",
            j, xor_files[i], __FILE__, __LINE__
          );
          return 1;
        }

        /* construct full path to file */
        scr_path_prepend_str(path_user_full, origpath);
      } else {
        /* construct full path to file */
        scr_path_prepend(path_user_full, path_dset);
      }

      /* reduce path to user file */
      scr_path_reduce(path_user_full);

      /* make a copy of the full path */
      user_files[offset] = scr_path_strdup(path_user_full);

      /* make a copy of relative path */
      scr_path* path_user_rel = scr_path_relative(path_dset, path_user_full);
      user_rel_files[offset] = scr_path_strdup(path_user_rel);
      scr_path_delete(&path_user_rel);

      /* free the full path */
      scr_path_delete(&path_user_full);

      /* open the file */
      if (i == 0) {
        /* create directory for file */
        scr_path* user_dir_path = scr_path_from_str(user_files[offset]);
        scr_path_reduce(user_dir_path);
        scr_path_dirname(user_dir_path);
        if (! scr_path_is_null(user_dir_path)) {
          char* user_dir = scr_path_strdup(user_dir_path);
          mode_t mode_dir = scr_getmode(1, 1, 1);
          if (scr_mkdir(user_dir, mode_dir) != SCR_SUCCESS) {
            scr_err("Failed to create directory for user file %s @ %s:%d",
              user_dir, __FILE__, __LINE__
            );
            return 1;
          }
          scr_free(&user_dir);
        }
        scr_path_delete(&user_dir_path);

        /* open missing file for writing */
        mode_t mode_file = scr_getmode(1, 1, 0);
        user_fds[offset] = scr_open(user_files[offset], O_WRONLY | O_CREAT | O_TRUNC, mode_file);
        if (user_fds[offset] < 0) {
          scr_err("Opening user file for writing: scr_open(%s) errno=%d %s @ %s:%d",
            user_files[offset], errno, strerror(errno), __FILE__, __LINE__
          );
          return 1;
        }
      } else {
        /* open existing file for reading */
        user_fds[offset] = scr_open(user_files[offset], O_RDONLY);
        if (user_fds[offset] < 0) {
          scr_err("Opening user file for reading: scr_open(%s) errno=%d %s @ %s:%d",
            user_files[offset], errno, strerror(errno), __FILE__, __LINE__
          );
          return 1;
        }
      }
    }
  }

  /* finally, open the xor file for the missing rank */
  mode_t mode_file = scr_getmode(1, 1, 0);
  xor_fds[0] = scr_open(xor_files[0], O_WRONLY | O_CREAT | O_TRUNC, mode_file);
  if (xor_fds[0] < 0) {
    scr_err("Opening xor file to be reconstructed: scr_open(%s) errno=%d %s @ %s:%d",
      xor_files[0], errno, strerror(errno), __FILE__, __LINE__
    );
    return 1;
  }

  int rc = 0;

  /* write the header to the XOR file of the missing rank */
  if (scr_hash_write_fd(xor_files[0], xor_fds[0], xor_headers[0]) < 0) {
    rc = 1;
  }

  /* this offset array records the current position we are in the logical file for each rank */
  unsigned long* offset = malloc(xor_set_size * sizeof(unsigned long));
  if (offset == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }
  for (i=0; i < xor_set_size; i++) {
    offset[i] = 0;
  }

  unsigned long write_pos = 0;
  int chunk_id;
  for (chunk_id = 0; chunk_id < xor_set_size && rc == 0; chunk_id++) {
    size_t nread = 0;
    while (nread < chunk_size && rc == 0) {
      /* read upto buffer_size bytes at a time */
      size_t count = chunk_size - nread;
      if (count > buffer_size) {
        count = buffer_size;
      }

      /* clear our buffer */
      memset(buffer_A, 0, count);

      /* read a segment from each rank and XOR it into our buffer */
      for (i=1; i < xor_set_size; i++) {
        /* read the next set of bytes for this chunk from my file into send_buf */
        if (chunk_id != ((i + root) % xor_set_size)) {
          /* read chunk from the logical file for this rank */
          if (scr_read_pad_n(num_files[i], &user_files[offsets[i]], &user_fds[offsets[i]],
                             buffer_B, count, offset[i], &user_filesizes[offsets[i]]) != SCR_SUCCESS)
          {
            /* our read failed, set the return code to an error */
            rc = 1;
            count = 0;
          }
          offset[i] += count;
        } else {
          /* read chunk from the XOR file for this rank */
          if (scr_read_attempt(xor_files[i], xor_fds[i], buffer_B, count) != count) {
            /* our read failed, set the return code to an error */
            rc = 1;
            count = 0;
          }
        }

        /* TODO: XORing with unsigned long would be faster here (if chunk size is multiple of this size) */
        /* merge the blocks via xor operation */
        for (j = 0; j < count; j++) {
          buffer_A[j] ^= buffer_B[j];
        }
      }

      /* at this point, we have the data from the missing rank, write it out */
      if (chunk_id != root) {
        /* write chunk to logical file for the missing rank */
        if (scr_write_pad_n(num_files[0], &user_files[0], &user_fds[0],
                            buffer_A, count, write_pos, &user_filesizes[0]) != SCR_SUCCESS)
        {
          /* our write failed, set the return code to an error */
          rc = 1;
        }
        write_pos += count;
      } else {
        /* write chunk to xor file for the missing rank */
        if (scr_write_attempt(xor_files[0], xor_fds[0], buffer_A, count) != count) {
          /* our write failed, set the return code to an error */
          rc = 1;
        }
      }

      nread += count;
    }
  }

  /* close each of the user files */
  for (i=0; i < total_num_files; i++) {
    if (scr_close(user_files[i], user_fds[i]) != SCR_SUCCESS) {
      rc = 1;
    }
  }

  /* close each of the XOR files */
  for (i=0; i < xor_set_size; i++) {
    if (scr_close(xor_files[i], xor_fds[i]) != SCR_SUCCESS) {
      rc = 1;
    }
  }

  /* if the write failed, delete the files we just wrote, and return an error */
  if (rc != 0) {
    for (j=0; j < num_files[0]; j++) {
      scr_file_unlink(user_files[j]);
    }
    scr_file_unlink(xor_files[0]);
    return 1;
  }

  /* check that filesizes are correct */
  unsigned long filesize;
  for (j=0; j < num_files[0]; j++) {
    filesize = scr_file_size(user_files[j]);
    if (filesize != user_filesizes[j]) {
      /* the filesize check failed, so delete the file */
      scr_file_unlink(user_files[j]);

      /* mark the file as incomplete */
      scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j);
      scr_meta_set_complete(meta, 0);

      rc = 1;
    }
  }
  /* TODO: we didn't record the filesize of the XOR file for the missing rank anywhere */

  /* create a filemap for this rank */
  scr_filemap* map = scr_filemap_new();
  if (map == NULL) {
    scr_err("Failed to allocate filemap @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* record the dataset information in the filemap */
  scr_filemap_set_dataset(map, dset_id, my_rank, dataset);

  /* write meta data for each of the user files and add each one to the filemap */
  for (j=0; j < num_files[0]; j++) {
    /* add user file to filemap and record meta data */
    char* user_file_relative = user_rel_files[j];
    scr_filemap_add_file(map, dset_id, my_rank, user_file_relative);
    scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j);
    scr_filemap_set_meta(map, dset_id, my_rank, user_file_relative, meta);
  }

  /* write meta data for xor file and add it to the filemap */
  scr_filemap_add_file(map, dset_id, my_rank, xor_files[0]);
  unsigned long full_chunk_filesize = scr_file_size(xor_files[0]);
  int missing_complete = 1;
  scr_meta* meta_chunk = scr_meta_new();
  scr_meta_set_filename(meta_chunk, xor_files[0]);
  scr_meta_set_filetype(meta_chunk, SCR_META_FILE_XOR);
  scr_meta_set_filesize(meta_chunk, full_chunk_filesize);
  /* TODO: remove this from meta file, for now it's needed in scr_index.c */
  scr_meta_set_ranks(meta_chunk, num_ranks);
  scr_meta_set_complete(meta_chunk, missing_complete);
  scr_filemap_set_meta(map, dset_id, my_rank, xor_files[0], meta_chunk);

  /* set expected number of files for the missing rank */
  int expected_num_files = scr_filemap_num_files(map, dset_id, my_rank);
  scr_filemap_set_expected_files(map, dset_id, my_rank, expected_num_files);

  /* compute, check, and store crc values with files */
  for (j=0; j < num_files[0]; j++) {
    /* compute crc on user file */
    char* user_file_relative = user_rel_files[j];
    if (scr_compute_crc(map, dset_id, my_rank, user_file_relative) != SCR_SUCCESS) {
      /* the crc check failed, so delete the file */
      scr_file_unlink(user_files[j]);
      rc = 1;
    }
  }
  if (scr_compute_crc(map, dset_id, my_rank, xor_files[0]) != SCR_SUCCESS) {
    /* the crc check failed, so delete the file */
    scr_file_unlink(xor_files[0]);
    rc = 1;
  }

  /* store flush descriptor */
  scr_filemap_set_flushdesc(map, dset_id, my_rank, flushdesc);

  /* write filemap for this rank */
  scr_path* path_map = scr_path_from_str(".scr");
  scr_path_append_strf(path_map, "fmap.%d.scr", my_rank);
  if (scr_filemap_write(path_map, map) != SCR_SUCCESS) {
    rc = 1;
  }
  scr_path_delete(&path_map);

  /* delete the map */
  scr_filemap_delete(&map);

  scr_meta_delete(&meta_chunk);

  /* delete the flush/scavenge descriptor */
  scr_hash_delete(&flushdesc);

  scr_free(&offset);

  for (i=0; i < total_num_files; i++) {
    scr_free(&user_rel_files[i]);
    scr_free(&user_files[i]);
  }

  scr_free(&user_filesizes);
  scr_free(&user_rel_files);
  scr_free(&user_files);
  scr_free(&user_fds);

  for (i=0; i < xor_set_size; i++) {
    scr_hash_delete(&xor_headers[i]);
  }

  for (i=0; i < xor_set_size; i++) {
    scr_free(&xor_files[i]);
  }

  scr_free(&xor_headers);
  scr_free(&xor_fds);
  scr_free(&xor_files);
  scr_free(&offsets);
  scr_free(&num_files);

  scr_free(&buffer_B);
  scr_free(&buffer_A);

  scr_path_delete(&path_dset);

  return rc;
}
Beispiel #11
0
int main(int argc, char* argv[])
{
  /* print usage if not enough arguments were given */
  if (argc < 2) {
    printf("Usage: scr_inspect_cache <cntldir>\n");
    return 1;
  }

  scr_path* scr_master_map_file = scr_path_from_str(strdup(argv[1]));

  /* get my hostname */
  if (gethostname(scr_my_hostname, sizeof(scr_my_hostname)) != 0) {
    scr_err("scr_inspect_cache: Call to gethostname failed @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the master map */
  scr_hash* hash = scr_hash_new();
  scr_hash_read_path(scr_master_map_file, hash);

  /* create an empty filemap */
  scr_filemap* map = scr_filemap_new();

  /* for each filemap listed in the master map */
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap"));
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename of this filemap */
    char* file = scr_hash_elem_key(elem);

    /* read in the filemap */
    scr_filemap* tmp_map = scr_filemap_new();
    scr_path* path_file = scr_path_from_str(file);
    scr_filemap_read(path_file, tmp_map);
    scr_path_delete(&path_file);

    /* merge it with local 0 filemap */
    scr_filemap_merge(map, tmp_map);

    /* delete filemap */
    scr_filemap_delete(&tmp_map);
  }

  /* scan each file for each rank of each dataset */
  scr_hash_elem* dset_elem;
  for (dset_elem = scr_filemap_first_dataset(map);
       dset_elem != NULL;
       dset_elem = scr_hash_elem_next(dset_elem))
  {
    /* get dataset id */
    int dset = scr_hash_elem_key_int(dset_elem);

    scr_hash_elem* rank_elem;
    for (rank_elem = scr_filemap_first_rank_by_dataset(map, dset);
         rank_elem != NULL;
         rank_elem = scr_hash_elem_next(rank_elem))
    {
      /* get rank id */
      int rank = scr_hash_elem_key_int(rank_elem);

      int missing_file = 0;
      int expected = scr_filemap_get_expected_files(map, dset, rank);
      int num      = scr_filemap_num_files(map, dset, rank);
      if (expected == num) {
        /* first time through the file list, check that we have each file */
        scr_hash_elem* file_elem = NULL;
        for (file_elem = scr_filemap_first_file(map, dset, rank);
             file_elem != NULL;
             file_elem = scr_hash_elem_next(file_elem))
        {
          /* get filename */
          char* file = scr_hash_elem_key(file_elem);

          /* check that we can read the file */
          if (! scr_bool_have_file(map, dset, rank, file)) {
              missing_file = 1;
              scr_dbg(1, "File is unreadable or incomplete: Dataset %d, Rank %d, File: %s",
                dset, rank, file
              );
          }
        }
      } else {
        missing_file = 1;
      }

      /* TODO: print partner names */
      /* if we're not missing a file for rank, print this info out */
      if (! missing_file) {
        scr_hash* desc = scr_hash_new();
        scr_filemap_get_desc(map, dset, rank, desc);
        char* type           = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_TYPE);
        char* groups_str     = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUPS);
        char* group_id_str   = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_ID);
        char* group_size_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_SIZE);
        char* group_rank_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_RANK);
        if (type != NULL && groups_str != NULL && group_id_str != NULL && group_size_str != NULL && group_rank_str != NULL) {
          /* we already have a group id and rank, use that to rebuild the communicator */
          int groups     = atoi(groups_str);
          int group_id   = atoi(group_id_str);
          int group_size = atoi(group_size_str);
          int group_rank = atoi(group_rank_str);
          printf("DSET=%d RANK=%d TYPE=%s GROUPS=%d GROUP_ID=%d GROUP_SIZE=%d GROUP_RANK=%d FILES=1\n",
            dset, rank, type, groups, group_id, group_size, group_rank
          );
        }
      }
    }
  }

  scr_path_delete(&scr_master_map_file);

  return 0;
}
Beispiel #12
0
/* verify the hash is a valid hash for a version 5 summary file */
static int scr_summary_check_v5(scr_hash* hash)
{
  /* check that the summary file version is something we support */
  int version;
  if (scr_hash_util_get_int(hash, SCR_SUMMARY_KEY_VERSION, &version) != SCR_SUCCESS) {
    /* couldn't find version number */
    scr_err("Failed to read version number in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  if (version != SCR_SUMMARY_FILE_VERSION_5) {
    /* invalid version number */
    scr_err("Found version number %d when %d was expected in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that we have exactly one checkpoint */
  scr_hash* ckpt_hash = scr_hash_get(hash, SCR_SUMMARY_5_KEY_CKPT);
  if (scr_hash_size(ckpt_hash) != 1) {
    scr_err("More than one checkpoint found in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* get the first (and only) checkpoint id */
  char* ckpt_str = scr_hash_elem_get_first_val(hash, SCR_SUMMARY_5_KEY_CKPT);
  scr_hash* ckpt = scr_hash_get(ckpt_hash, ckpt_str);

  /* check that the complete string is set and is set to 1 */
  int complete;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
    /* could not find complete value (assume it's incomplete) */
    return SCR_FAILURE;
  }
  if (complete != 1) {
    /* checkpoint is marked as incomplete */
    return SCR_FAILURE;
  }

  /* read in the the number of ranks for this checkpoint */
  int ranks;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_RANKS, &ranks) != SCR_SUCCESS) {
    scr_err("Failed to read number of ranks in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that the number of ranks matches the number we're currently running with */
  if (ranks != scr_ranks_world) {
    scr_err("Number of ranks %d that wrote checkpoint does not match current number of ranks %d @ %s:%d",
      ranks, scr_ranks_world, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  return SCR_SUCCESS;
}
Beispiel #13
0
/* fetch files from parallel file system */
static int scr_fetch_files(
  scr_filemap* map,
  scr_path* fetch_path,
  int* dataset_id,
  int* checkpoint_id)
{
  /* get fetch directory as string */
  char* fetch_dir = scr_path_strdup(fetch_path);

  /* this may take a while, so tell user what we're doing */
  if (scr_my_rank_world == 0) {
    scr_dbg(1, "Attempting fetch from %s", fetch_dir);
  }

  /* make sure all processes make it this far before progressing */
  MPI_Barrier(scr_comm_world);

  /* start timer */
  time_t timestamp_start;
  double time_start;
  if (scr_my_rank_world == 0) {
    timestamp_start = scr_log_seconds();
    time_start = MPI_Wtime();
  }

  /* log the fetch attempt */
  if (scr_my_rank_world == 0) {
    if (scr_log_enable) {
      time_t now = scr_log_seconds();
      scr_log_event("FETCH STARTED", fetch_dir, NULL, &now, NULL);
    }
  }

  /* allocate a new hash to get a list of files to fetch */
  scr_hash* file_list = scr_hash_new();

  /* read the summary file */
  if (scr_fetch_summary(fetch_dir, file_list) != SCR_SUCCESS) {
    if (scr_my_rank_world == 0) {
      scr_dbg(1, "Failed to read summary file @ %s:%d", __FILE__, __LINE__);
      if (scr_log_enable) {
        double time_end = MPI_Wtime();
        double time_diff = time_end - time_start;
        time_t now = scr_log_seconds();
        scr_log_event("FETCH FAILED", fetch_dir, NULL, &now, &time_diff);
      }
    }
    scr_hash_delete(&file_list);
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* get a pointer to the dataset */
  scr_dataset* dataset = scr_hash_get(file_list, SCR_KEY_DATASET);

  /* get the dataset id */
  int id;
  if (scr_dataset_get_id(dataset, &id) != SCR_SUCCESS) {
    if (scr_my_rank_world == 0) {
      scr_dbg(1, "Invalid id in summary file @ %s:%d", __FILE__, __LINE__);
      if (scr_log_enable) {
        double time_end = MPI_Wtime();
        double time_diff = time_end - time_start;
        time_t now = scr_log_seconds();
        scr_log_event("FETCH FAILED", fetch_dir, NULL, &now, &time_diff);
      }
    }
    scr_hash_delete(&file_list);
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* get the checkpoint id for this dataset */
  int ckpt_id;
  if (scr_dataset_get_ckpt(dataset, &ckpt_id) != SCR_SUCCESS) {
    /* eventually, we'll support reading of non-checkpoint datasets,
     * but we don't yet */
    scr_err("Failed to read checkpoint id from dataset @ %s:%d",
      __FILE__, __LINE__
    );
    scr_hash_delete(&file_list);
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* delete any existing files for this dataset id (do this before
   * filemap_read) */
  scr_cache_delete(map, id);

  /* get the redundancy descriptor for this id */
  scr_reddesc* c = scr_reddesc_for_checkpoint(ckpt_id, scr_nreddescs, scr_reddescs);

  /* store our redundancy descriptor hash in the filemap */
  scr_hash* my_desc_hash = scr_hash_new();
  scr_reddesc_store_to_hash(c, my_desc_hash);
  scr_filemap_set_desc(map, id, scr_my_rank_world, my_desc_hash);
  scr_hash_delete(&my_desc_hash);

  /* write the filemap out before creating the directory */
  scr_filemap_write(scr_map_file, map);

  /* create the cache directory */
  scr_cache_dir_create(c, id);

  /* get the cache directory */
  char cache_dir[SCR_MAX_FILENAME];
  scr_cache_dir_get(c, id, cache_dir);

  /* now we can finally fetch the actual files */
  int success = 1;
  if (scr_fetch_data(file_list, cache_dir, map) != SCR_SUCCESS) {
    success = 0;
  }

  /* free the hash holding the summary file data */
  scr_hash_delete(&file_list);

  /* check that all processes copied their file successfully */
  if (! scr_alltrue(success)) {
    /* someone failed, so let's delete the partial checkpoint */
    scr_cache_delete(map, id);

    if (scr_my_rank_world == 0) {
      scr_dbg(1, "One or more processes failed to read its files @ %s:%d",
        __FILE__, __LINE__
      );
      if (scr_log_enable) {
        double time_end = MPI_Wtime();
        double time_diff = time_end - time_start;
        time_t now = scr_log_seconds();
        scr_log_event("FETCH FAILED", fetch_dir, &id, &now, &time_diff);
      }
    }
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* apply redundancy scheme */
  double bytes_copied = 0.0;
  int rc = scr_reddesc_apply(map, c, id, &bytes_copied);
  if (rc == SCR_SUCCESS) {
    /* record dataset and checkpoint ids */
    *dataset_id = id;
    *checkpoint_id = ckpt_id;

    /* update our flush file to indicate this checkpoint is in cache
     * as well as the parallel file system */
    /* TODO: should we place SCR_FLUSH_KEY_LOCATION_PFS before
     * scr_reddesc_apply? */
    scr_flush_file_location_set(id, SCR_FLUSH_KEY_LOCATION_CACHE);
    scr_flush_file_location_set(id, SCR_FLUSH_KEY_LOCATION_PFS);
    scr_flush_file_location_unset(id, SCR_FLUSH_KEY_LOCATION_FLUSHING);
  } else {
    /* something went wrong, so delete this checkpoint from the cache */
    scr_cache_delete(scr_map, id);
  }

  /* stop timer, compute bandwidth, and report performance */
  double total_bytes = bytes_copied;
  if (scr_my_rank_world == 0) {
    double time_end = MPI_Wtime();
    double time_diff = time_end - time_start;
    double bw = total_bytes / (1024.0 * 1024.0 * time_diff);
    scr_dbg(1, "scr_fetch_files: %f secs, %e bytes, %f MB/s, %f MB/s per proc",
      time_diff, total_bytes, bw, bw/scr_ranks_world
    );

    /* log data on the fetch to the database */
    if (scr_log_enable) {
      time_t now = scr_log_seconds();
      if (rc == SCR_SUCCESS) {
        scr_log_event("FETCH SUCCEEDED", fetch_dir, &id, &now, &time_diff);
      } else {
        scr_log_event("FETCH FAILED", fetch_dir, &id, &now, &time_diff);
      }

      char cache_dir[SCR_MAX_FILENAME];
      scr_cache_dir_get(c, id, cache_dir);
      scr_log_transfer("FETCH", fetch_dir, cache_dir, &id,
        &timestamp_start, &time_diff, &total_bytes
      );
    }
  }

  /* free fetch direcotry string */
  scr_free(&fetch_dir);

  return rc;
}
Beispiel #14
0
/* read contents of summary file */
static int scr_fetch_summary(
  const char* summary_dir,
  scr_hash* file_list)
{
  /* assume that we won't succeed in our fetch attempt */
  int rc = SCR_SUCCESS;

  /* check whether summary file exists and is readable */
  if (scr_my_rank_world == 0) {
    /* check that we can access the directory */
    if (scr_file_is_readable(summary_dir) != SCR_SUCCESS) {
      scr_err("Failed to access summary directory %s @ %s:%d",
        summary_dir, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }
  }

  /* broadcast success code from rank 0 */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    return rc;
  }

  /* add path to file list */
  scr_hash_util_set_str(file_list, SCR_KEY_PATH, summary_dir);

  /* build path to summary file */
  scr_path* dataset_path = scr_path_from_str(summary_dir);
  scr_path* meta_path = scr_path_dup(dataset_path);
  scr_path_append_str(meta_path, ".scr");
  scr_path_reduce(meta_path);

  /* rank 0 reads the summary file */
  scr_hash* header = scr_hash_new();
  if (scr_my_rank_world == 0) {
    /* build path to summary file */
    scr_path* summary_path = scr_path_dup(meta_path);
    scr_path_append_str(summary_path, "summary.scr");
    const char* summary_file = scr_path_strdup(summary_path);

    /* open file for reading */
    int fd = scr_open(summary_file, O_RDONLY);
    if (fd >= 0) {
      /* read summary hash */
      ssize_t header_size = scr_hash_read_fd(summary_file, fd, header);
      if (header_size < 0) {
        rc = SCR_FAILURE;
      }

      /* TODO: check that the version is correct */

      /* close the file */
      scr_close(summary_file, fd);
    } else {
      scr_err("Failed to open summary file %s @ %s:%d",
        summary_file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }

    /* free summary path and string */
    scr_free(&summary_file);
    scr_path_delete(&summary_path);
  }

  /* broadcast success code from rank 0 */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    goto cleanup;
  }

  /* broadcast the summary hash */
  scr_hash_bcast(header, 0, scr_comm_world);

  /* extract and record the datast in file list */
  scr_hash* dataset_hash = scr_hash_new();
  scr_dataset* dataset = scr_hash_get(header, SCR_SUMMARY_6_KEY_DATASET);
  scr_hash_merge(dataset_hash, dataset);
  scr_hash_set(file_list, SCR_SUMMARY_6_KEY_DATASET, dataset_hash);

  /* build path to rank2file map */
  scr_path* rank2file_path = scr_path_dup(meta_path);
  scr_path_append_str(rank2file_path, "rank2file.scr");

  /* fetch file names and offsets containing file hash data */
  int valid = 0;
  char* file = NULL;
  unsigned long offset = 0;
  if (scr_my_rank_world == 0) {
    /* rank 0 is only valid reader to start with */
    valid  = 1;
    file   = scr_path_strdup(rank2file_path);
    offset = 0;
  }
  if (scr_fetch_rank2file_map(dataset_path, 1, &valid, &file, &offset)
      != SCR_SUCCESS)
  {
    rc = SCR_FAILURE;
  }

  /* create hashes to exchange data */
  scr_hash* send = scr_hash_new();
  scr_hash* recv = scr_hash_new();

  /* read data from file */
  if (valid) {
    /* open file if necessary */
    int fd = scr_open(file, O_RDONLY);
    if (fd >= 0) {
      /* create hash to hold file contents */
      scr_hash* save = scr_hash_new();

      /* read hash from file */
      scr_lseek(file, fd, offset, SEEK_SET);
      ssize_t readsize = scr_hash_read_fd(file, fd, save);
      if (readsize < 0) {
        scr_err("Failed to read rank2file map file %s @ %s:%d",
          file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* check that the number of ranks match */
      int ranks = 0;
      scr_hash_util_get_int(save, SCR_SUMMARY_6_KEY_RANKS, &ranks);
      if (ranks != scr_ranks_world) {
        scr_err("Invalid number of ranks in %s, got %d expected %d @ %s:%d",
          file, ranks, scr_ranks_world, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* delete current send hash, set it to values from file,
       * delete file hash */
      scr_hash_delete(&send);
      send = scr_hash_extract(save, SCR_SUMMARY_6_KEY_RANK);
      scr_hash_delete(&save);

      /* close the file */
      scr_close(file, fd);
    } else {
      scr_err("Failed to open rank2file map %s @ %s:%d",
        file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }

    /* delete file name string */
    scr_free(&file);
  }

  /* check that everyone read the data ok */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

  /* scatter to groups */
  scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT);

  /* iterate over the ranks that sent data to us, and set up our
   * list of files */
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(recv);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* the key is the source rank, which we don't care about,
     * the info we need is in the element hash */
    scr_hash* elem_hash = scr_hash_elem_hash(elem);

    /* get pointer to file hash */
    scr_hash* file_hash = scr_hash_get(elem_hash, SCR_SUMMARY_6_KEY_FILE);
    if (file_hash != NULL) {
      /* TODO: parse summary file format */
      scr_hash_merge(file_list, elem_hash);
    } else {
      rc = SCR_FAILURE;
    }
  }

  /* fill in file list parameters */
  if (rc == SCR_SUCCESS) {
    /* if we're not using containers, add PATH entry for each of our
     * files */
    scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
    for (elem = scr_hash_elem_first(files);
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the file name */
      char* file = scr_hash_elem_key(elem);

      /* combine the file name with the summary directory to build a
       * full path to the file */
      scr_path* path_full = scr_path_dup(dataset_path);
      scr_path_append_str(path_full, file);

      /* subtract off last component to get just the path */
      scr_path_dirname(path_full);
      char* path = scr_path_strdup(path_full);

      /* record path in file list */
      scr_hash* hash = scr_hash_elem_hash(elem);
      scr_hash_util_set_str(hash, SCR_KEY_PATH, path);

      /* free the path and string */
      scr_free(&path);
      scr_path_delete(&path_full);
    }
  }

  /* check that everyone read the data ok */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

cleanup_hashes:
  /* delete send and receive hashes */
  scr_hash_delete(&recv);
  scr_hash_delete(&send);

  /* free string and path for rank2file map */
  scr_path_delete(&rank2file_path);

cleanup:
  /* free the header hash */
  scr_hash_delete(&header);

  /* free path for dataset directory */
  scr_path_delete(&meta_path);
  scr_path_delete(&dataset_path);

  return rc;
}
Beispiel #15
0
static int scr_fetch_rank2file_map(
  const scr_path* dataset_path,
  int             depth,
  int*            ptr_valid,
  char**          ptr_file,
  unsigned long*  ptr_offset)
{
  int rc = SCR_SUCCESS;

  /* get local variables so we don't have to deference everything */
  int valid            = *ptr_valid;
  char* file           = *ptr_file;
  unsigned long offset = *ptr_offset;

  /* create a hash to hold section of file */
  scr_hash* hash = scr_hash_new();

  /* if we can read from file do it */
  if (valid) {
    /* open file if we haven't already */
    int fd = scr_open(file, O_RDONLY);
    if (fd >= 0) {
      /* read our segment from the file */
      scr_lseek(file, fd, offset, SEEK_SET);
      ssize_t read_rc = scr_hash_read_fd(file, fd, hash);
      if (read_rc < 0) {
        scr_err("Failed to read from %s @ %s:%d",
          file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* close the file */
      scr_close(file, fd);
    } else {
      scr_err("Failed to open rank2file map %s @ %s:%d",
        file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }
  }

  /* check for read errors */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup;
  }

  /* create hashes to exchange data */
  scr_hash* send = scr_hash_new();
  scr_hash* recv = scr_hash_new();

  /* copy rank data into send hash */
  if (valid) {
    scr_hash* rank_hash = scr_hash_get(hash, SCR_SUMMARY_6_KEY_RANK);
    scr_hash_merge(send, rank_hash);
  }

  /* exchange hashes */
  scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT);

  /* see if anyone sent us anything */
  int newvalid = 0;
  char* newfile = NULL;
  unsigned long newoffset = 0;
  scr_hash_elem* elem = scr_hash_elem_first(recv);
  if (elem != NULL) {
    /* got something, so now we'll read in the next step */
    newvalid = 1;

    /* get file name we should read */
    scr_hash* elem_hash = scr_hash_elem_hash(elem);
    char* value;
    if (scr_hash_util_get_str(elem_hash, SCR_SUMMARY_6_KEY_FILE, &value)
        == SCR_SUCCESS)
    {
      /* return string of full path to file to caller */
      scr_path* newpath = scr_path_dup(dataset_path);
      scr_path_append_str(newpath, value);
      newfile = scr_path_strdup(newpath);
      scr_path_delete(&newpath);
    } else {
      rc = SCR_FAILURE;
    }

    /* get offset we should start reading from */
    if (scr_hash_util_get_bytecount(elem_hash, SCR_SUMMARY_6_KEY_OFFSET, &newoffset)
        != SCR_SUCCESS)
    {
      rc = SCR_FAILURE;
    }
  }

  /* free the send and receive hashes */
  scr_hash_delete(&recv);
  scr_hash_delete(&send);

  /* get level id, and broadcast it from rank 0,
   * which we assume to be a reader in all steps */
  int level_id = -1;
  if (valid) {
    if (scr_hash_util_get_int(hash, SCR_SUMMARY_6_KEY_LEVEL, &level_id)
        != SCR_SUCCESS)
    {
      rc = SCR_FAILURE;
    }
  }
  MPI_Bcast(&level_id, 1, MPI_INT, 0, scr_comm_world);

  /* check for read errors */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup;
  }

  /* set parameters for output or next iteration,
   * we already took care of updating ptr_fd earlier */
  if (valid) {
    scr_free(ptr_file);
  }
  *ptr_valid  = newvalid;
  *ptr_file   = newfile;
  *ptr_offset = newoffset;

  /* recurse if we still have levels to read */
  if (level_id > 1) {
    rc = scr_fetch_rank2file_map(dataset_path, depth+1, ptr_valid, ptr_file, ptr_offset);
  }

cleanup:
  /* free the hash */
  scr_hash_delete(&hash);

  return rc;
}
Beispiel #16
0
/* fetch files listed in hash into specified cache directory,
 * update filemap and fill in total number of bytes fetched,
 * returns SCR_SUCCESS if successful */
static int scr_fetch_files_list(
  const scr_hash* file_list,
  const char* dir,
  scr_filemap* map)
{
  /* assume we'll succeed in fetching our files */
  int rc = SCR_SUCCESS;

  /* assume we don't have any files to fetch */
  int my_num_files = 0;

  /* get dataset id */
  int id;
  scr_dataset* dataset = scr_hash_get(file_list, SCR_KEY_DATASET);
  scr_dataset_get_id(dataset, &id);

  /* now iterate through the file list and fetch each file */
  scr_hash_elem* file_elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (file_elem = scr_hash_elem_first(files);
       file_elem != NULL;
       file_elem = scr_hash_elem_next(file_elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(file_elem);

    /* get a pointer to the hash for this file */
    scr_hash* hash = scr_hash_elem_hash(file_elem);

    /* check whether we are supposed to fetch this file */
    /* TODO: this is a hacky way to avoid reading a redundancy file
     * back in under the assumption that it's an original file, which
     * breaks our redundancy computation due to a name conflict on
     * the file names */
    scr_hash_elem* no_fetch_hash = scr_hash_elem_get(hash, SCR_SUMMARY_6_KEY_NOFETCH);
    if (no_fetch_hash != NULL) {
      continue;
    }

    /* increment our file count */
    my_num_files++;

    /* build the destination file name */
    scr_path* path_newfile = scr_path_from_str(file);
    scr_path_basename(path_newfile);
    scr_path_prepend_str(path_newfile, dir);
    char* newfile = scr_path_strdup(path_newfile);
      
    /* add the file to our filemap and write it to disk before creating
     * the file, this way we have a record that it may exist before we
     * actually start to fetch it */
    scr_filemap_add_file(map, id, scr_my_rank_world, newfile);
    scr_filemap_write(scr_map_file, map);

    /* get the file size */
    unsigned long filesize = 0;
    if (scr_hash_util_get_unsigned_long(hash, SCR_KEY_SIZE, &filesize) != SCR_SUCCESS) {
      scr_err("Failed to read file size from summary data @ %s:%d",
        __FILE__, __LINE__
      );
      rc = SCR_FAILURE;

      /* free path and string */
      scr_free(&newfile);
      scr_path_delete(&path_newfile);

      break;
    }

    /* check for a complete flag */
    int complete = 1;
    if (scr_hash_util_get_int(hash, SCR_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
      /* in summary file, the absence of a complete flag on a file
       * implies the file is complete */
      complete = 1;
    }

    /* create a new meta data object for this file */
    scr_meta* meta = scr_meta_new();

    /* set the meta data */
    scr_meta_set_filename(meta, newfile);
    scr_meta_set_filetype(meta, SCR_META_FILE_USER);
    scr_meta_set_filesize(meta, filesize);
    scr_meta_set_complete(meta, 1);
    /* TODODSET: move the ranks field elsewhere, for now it's needed
     * by scr_index.c */
    scr_meta_set_ranks(meta, scr_ranks_world);

    /* get the crc, if set, and add it to the meta data */
    uLong crc;
    if (scr_hash_util_get_crc32(hash, SCR_KEY_CRC, &crc) == SCR_SUCCESS) {
      scr_meta_set_crc32(meta, crc);
    }

    /* fetch file from containers if they are defined, otherwise fetch
     * the native file */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* get source path */
      char* from_dir;
      if (scr_hash_util_get_str(file_list, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        /* fetch file from containers */
        if (scr_fetch_file_from_containers(newfile, meta, segments, from_dir) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to find base dataset directory in file list */
        rc = SCR_FAILURE;
      }
    } else {
      /* fetch native file, lookup directory for this file */
      char* from_dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        if (scr_fetch_file(newfile, from_dir, meta) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to read source directory, mark file as incomplete */
        scr_meta_set_complete(meta, 0);
        rc = SCR_FAILURE;
      }
    }

    /* TODODSET: want to write out filemap before we start to fetch
     * each file? */

    /* mark the file as complete */
    scr_filemap_set_meta(map, id, scr_my_rank_world, newfile, meta);

    /* free the meta data object */
    scr_meta_delete(&meta);

    /* free path and string */
    scr_free(&newfile);
    scr_path_delete(&path_newfile);
  }

  /* set the expected number of files for this dataset */
  scr_filemap_set_expected_files(map, id, scr_my_rank_world, my_num_files);
  scr_filemap_write(scr_map_file, map);

  return rc;
}