Example #1
0
/* flush files specified in list, and record corresponding entries for summary file */
static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary)
{
  /* assume we will succeed in this flush */
  int rc = SCR_SUCCESS;

  /* flush each of my files and fill in summary data structure */
  scr_hash_elem* elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (elem = scr_hash_elem_first(files);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(elem);

    /* convert file to path and extract name of file */
    scr_path* path_name = scr_path_from_str(file);
    scr_path_basename(path_name);

    /* get the hash for this element */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get meta data for this file */
    scr_meta* meta = scr_hash_get(hash, SCR_KEY_META);

    /* if segments are defined, we flush the file to its containers,
     * otherwise we copy the file out as is */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* TODO: PRESERVE get original filename here */

      /* add this file to the summary file */
      char* name = scr_path_strdup(path_name);
      scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
      scr_free(&name);

// USERDEF fixme!
      /* flush the file to the containers listed in its segmenets */
      if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) {
        /* successfully flushed this file, record the filesize */
        unsigned long filesize = 0;
        if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
          scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
        }

        /* record the crc32 if one was computed */
        uLong crc = 0;
        if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
          scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
        }

        /* record segment information in summary file */
        scr_hash* segments_copy = scr_hash_new();
        scr_hash_merge(segments_copy, segments);
        scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy);
      } else {
        /* the flush failed */
        rc = SCR_FAILURE;

        /* explicitly mark file as incomplete */
        scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
      }
    } else {
      /* get directory to flush file to */
      char* dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) {
        /* create full path of destination file */
        scr_path* path_full = scr_path_from_str(dir);
        scr_path_append(path_full, path_name);

        /* get relative path to flushed file from SCR_PREFIX directory */
        scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full);
        if (! scr_path_is_null(path_relative)) {
          /* record the name of the file in the summary hash, and get reference to a hash for this file */
          char* name = scr_path_strdup(path_relative);
          scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
          scr_free(&name);

          /* flush the file and fill in the meta data for this file */
          if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) {
            /* successfully flushed this file, record the filesize */
            unsigned long filesize = 0;
            if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
              scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
            }

            /* record the crc32 if one was computed */
            uLong crc = 0;
            if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
              scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
            }
          } else {
            /* the flush failed */
            rc = SCR_FAILURE;

            /* explicitly mark incomplete files */
            scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
          }
        } else {
          scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d",
            dir, scr_prefix, __FILE__, __LINE__
          );
        }

        /* free relative and full paths */
        scr_path_delete(&path_relative);
        scr_path_delete(&path_full);
      } else {
        scr_abort(-1, "Failed to read directory to flush file to @ %s:%d",
          __FILE__, __LINE__
        );
      }
    }

    /* free the file name path */
    scr_path_delete(&path_name);
  }

  return rc;
}
Example #2
0
/* attempt to fetch most recent checkpoint from prefix directory into
 * cache, fills in map if successful and sets fetch_attempted to 1 if
 * any fetch is attempted, returns SCR_SUCCESS if successful
 *
 * this call is collective: it issues broadcasts over scr_comm_world,
 * so every rank of that communicator must call it
 *
 * map             - filemap handed to scr_fetch_files for each attempt
 * fetch_attempted - set to 1 on rank 0 when a fetch is attempted and
 *                   then broadcast from rank 0 to all ranks; it is
 *                   never cleared here, so the caller should set it
 *                   to 0 before calling
 *
 * rank 0 reads the index file, picks the directory to try (the one
 * marked current, else the most recent complete checkpoint older than
 * the last one tried), and records the outcome of each attempt in the
 * index file; the loop ends on the first successful fetch or when no
 * candidate directory remains */
int scr_fetch_sync(scr_filemap* map, int* fetch_attempted)
{
  /* we only return success if we successfully fetch a checkpoint */
  int rc = SCR_FAILURE;

  /* start timer, the value is only set and read on rank 0 but is
   * initialized everywhere so it is never indeterminate */
  double time_start = 0.0;
  if (scr_my_rank_world == 0) {
    time_start = MPI_Wtime();
  }

  /* have rank 0 read the index file */
  scr_hash* index_hash = NULL;
  int read_index_file = 0;
  if (scr_my_rank_world == 0) {
    /* create an empty hash to store our index */
    index_hash = scr_hash_new();

    /* read the index file */
    if (scr_index_read(scr_prefix_path, index_hash) == SCR_SUCCESS) {
      read_index_file = 1;
    }
  }

  /* don't enter while loop below if rank 0 failed to read index file */
  MPI_Bcast(&read_index_file, 1, MPI_INT, 0, scr_comm_world);
  int continue_fetching = read_index_file ? 1 : 0;

  /* now start fetching, we keep trying until we exhaust all valid
   * checkpoints */
  char target[SCR_MAX_FILENAME];
  int current_checkpoint_id = -1;
  while (continue_fetching) {
    /* create a new path */
    scr_path* fetch_path = scr_path_new();

    /* initialize our target directory to empty string */
    target[0] = '\0';

    /* rank 0 determines the directory to fetch from */
    if (scr_my_rank_world == 0) {
      /* read the current directory if it's set */
      char* current_str = NULL;
      if (scr_index_get_current(index_hash, &current_str) == SCR_SUCCESS &&
          current_str != NULL)
      {
        size_t current_str_len = strlen(current_str) + 1;
        if (current_str_len <= sizeof(target)) {
          strcpy(target, current_str);
        } else {
          /* the name does not fit in target, report it rather than
           * dropping it silently; target stays empty so we fall back
           * to the most recent complete checkpoint below */
          scr_dbg(1, "Current directory name in index file needs %lu bytes but only %lu are available, ignoring it @ %s:%d",
            (unsigned long) current_str_len, (unsigned long) sizeof(target),
            __FILE__, __LINE__
          );
        }
      }

      /* lookup the checkpoint id */
      int next_checkpoint_id = -1;
      if (target[0] != '\0') {
        /* we have a subdirectory name, lookup the checkpoint id
         * corresponding to this directory */
        scr_index_get_id_by_dir(index_hash, target, &next_checkpoint_id);
      } else {
        /* otherwise, just get the most recent complete checkpoint
         * (that's older than the current id) */
        scr_index_get_most_recent_complete(index_hash, current_checkpoint_id, &next_checkpoint_id, target);
      }
      current_checkpoint_id = next_checkpoint_id;

      /* TODODSET: need to verify that dataset is really a checkpoint
       * and keep searching if not */

      /* if we have a subdirectory (target) name, build the full fetch
       * directory */
      if (target[0] != '\0') {
        /* record that we're attempting a fetch of this checkpoint in
         * the index file */
        *fetch_attempted = 1;
        if (current_checkpoint_id != -1) {
          scr_index_mark_fetched(index_hash, current_checkpoint_id, target);
          scr_index_write(scr_prefix_path, index_hash);
        }

        /* we have a subdirectory, now build the full path */
        scr_path_append(fetch_path, scr_prefix_path);
        scr_path_append_str(fetch_path, target);
        scr_path_reduce(fetch_path);
      }
    }

    /* broadcast fetch path from rank 0, on the other ranks this is
     * the only source of the path since they never fill it in */
    scr_path_bcast(fetch_path, 0, scr_comm_world);

    /* check whether we've got a path */
    if (! scr_path_is_null(fetch_path)) {
      /* got something, attempt to fetch the checkpoint; the ids are
       * only consumed after a successful fetch but are given defined
       * values up front */
      int dset_id = -1;
      int ckpt_id = -1;
      rc = scr_fetch_files(map, fetch_path, &dset_id, &ckpt_id);
      if (rc == SCR_SUCCESS) {
        /* set the dataset and checkpoint ids */
        scr_dataset_id = dset_id;
        scr_checkpoint_id = ckpt_id;

        /* we succeeded in fetching this checkpoint, set current to
         * point to it, and stop fetching */
        if (scr_my_rank_world == 0) {
          scr_index_set_current(index_hash, target);
          scr_index_write(scr_prefix_path, index_hash);
        }
        continue_fetching = 0;
      } else {
        /* we tried to fetch, but we failed, mark it as failed in
         * the index file so we don't try it again */
        if (scr_my_rank_world == 0) {
          /* unset the current pointer */
          scr_index_unset_current(index_hash);
          if (current_checkpoint_id != -1 && target[0] != '\0') {
            scr_index_mark_failed(index_hash, current_checkpoint_id, target);
          }
          scr_index_write(scr_prefix_path, index_hash);
        }
      }
    } else {
      /* we ran out of valid checkpoints in the index file,
       * bail out of the loop */
      continue_fetching = 0;
    }

    /* free fetch path */
    scr_path_delete(&fetch_path);
  }

  /* delete the index hash */
  if (scr_my_rank_world == 0) {
    scr_hash_delete(&index_hash);
  }

  /* broadcast whether we actually attempted to fetch anything
   * (only rank 0 knows) */
  MPI_Bcast(fetch_attempted, 1, MPI_INT, 0, scr_comm_world);

  /* stop timer for fetch */
  if (scr_my_rank_world == 0) {
    double time_end = MPI_Wtime();
    double time_diff = time_end - time_start;
    /* NOTE(review): the message prefix names scr_fetch_files although
     * this is scr_fetch_sync; kept unchanged in case logs are parsed */
    scr_dbg(1, "scr_fetch_files: return code %d, %f secs", rc, time_diff);
  }

  return rc;
}