/* flush files specified in list, and record corresponding entries for summary file */ static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary) { /* assume we will succeed in this flush */ int rc = SCR_SUCCESS; /* flush each of my files and fill in summary data structure */ scr_hash_elem* elem = NULL; scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE); for (elem = scr_hash_elem_first(files); elem != NULL; elem = scr_hash_elem_next(elem)) { /* get the filename */ char* file = scr_hash_elem_key(elem); /* convert file to path and extract name of file */ scr_path* path_name = scr_path_from_str(file); scr_path_basename(path_name); /* get the hash for this element */ scr_hash* hash = scr_hash_elem_hash(elem); /* get meta data for this file */ scr_meta* meta = scr_hash_get(hash, SCR_KEY_META); /* if segments are defined, we flush the file to its containers, * otherwise we copy the file out as is */ scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT); if (segments != NULL) { /* TODO: PRESERVE get original filename here */ /* add this file to the summary file */ char* name = scr_path_strdup(path_name); scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name); scr_free(&name); // USERDEF fixme! 
/* flush the file to the containers listed in its segmenets */ if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) { /* successfully flushed this file, record the filesize */ unsigned long filesize = 0; if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) { scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize); } /* record the crc32 if one was computed */ uLong crc = 0; if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) { scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc); } /* record segment information in summary file */ scr_hash* segments_copy = scr_hash_new(); scr_hash_merge(segments_copy, segments); scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy); } else { /* the flush failed */ rc = SCR_FAILURE; /* explicitly mark file as incomplete */ scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0); } } else { /* get directory to flush file to */ char* dir; if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) { /* create full path of destination file */ scr_path* path_full = scr_path_from_str(dir); scr_path_append(path_full, path_name); /* get relative path to flushed file from SCR_PREFIX directory */ scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full); if (! 
scr_path_is_null(path_relative)) { /* record the name of the file in the summary hash, and get reference to a hash for this file */ char* name = scr_path_strdup(path_relative); scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name); scr_free(&name); /* flush the file and fill in the meta data for this file */ if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) { /* successfully flushed this file, record the filesize */ unsigned long filesize = 0; if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) { scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize); } /* record the crc32 if one was computed */ uLong crc = 0; if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) { scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc); } } else { /* the flush failed */ rc = SCR_FAILURE; /* explicitly mark incomplete files */ scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0); } } else { scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d", dir, scr_prefix, __FILE__, __LINE__ ); } /* free relative and full paths */ scr_path_delete(&path_relative); scr_path_delete(&path_full); } else { scr_abort(-1, "Failed to read directory to flush file to @ %s:%d", __FILE__, __LINE__ ); } } /* free the file name path */ scr_path_delete(&path_name); } return rc; }
/* attempt to fetch most recent checkpoint from prefix directory into
 * cache, fills in map if successful and sets fetch_attempted to 1 if
 * any fetch is attempted, returns SCR_SUCCESS if successful
 *
 * map             - filemap handed to scr_fetch_files for each attempt
 * fetch_attempted - OUT: set to 1 on rank 0 when a fetch is attempted,
 *                   then overwritten on all ranks with the value held
 *                   by rank 0; this function never sets it to 0, so the
 *                   value the caller passes in on rank 0 is preserved
 *                   when nothing is attempted
 *
 * collective over scr_comm_world: every rank must call this function,
 * since it issues MPI_Bcast and scr_path_bcast on that communicator
 *
 * rank 0 reads the index file and picks the directory to fetch from:
 * first the directory marked as current (if any), then the most recent
 * complete checkpoint older than the one tried last; the loop ends when
 * a fetch succeeds or when rank 0 has no more candidates */
int scr_fetch_sync(scr_filemap* map, int* fetch_attempted)
{
  /* we only return success if we successfully fetch a checkpoint */
  int rc = SCR_FAILURE;

  /* start timer, only rank 0 measures and reports the elapsed time */
  double time_start = 0.0;
  if (scr_my_rank_world == 0) {
    time_start = MPI_Wtime();
  }

  /* have rank 0 read the index file */
  scr_hash* index_hash = NULL;
  int read_index_file = 0;
  if (scr_my_rank_world == 0) {
    /* create an empty hash to store our index */
    index_hash = scr_hash_new();

    /* read the index file */
    if (scr_index_read(scr_prefix_path, index_hash) == SCR_SUCCESS) {
      read_index_file = 1;
    }
  }

  /* don't enter while loop below if rank 0 failed to read index file */
  int continue_fetching = 1;
  MPI_Bcast(&read_index_file, 1, MPI_INT, 0, scr_comm_world);
  if (! read_index_file) {
    continue_fetching = 0;
  }

  /* now start fetching, we keep trying until we exhaust all valid
   * checkpoints */
  char target[SCR_MAX_FILENAME];
  int current_checkpoint_id = -1;
  while (continue_fetching) {
    /* create a new path, it stays null unless rank 0 finds a target */
    scr_path* fetch_path = scr_path_new();

    /* initialize our target directory to empty string */
    target[0] = '\0';

    /* rank 0 determines the directory to fetch from */
    if (scr_my_rank_world == 0) {
      /* read the current directory if it's set */
      char* current_str = NULL;
      if (scr_index_get_current(index_hash, &current_str) == SCR_SUCCESS &&
          current_str != NULL)
      {
        /* length including the terminating NUL */
        size_t current_str_len = strlen(current_str) + 1;
        if (current_str_len <= sizeof(target)) {
          strcpy(target, current_str);
        } else {
          /* name does not fit in target, report it and fall through
           * to the most recent complete checkpoint instead */
          scr_dbg(1, "Current checkpoint name needs %lu bytes but limit is %lu, ignoring it @ %s:%d",
            (unsigned long) current_str_len, (unsigned long) sizeof(target),
            __FILE__, __LINE__
          );
        }
      }

      /* lookup the checkpoint id */
      int next_checkpoint_id = -1;
      if (strcmp(target, "") != 0) {
        /* we have a subdirectory name, lookup the checkpoint id
         * corresponding to this directory */
        scr_index_get_id_by_dir(index_hash, target, &next_checkpoint_id);
      } else {
        /* otherwise, just get the most recent complete checkpoint
         * (that's older than the current id) */
        scr_index_get_most_recent_complete(
          index_hash, current_checkpoint_id, &next_checkpoint_id, target
        );
      }
      current_checkpoint_id = next_checkpoint_id;

      /* TODODSET: need to verify that dataset is really a checkpoint
       * and keep searching if not */

      /* if we have a subdirectory (target) name, build the full fetch
       * directory */
      if (strcmp(target, "") != 0) {
        /* record that we're attempting a fetch of this checkpoint in
         * the index file */
        *fetch_attempted = 1;
        if (current_checkpoint_id != -1) {
          scr_index_mark_fetched(index_hash, current_checkpoint_id, target);
          scr_index_write(scr_prefix_path, index_hash);
        }

        /* we have a subdirectory, now build the full path */
        scr_path_append(fetch_path, scr_prefix_path);
        scr_path_append_str(fetch_path, target);
        scr_path_reduce(fetch_path);
      }
    }

    /* broadcast fetch path from rank 0 */
    scr_path_bcast(fetch_path, 0, scr_comm_world);

    /* check whether we've got a path */
    if (! scr_path_is_null(fetch_path)) {
      /* got something, attempt to fetch the checkpoint */
      int dset_id, ckpt_id;
      rc = scr_fetch_files(map, fetch_path, &dset_id, &ckpt_id);
      if (rc == SCR_SUCCESS) {
        /* set the dataset and checkpoint ids */
        scr_dataset_id    = dset_id;
        scr_checkpoint_id = ckpt_id;

        /* we succeeded in fetching this checkpoint, set current to
         * point to it, and stop fetching */
        if (scr_my_rank_world == 0) {
          scr_index_set_current(index_hash, target);
          scr_index_write(scr_prefix_path, index_hash);
        }
        continue_fetching = 0;
      } else {
        /* we tried to fetch, but we failed, mark it as failed in
         * the index file so we don't try it again */
        if (scr_my_rank_world == 0) {
          /* unset the current pointer */
          scr_index_unset_current(index_hash);
          if (current_checkpoint_id != -1 && strcmp(target, "") != 0) {
            scr_index_mark_failed(index_hash, current_checkpoint_id, target);
          }
          scr_index_write(scr_prefix_path, index_hash);
        }
      }
    } else {
      /* we ran out of valid checkpoints in the index file,
       * bail out of the loop */
      continue_fetching = 0;
    }

    /* free fetch path */
    scr_path_delete(&fetch_path);
  }

  /* delete the index hash */
  if (scr_my_rank_world == 0) {
    scr_hash_delete(&index_hash);
  }

  /* broadcast whether we actually attempted to fetch anything
   * (only rank 0 knows) */
  MPI_Bcast(fetch_attempted, 1, MPI_INT, 0, scr_comm_world);

  /* stop timer for fetch */
  if (scr_my_rank_world == 0) {
    double time_end  = MPI_Wtime();
    double time_diff = time_end - time_start;
    scr_dbg(1, "scr_fetch_files: return code %d, %f secs", rc, time_diff);
  }

  return rc;
}