/* searchs for name and returns a newly allocated hash of its value if set, * returns NULL if not found */ scr_hash* scr_param_get_hash(char* name) { scr_hash* hash = NULL; scr_hash* value_hash = NULL; /* see if this parameter is one which is restricted from user */ scr_hash* no_user = scr_hash_get(scr_no_user_hash, name); /* if parameter is set in environment, return that value */ if (no_user == NULL && getenv(name) != NULL) { /* TODO: need to strdup here to be safe? */ hash = scr_hash_new(); scr_hash_set(hash, getenv(name), scr_hash_new()); return hash; } /* otherwise, if parameter is set in user configuration file, return that value */ value_hash = scr_hash_get(scr_user_hash, name); if (no_user == NULL && value_hash != NULL) { hash = scr_hash_new(); scr_hash_merge(hash, value_hash); return hash; } /* otherwise, if parameter is set in system configuration file, return that value */ value_hash = scr_hash_get(scr_system_hash, name); if (value_hash != NULL) { hash = scr_hash_new(); scr_hash_merge(hash, value_hash); return hash; } /* parameter not found, return NULL */ return NULL; }
/* Given a hash of files and a file name, check whether the named file
 * still needs data transferred.
 *
 * files    - hash keyed by source file name
 * src      - name of the file to look up
 * dst      - out: newly strdup'd destination name (caller must free)
 * position - out: number of bytes already written
 * filesize - out: total size of the file
 *
 * Returns SCR_SUCCESS and fills in all three outputs when the recorded
 * bytes written is less than the recorded file size.  Returns SCR_FAILURE
 * otherwise (bad arguments, file not listed, missing fields, nothing left
 * to transfer, or out of memory); the outputs are left untouched then. */
int need_transfer(scr_hash* files, char* src, char** dst, off_t* position, off_t* filesize)
{
  /* check that we got a hash of files and a file name */
  if (files == NULL || src == NULL) {
    return SCR_FAILURE;
  }

  /* we cannot report a result without somewhere to store it */
  if (dst == NULL || position == NULL || filesize == NULL) {
    return SCR_FAILURE;
  }

  /* lookup the specified file in the hash */
  scr_hash* file_hash = scr_hash_get(files, src);
  if (file_hash == NULL) {
    return SCR_FAILURE;
  }

  /* extract the values for file size, bytes written, and destination */
  unsigned long size, written;
  char* dest = NULL;
  if (scr_hash_util_get_bytecount(file_hash, SCR_TRANSFER_KEY_SIZE, &size) != SCR_SUCCESS ||
      scr_hash_util_get_bytecount(file_hash, SCR_TRANSFER_KEY_WRITTEN, &written) != SCR_SUCCESS ||
      scr_hash_util_get_str(file_hash, SCR_TRANSFER_KEY_DESTINATION, &dest) != SCR_SUCCESS)
  {
    return SCR_FAILURE;
  }

  /* a file only needs more data if fewer bytes were written than its size,
   * and strdup must not be handed a NULL string */
  if (written >= size || dest == NULL) {
    return SCR_FAILURE;
  }

  /* copy the destination first so that an allocation failure is reported
   * as a failure rather than as success with a NULL destination */
  char* dest_copy = strdup(dest);
  if (dest_copy == NULL) {
    return SCR_FAILURE;
  }

  /* got our file, fill in output parameters */
  *dst      = dest_copy;
  *position = (off_t) written;
  *filesize = (off_t) size;
  return SCR_SUCCESS;
}
/* Look up the parameter called name and return a pointer to its value.
 * The environment is checked first, then the user configuration hash,
 * then the system configuration hash.  The environment and the user
 * configuration are skipped for names listed in scr_no_user_hash.
 * Returns NULL if the parameter is not set in any permitted source. */
char* scr_param_get(char* name)
{
  /* users may only set parameters which are not listed in scr_no_user_hash */
  scr_hash* restricted = scr_hash_get(scr_no_user_hash, name);
  int user_allowed = (restricted == NULL);

  /* the environment has the highest priority */
  if (user_allowed) {
    /* TODO: need to strdup here to be safe? */
    char* env_value = getenv(name);
    if (env_value != NULL) {
      return env_value;
    }
  }

  /* the user configuration file comes next */
  char* user_value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (user_allowed && user_value != NULL) {
    return user_value;
  }

  /* finally the system configuration file, which yields NULL
   * if the parameter is not set there either */
  return scr_hash_elem_get_first_val(scr_system_hash, name);
}
/* read config files and store contents */ int scr_param_init() { /* allocate storage and read in config files if we haven't already */ if (scr_param_ref_count == 0) { /* allocate hash object to hold names we cannot read from the * environment */ scr_no_user_hash = scr_hash_new(); scr_hash_set(scr_no_user_hash, "SCR_CNTL_BASE", scr_hash_new()); /* allocate hash object to store values from user config file, * if specified */ char* user_file = user_config_path(); if (user_file != NULL) { scr_user_hash = scr_hash_new(); scr_config_read(user_file, scr_user_hash); } scr_free(&user_file); /* allocate hash object to store values from system config file */ scr_system_hash = scr_hash_new(); scr_config_read(scr_config_file, scr_system_hash); /* initialize our hash to cache lookups to getenv */ scr_env_hash = scr_hash_new(); /* warn user if he set any parameters in his environment or user * config file which aren't permitted */ scr_hash_elem* elem; for (elem = scr_hash_elem_first(scr_no_user_hash); elem != NULL; elem = scr_hash_elem_next(elem)) { /* get the parameter name */ char* key = scr_hash_elem_key(elem); char* env_val = getenv(key); scr_hash* env_hash = scr_hash_get(scr_user_hash, key); /* check whether this is set in the environment */ if (env_val != NULL || env_hash != NULL) { scr_err("%s cannot be set in the environment or user configuration file, ignoring setting", key ); } } } /* increment our reference count */ scr_param_ref_count++; return SCR_SUCCESS; }
/* searches for name and returns a character pointer to its value if set, * returns NULL if not found */ char* scr_param_get(char* name) { char* value = NULL; /* see if this parameter is one which is restricted from user */ scr_hash* no_user = scr_hash_get(scr_no_user_hash, name); /* if parameter is set in environment, return that value */ if (no_user == NULL && getenv(name) != NULL) { /* we don't just return the getenv value directly because that causes * segfaults on some systems, so instead we add it to a hash and return * the pointer into the hash */ /* try to lookup the value for this name in case we've already cached it */ if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) { /* it's not in the hash yet, so add it */ char* tmp_value = strdup(getenv(name)); scr_hash_util_set_str(scr_env_hash, name, tmp_value); scr_free(&tmp_value); /* now issue our lookup again */ if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) { /* it's an error if we don't find it this time */ scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d", name, __FILE__, __LINE__ ); } } return value; } /* otherwise, if parameter is set in user configuration file, * return that value */ value = scr_hash_elem_get_first_val(scr_user_hash, name); if (no_user == NULL && value != NULL) { return value; } /* otherwise, if parameter is set in system configuration file, * return that value */ value = scr_hash_elem_get_first_val(scr_system_hash, name); if (value != NULL) { return value; } /* parameter not found, return NULL */ return NULL; }
/* since on a restart we may end up with more or fewer ranks on a node than the * previous run, rely on the master to read in and distribute the filemap to * other ranks on the node */ int scr_scatter_filemaps(scr_filemap* my_map) { /* TODO: if the control directory is on a device shared by lots of procs, * we should read and distribute this data in a more scalable way */ /* allocate empty send hash */ scr_hash* send_hash = scr_hash_new(); /* if i'm the master on this node, read in all filemaps */ if (scr_storedesc_cntl->rank == 0) { /* create an empty filemap */ scr_filemap* all_map = scr_filemap_new(); /* read in the master map */ scr_hash* hash = scr_hash_new(); scr_hash_read_path(scr_master_map_file, hash); /* for each filemap listed in the master map */ scr_hash_elem* elem; for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap")); elem != NULL; elem = scr_hash_elem_next(elem)) { /* get the filename of this filemap */ char* file = scr_hash_elem_key(elem); /* TODO MEMFS: mount storage for each filemap */ /* read in the filemap */ scr_filemap* tmp_map = scr_filemap_new(); scr_path* path_file = scr_path_from_str(file); scr_filemap_read(path_file, tmp_map); scr_path_delete(&path_file); /* merge it with the all_map */ scr_filemap_merge(all_map, tmp_map); /* delete filemap */ scr_filemap_delete(&tmp_map); /* TODO: note that if we fail after unlinking this file but before * writing out the new file, we'll lose information */ /* delete the file */ scr_file_unlink(file); } /* free the hash object */ scr_hash_delete(&hash); /* write out new local 0 filemap */ if (scr_filemap_num_ranks(all_map) > 0) { scr_filemap_write(scr_map_file, all_map); } /* get global rank of each rank */ int* ranks = (int*) SCR_MALLOC(scr_storedesc_cntl->ranks * sizeof(int)); MPI_Gather( &scr_my_rank_world, 1, MPI_INT, ranks, 1, MPI_INT, 0, scr_storedesc_cntl->comm ); /* for each rank, send them their own file data if we have it */ int i; for (i=0; i < scr_storedesc_cntl->ranks; i++) { int 
rank = ranks[i]; if (scr_filemap_have_rank(all_map, rank)) { /* extract the filemap for this rank */ scr_filemap* tmp_map = scr_filemap_extract_rank(all_map, rank); /* get a reference to the hash object that we'll send to this rank, * and merge this filemap into it */ scr_hash* tmp_hash = scr_hash_getf(send_hash, "%d", i); if (tmp_hash == NULL) { /* if we don't find an existing entry in the send_hash, * create an empty hash and insert it */ scr_hash* empty_hash = scr_hash_new(); scr_hash_setf(send_hash, empty_hash, "%d", i); tmp_hash = empty_hash; } scr_hash_merge(tmp_hash, tmp_map); /* delete the filemap for this rank */ scr_filemap_delete(&tmp_map); } } /* free our rank list */ scr_free(&ranks); /* now just round robin the remainder across the set (load balancing) */ int num; int* remaining_ranks = NULL; scr_filemap_list_ranks(all_map, &num, &remaining_ranks); int j = 0; while (j < num) { /* pick a rank in to send to */ i = j % scr_storedesc_cntl->ranks; /* extract the filemap for this rank */ scr_filemap* tmp_map = scr_filemap_extract_rank(all_map, remaining_ranks[j]); /* get a reference to the hash object that we'll send to this rank, * and merge this filemap into it */ scr_hash* tmp_hash = scr_hash_getf(send_hash, "%d", i); if (tmp_hash == NULL) { /* if we don't find an existing entry in the send_hash, * create an empty hash and insert it */ scr_hash* empty_hash = scr_hash_new(); scr_hash_setf(send_hash, empty_hash, "%d", i); tmp_hash = empty_hash; } scr_hash_merge(tmp_hash, tmp_map); /* delete the filemap for this rank */ scr_filemap_delete(&tmp_map); j++; } scr_free(&remaining_ranks); /* delete the filemap */ scr_filemap_delete(&all_map); /* write out the new master filemap */ hash = scr_hash_new(); char file[SCR_MAX_FILENAME]; for (i=0; i < scr_storedesc_cntl->ranks; i++) { sprintf(file, "%s/filemap_%d.scrinfo", scr_cntl_prefix, i); scr_hash_set_kv(hash, "Filemap", file); } scr_hash_write_path(scr_master_map_file, hash); scr_hash_delete(&hash); } else { /* 
send our global rank to the master */ MPI_Gather( &scr_my_rank_world, 1, MPI_INT, NULL, 1, MPI_INT, 0, scr_storedesc_cntl->comm ); } /* receive our filemap from master */ scr_hash* recv_hash = scr_hash_new(); scr_hash_exchange(send_hash, recv_hash, scr_storedesc_cntl->comm); /* merge map sent from master into our map */ scr_hash* map_from_master = scr_hash_getf(recv_hash, "%d", 0); if (map_from_master != NULL) { scr_hash_merge(my_map, map_from_master); } /* write out our local filemap */ if (scr_filemap_num_ranks(my_map) > 0) { scr_filemap_write(scr_map_file, my_map); } /* free off our send and receive hashes */ scr_hash_delete(&recv_hash); scr_hash_delete(&send_hash); return SCR_SUCCESS; }
/* remove any dataset ids from flush file which are not in cache, * and add any datasets in cache that are not in the flush file */ int scr_flush_file_rebuild(const scr_filemap* map) { if (scr_my_rank_world == 0) { /* read the flush file */ scr_hash* hash = scr_hash_new(); scr_hash_read_path(scr_flush_file, hash); /* get ordered list of dataset ids in flush file */ int flush_ndsets; int* flush_dsets; scr_hash* flush_dsets_hash = scr_hash_get(hash, SCR_FLUSH_KEY_DATASET); scr_hash_list_int(flush_dsets_hash, &flush_ndsets, &flush_dsets); /* get ordered list of dataset ids in cache */ int cache_ndsets; int* cache_dsets; scr_filemap_list_datasets(map, &cache_ndsets, &cache_dsets); int flush_index = 0; int cache_index = 0; while (flush_index < flush_ndsets && cache_index < cache_ndsets) { /* get next smallest index from flush file and cache */ int flush_dset = flush_dsets[flush_index]; int cache_dset = cache_dsets[cache_index]; if (flush_dset < cache_dset) { /* dataset exists in flush file but not in cache, * delete it from the flush file */ scr_hash_unset_kv_int(hash, SCR_FLUSH_KEY_DATASET, flush_dset); flush_index++; } else if (cache_dset < flush_dset) { /* dataset exists in cache but not flush file, * add it to the flush file */ scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset); scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE); cache_index++; } else { /* dataset exists in cache and the flush file, * ensure that it is listed as being in the cache */ scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset); scr_hash_unset_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE); scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE); flush_index++; cache_index++; } } while (flush_index < flush_ndsets) { /* dataset exists in flush file but not in cache, * delete it from the flush file */ int flush_dset = flush_dsets[flush_index]; 
scr_hash_unset_kv_int(hash, SCR_FLUSH_KEY_DATASET, flush_dset); flush_index++; } while (cache_index < cache_ndsets) { /* dataset exists in cache but not flush file, * add it to the flush file */ int cache_dset = cache_dsets[cache_index]; scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset); scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE); cache_index++; } /* free our list of cache dataset ids */ scr_free(&cache_dsets); /* free our list of flush file dataset ids */ scr_free(&flush_dsets); /* write the hash back to the flush file */ scr_hash_write_path(scr_flush_file, hash); /* delete the hash */ scr_hash_delete(&hash); } return SCR_SUCCESS; }
/* flush files specified in list, and record corresponding entries for summary file */ static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary) { /* assume we will succeed in this flush */ int rc = SCR_SUCCESS; /* flush each of my files and fill in summary data structure */ scr_hash_elem* elem = NULL; scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE); for (elem = scr_hash_elem_first(files); elem != NULL; elem = scr_hash_elem_next(elem)) { /* get the filename */ char* file = scr_hash_elem_key(elem); /* convert file to path and extract name of file */ scr_path* path_name = scr_path_from_str(file); scr_path_basename(path_name); /* get the hash for this element */ scr_hash* hash = scr_hash_elem_hash(elem); /* get meta data for this file */ scr_meta* meta = scr_hash_get(hash, SCR_KEY_META); /* if segments are defined, we flush the file to its containers, * otherwise we copy the file out as is */ scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT); if (segments != NULL) { /* TODO: PRESERVE get original filename here */ /* add this file to the summary file */ char* name = scr_path_strdup(path_name); scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name); scr_free(&name); // USERDEF fixme! 
/* flush the file to the containers listed in its segmenets */ if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) { /* successfully flushed this file, record the filesize */ unsigned long filesize = 0; if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) { scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize); } /* record the crc32 if one was computed */ uLong crc = 0; if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) { scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc); } /* record segment information in summary file */ scr_hash* segments_copy = scr_hash_new(); scr_hash_merge(segments_copy, segments); scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy); } else { /* the flush failed */ rc = SCR_FAILURE; /* explicitly mark file as incomplete */ scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0); } } else { /* get directory to flush file to */ char* dir; if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) { /* create full path of destination file */ scr_path* path_full = scr_path_from_str(dir); scr_path_append(path_full, path_name); /* get relative path to flushed file from SCR_PREFIX directory */ scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full); if (! 
scr_path_is_null(path_relative)) { /* record the name of the file in the summary hash, and get reference to a hash for this file */ char* name = scr_path_strdup(path_relative); scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name); scr_free(&name); /* flush the file and fill in the meta data for this file */ if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) { /* successfully flushed this file, record the filesize */ unsigned long filesize = 0; if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) { scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize); } /* record the crc32 if one was computed */ uLong crc = 0; if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) { scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc); } } else { /* the flush failed */ rc = SCR_FAILURE; /* explicitly mark incomplete files */ scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0); } } else { scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d", dir, scr_prefix, __FILE__, __LINE__ ); } /* free relative and full paths */ scr_path_delete(&path_relative); scr_path_delete(&path_full); } else { scr_abort(-1, "Failed to read directory to flush file to @ %s:%d", __FILE__, __LINE__ ); } } /* free the file name path */ scr_path_delete(&path_name); } return rc; }
/* Given a hash of transfer file data, look for a file which needs to
 * be transferred.  If *src is set, try to continue with that file,
 * otherwise pick the first file in the hash which still needs data.
 *
 * hash     - transfer hash, files are listed under SCR_TRANSFER_KEY_FILES
 * src      - in/out: name of the current source file; when a different
 *            file is picked it is replaced by a strdup'd copy of its name
 * dst      - in/out: destination name, set via need_transfer when a
 *            different file is picked
 * position - in/out: number of bytes already written for the file
 * filesize - out: total size of the file
 *
 * Returns SCR_SUCCESS if a file was found.  Otherwise clear_parameters is
 * called on src, dst and position and SCR_FAILURE is returned; that now
 * includes failing to allocate the copy of the file name, which previously
 * was reported as success with *src set to NULL. */
int find_file(scr_hash* hash, char** src, char** dst, off_t* position, off_t* filesize)
{
  int found_a_file = 0;

  scr_hash* files = scr_hash_get(hash, SCR_TRANSFER_KEY_FILES);
  if (files != NULL) {
    /* if we're given a file name, try to continue with that file */
    if (src != NULL && *src != NULL) {
      /* src was set, so assume dst is also set, create a dummy dst
       * variable to hold the string which may be strdup'd in
       * need_transfer call */
      char* tmp_dst = NULL;
      if (need_transfer(files, *src, &tmp_dst, position, filesize) == SCR_SUCCESS) {
        /* can continue with the same file (position may have been
         * updated though) */
        found_a_file = 1;

        /* free the dummy */
        /* TODO: note that if destination has been updated, we're
         * ignoring that change */
        free(tmp_dst);
      } else {
        /* otherwise, this file no longer needs transferred,
         * so free the strings */
        clear_parameters(src, dst, position);
      }
    }

    /* if we still don't have a file, scan the hash and use the first
     * file we find */
    if (!found_a_file) {
      scr_hash_elem* elem;
      for (elem = scr_hash_elem_first(files);
           elem != NULL;
           elem = scr_hash_elem_next(elem))
      {
        /* get the filename */
        char* name = scr_hash_elem_key(elem);

        /* check whether this file needs transferred */
        if (name != NULL &&
            need_transfer(files, name, dst, position, filesize) == SCR_SUCCESS)
        {
          /* found a file, copy its name (the destination and position
           * are set in need_transfer) */
          char* name_copy = strdup(name);
          if (name_copy != NULL) {
            *src = name_copy;
            found_a_file = 1;
          }

          /* stop scanning either way: if the copy failed we are out of
           * memory, and leaving found_a_file unset sends us through the
           * failure path below
           * NOTE(review): this relies on clear_parameters releasing the
           * destination string set by need_transfer -- confirm */
          break;
        }
      }
    }
  }

  /* if we didn't find a file, set src and dst to NULL and set
   * position to 0 */
  if (!found_a_file) {
    clear_parameters(src, dst, position);
    return SCR_FAILURE;
  }

  return SCR_SUCCESS;
}
int main(int argc, char* argv[]) { int i, j; int index = 1; /* print usage if not enough arguments were given */ if (argc < 2) { printf("Usage: scr_rebuild_xor <size> <root> <missing_xor_filename> <ordered_remaining_xor_filenames>\n"); return 1; } /* TODO: want to pass this on command line? */ /* get current working directory */ char dsetdir[SCR_MAX_FILENAME]; scr_getcwd(dsetdir, sizeof(dsetdir)); /* create and reduce path for dataset */ scr_path* path_dset = scr_path_from_str(dsetdir); scr_path_reduce(path_dset); /* allocate buffers */ char* buffer_A = malloc(buffer_size * sizeof(char)); char* buffer_B = malloc(buffer_size * sizeof(char)); if (buffer_A == NULL || buffer_B == NULL) { scr_err("Failed to allocate buffer memory @ %s:%d", __FILE__, __LINE__ ); return 1; } /* read in the size of the XOR set */ int xor_set_size = (int) strtol(argv[index++], (char **)NULL, 10); if (xor_set_size <= 0) { scr_err("Invalid XOR set size argument %s @ %s:%d", argv[index-1], __FILE__, __LINE__ ); return 1; } /* allocate memory for data structures based on the XOR set size */ int* num_files = malloc(xor_set_size * sizeof(int)); int* offsets = malloc(xor_set_size * sizeof(int)); char** xor_files = malloc(xor_set_size * sizeof(char*)); int* xor_fds = malloc(xor_set_size * sizeof(int)); scr_hash** xor_headers = malloc(xor_set_size * sizeof(scr_hash*)); if (num_files == NULL || offsets == NULL || xor_files == NULL || xor_fds == NULL || xor_headers == NULL) { scr_err("Failed to allocate buffer memory @ %s:%d", __FILE__, __LINE__ ); return 1; } /* read in the rank of the missing process (the root) */ int root = (int) strtol(argv[index++], (char **)NULL, 10); if (root < 0 || root >= xor_set_size) { scr_err("Invalid root argument %s @ %s:%d", argv[index-1], __FILE__, __LINE__ ); return 1; } /* read in the missing xor filename */ xor_files[0] = strdup(argv[index++]); if (xor_files[0] == NULL) { scr_err("Failed to dup XOR filename @ %s:%d", __FILE__, __LINE__ ); return 1; } /* read in the 
xor filenames (expected to be in order of XOR segment number) */ /* we order ranks so that root is index 0, the rank to the right of root is index 1, and so on */ for (i=0; i < xor_set_size; i++) { xor_headers[i] = scr_hash_new(); /* we'll get the XOR file name for root from the header stored in the XOR file of the partner */ if (i == root) { continue; } /* adjust the index relative to root */ j = i - root; if (j < 0) { j += xor_set_size; } /* copy the XOR file name */ xor_files[j] = strdup(argv[index++]); if (xor_files[j] == NULL) { scr_err("Failed to dup XOR filename @ %s:%d", __FILE__, __LINE__ ); return 1; } } /* open each of the xor files and read in the headers */ for (i=1; i < xor_set_size; i++) { /* open each xor file for reading */ xor_fds[i] = scr_open(xor_files[i], O_RDONLY); if (xor_fds[i] < 0) { scr_err("Opening xor segment file: scr_open(%s) errno=%d %s @ %s:%d", xor_files[i], errno, strerror(errno), __FILE__, __LINE__ ); return 1; } /* read the header from this xor file */ if (scr_hash_read_fd(xor_files[i], xor_fds[i], xor_headers[i]) < 0) { scr_err("Failed to read XOR header from %s @ %s:%d", xor_files[i], __FILE__, __LINE__ ); return 1; } } /* build header for missing XOR file */ int partner_rank = -1; if (xor_set_size >= 2) { scr_hash_merge(xor_headers[0], xor_headers[1]); /* fetch our own file list from rank to our right */ scr_hash* rhs_hash = scr_hash_get(xor_headers[1], SCR_KEY_COPY_XOR_PARTNER); scr_hash* current_hash = scr_hash_new(); scr_hash_merge(current_hash, rhs_hash); scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT, current_hash); /* we are the partner to the rank to our left */ scr_hash* lhs_hash = scr_hash_get(xor_headers[xor_set_size-1], SCR_KEY_COPY_XOR_CURRENT); scr_hash* partner_hash = scr_hash_new(); scr_hash_merge(partner_hash, lhs_hash); scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_PARTNER, partner_hash); /* get global rank of partner */ if (scr_hash_util_get_int(lhs_hash, SCR_KEY_COPY_XOR_RANK, &partner_rank) != 
SCR_SUCCESS) { scr_err("Failed to read partner rank from XOR file header in %s @ %s:%d", xor_files[xor_set_size-1], __FILE__, __LINE__ ); return 1; } } /* get a pointer to the current hash for the missing rank */ scr_hash* missing_current_hash = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT); /* read the rank */ int my_rank = -1; if (scr_hash_util_get_int(missing_current_hash, SCR_KEY_COPY_XOR_RANK, &my_rank) != SCR_SUCCESS) { scr_err("Failed to read rank from XOR file header in %s @ %s:%d", xor_files[0], __FILE__, __LINE__ ); return 1; } /* get the dataset */ scr_dataset* dataset = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_DATASET); /* read the dataset id */ int dset_id = -1; if (scr_dataset_get_id(dataset, &dset_id) != SCR_SUCCESS) { scr_err("Failed to read dataset id from XOR file header in %s @ %s:%d", xor_files[0], __FILE__, __LINE__ ); return 1; } /* read the ranks */ int num_ranks = -1; if (scr_hash_util_get_int(xor_headers[0], SCR_KEY_COPY_XOR_RANKS, &num_ranks) != SCR_SUCCESS) { scr_err("Failed to read ranks from XOR file header in %s @ %s:%d", xor_files[0], __FILE__, __LINE__ ); return 1; } /* get name of partner's fmap */ scr_path* path_partner_map = scr_path_from_str(".scr"); scr_path_append_strf(path_partner_map, "fmap.%d.scr", partner_rank); /* extract partner's flush descriptor */ scr_hash* flushdesc = scr_hash_new(); scr_filemap* partner_map = scr_filemap_new(); scr_filemap_read(path_partner_map, partner_map); scr_filemap_get_flushdesc(partner_map, dset_id, partner_rank, flushdesc); scr_filemap_delete(&partner_map); /* delete partner map path */ scr_path_delete(&path_partner_map); /* determine whether we should preserve user directories */ int preserve_dirs = 0; scr_hash_util_get_int(flushdesc, SCR_SCAVENGE_KEY_PRESERVE, &preserve_dirs); /* read the chunk size */ unsigned long chunk_size = 0; if (scr_hash_util_get_unsigned_long(xor_headers[0], SCR_KEY_COPY_XOR_CHUNK, &chunk_size) != SCR_SUCCESS) { scr_err("Failed to read chunk size from 
XOR file header in %s @ %s:%d", xor_files[0], __FILE__, __LINE__ ); return 1; } /* determine number of files each member wrote in XOR set */ for (i=0; i < xor_set_size; i++) { /* record the number of files for this rank */ scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT); if (scr_hash_util_get_int(current_hash, SCR_KEY_COPY_XOR_FILES, &num_files[i]) != SCR_SUCCESS) { scr_err("Failed to read number of files from %s @ %s:%d", xor_files[i], __FILE__, __LINE__ ); return 1; } } /* count the total number of files and set the offsets array */ int total_num_files = 0; for (i=0; i < xor_set_size; i++) { offsets[i] = total_num_files; total_num_files += num_files[i]; } /* allocate space for a file descriptor, file name pointer, and filesize for each user file */ int* user_fds = (int*) malloc(total_num_files * sizeof(int)); char** user_files = (char**) malloc(total_num_files * sizeof(char*)); char** user_rel_files = (char**) malloc(total_num_files * sizeof(char*)); unsigned long* user_filesizes = (unsigned long*) malloc(total_num_files * sizeof(unsigned long)); if (user_fds == NULL || user_files == NULL || user_rel_files == NULL || user_filesizes == NULL) { scr_err("Failed to allocate buffer memory @ %s:%d", __FILE__, __LINE__ ); return 1; } /* get file name, file size, and open each of the user files that we have */ for (i=0; i < xor_set_size; i++) { scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT); /* for each file belonging to this rank, get filename, filesize, and open file */ for (j=0; j < num_files[i]; j++) { int offset = offsets[i] + j; /* get the meta data for this file */ scr_meta* meta = scr_hash_get_kv_int(current_hash, SCR_KEY_COPY_XOR_FILE, j); if (meta == NULL) { scr_err("Failed to read meta data for file %d in %s @ %s:%d", j, xor_files[i], __FILE__, __LINE__ ); return 1; } /* record the filesize of this file */ if (scr_meta_get_filesize(meta, &user_filesizes[offset]) != SCR_SUCCESS) { scr_err("Failed 
to read filesize field for file %d in %s @ %s:%d", j, xor_files[i], __FILE__, __LINE__ ); return 1; } /* get filename */ char* origname; if (scr_meta_get_origname(meta, &origname) != SCR_SUCCESS) { scr_err("Failed to read original name for file %d in %s @ %s:%d", j, xor_files[i], __FILE__, __LINE__ ); return 1; } /* construct full path to user file */ scr_path* path_user_full = scr_path_from_str(origname); if (preserve_dirs) { /* get original path of file */ char* origpath; if (scr_meta_get_origpath(meta, &origpath) != SCR_SUCCESS) { scr_err("Failed to read original path for file %d in %s @ %s:%d", j, xor_files[i], __FILE__, __LINE__ ); return 1; } /* construct full path to file */ scr_path_prepend_str(path_user_full, origpath); } else { /* construct full path to file */ scr_path_prepend(path_user_full, path_dset); } /* reduce path to user file */ scr_path_reduce(path_user_full); /* make a copy of the full path */ user_files[offset] = scr_path_strdup(path_user_full); /* make a copy of relative path */ scr_path* path_user_rel = scr_path_relative(path_dset, path_user_full); user_rel_files[offset] = scr_path_strdup(path_user_rel); scr_path_delete(&path_user_rel); /* free the full path */ scr_path_delete(&path_user_full); /* open the file */ if (i == 0) { /* create directory for file */ scr_path* user_dir_path = scr_path_from_str(user_files[offset]); scr_path_reduce(user_dir_path); scr_path_dirname(user_dir_path); if (! 
scr_path_is_null(user_dir_path)) { char* user_dir = scr_path_strdup(user_dir_path); mode_t mode_dir = scr_getmode(1, 1, 1); if (scr_mkdir(user_dir, mode_dir) != SCR_SUCCESS) { scr_err("Failed to create directory for user file %s @ %s:%d", user_dir, __FILE__, __LINE__ ); return 1; } scr_free(&user_dir); } scr_path_delete(&user_dir_path); /* open missing file for writing */ mode_t mode_file = scr_getmode(1, 1, 0); user_fds[offset] = scr_open(user_files[offset], O_WRONLY | O_CREAT | O_TRUNC, mode_file); if (user_fds[offset] < 0) { scr_err("Opening user file for writing: scr_open(%s) errno=%d %s @ %s:%d", user_files[offset], errno, strerror(errno), __FILE__, __LINE__ ); return 1; } } else { /* open existing file for reading */ user_fds[offset] = scr_open(user_files[offset], O_RDONLY); if (user_fds[offset] < 0) { scr_err("Opening user file for reading: scr_open(%s) errno=%d %s @ %s:%d", user_files[offset], errno, strerror(errno), __FILE__, __LINE__ ); return 1; } } } } /* finally, open the xor file for the missing rank */ mode_t mode_file = scr_getmode(1, 1, 0); xor_fds[0] = scr_open(xor_files[0], O_WRONLY | O_CREAT | O_TRUNC, mode_file); if (xor_fds[0] < 0) { scr_err("Opening xor file to be reconstructed: scr_open(%s) errno=%d %s @ %s:%d", xor_files[0], errno, strerror(errno), __FILE__, __LINE__ ); return 1; } int rc = 0; /* write the header to the XOR file of the missing rank */ if (scr_hash_write_fd(xor_files[0], xor_fds[0], xor_headers[0]) < 0) { rc = 1; } /* this offset array records the current position we are in the logical file for each rank */ unsigned long* offset = malloc(xor_set_size * sizeof(unsigned long)); if (offset == NULL) { scr_err("Failed to allocate buffer memory @ %s:%d", __FILE__, __LINE__ ); return 1; } for (i=0; i < xor_set_size; i++) { offset[i] = 0; } unsigned long write_pos = 0; int chunk_id; for (chunk_id = 0; chunk_id < xor_set_size && rc == 0; chunk_id++) { size_t nread = 0; while (nread < chunk_size && rc == 0) { /* read upto buffer_size 
bytes at a time */ size_t count = chunk_size - nread; if (count > buffer_size) { count = buffer_size; } /* clear our buffer */ memset(buffer_A, 0, count); /* read a segment from each rank and XOR it into our buffer */ for (i=1; i < xor_set_size; i++) { /* read the next set of bytes for this chunk from my file into send_buf */ if (chunk_id != ((i + root) % xor_set_size)) { /* read chunk from the logical file for this rank */ if (scr_read_pad_n(num_files[i], &user_files[offsets[i]], &user_fds[offsets[i]], buffer_B, count, offset[i], &user_filesizes[offsets[i]]) != SCR_SUCCESS) { /* our read failed, set the return code to an error */ rc = 1; count = 0; } offset[i] += count; } else { /* read chunk from the XOR file for this rank */ if (scr_read_attempt(xor_files[i], xor_fds[i], buffer_B, count) != count) { /* our read failed, set the return code to an error */ rc = 1; count = 0; } } /* TODO: XORing with unsigned long would be faster here (if chunk size is multiple of this size) */ /* merge the blocks via xor operation */ for (j = 0; j < count; j++) { buffer_A[j] ^= buffer_B[j]; } } /* at this point, we have the data from the missing rank, write it out */ if (chunk_id != root) { /* write chunk to logical file for the missing rank */ if (scr_write_pad_n(num_files[0], &user_files[0], &user_fds[0], buffer_A, count, write_pos, &user_filesizes[0]) != SCR_SUCCESS) { /* our write failed, set the return code to an error */ rc = 1; } write_pos += count; } else { /* write chunk to xor file for the missing rank */ if (scr_write_attempt(xor_files[0], xor_fds[0], buffer_A, count) != count) { /* our write failed, set the return code to an error */ rc = 1; } } nread += count; } } /* close each of the user files */ for (i=0; i < total_num_files; i++) { if (scr_close(user_files[i], user_fds[i]) != SCR_SUCCESS) { rc = 1; } } /* close each of the XOR files */ for (i=0; i < xor_set_size; i++) { if (scr_close(xor_files[i], xor_fds[i]) != SCR_SUCCESS) { rc = 1; } } /* if the write failed, 
delete the files we just wrote, and return an error */ if (rc != 0) { for (j=0; j < num_files[0]; j++) { scr_file_unlink(user_files[j]); } scr_file_unlink(xor_files[0]); return 1; } /* check that filesizes are correct */ unsigned long filesize; for (j=0; j < num_files[0]; j++) { filesize = scr_file_size(user_files[j]); if (filesize != user_filesizes[j]) { /* the filesize check failed, so delete the file */ scr_file_unlink(user_files[j]); /* mark the file as incomplete */ scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j); scr_meta_set_complete(meta, 0); rc = 1; } } /* TODO: we didn't record the filesize of the XOR file for the missing rank anywhere */ /* create a filemap for this rank */ scr_filemap* map = scr_filemap_new(); if (map == NULL) { scr_err("Failed to allocate filemap @ %s:%d", __FILE__, __LINE__ ); return 1; } /* record the dataset information in the filemap */ scr_filemap_set_dataset(map, dset_id, my_rank, dataset); /* write meta data for each of the user files and add each one to the filemap */ for (j=0; j < num_files[0]; j++) { /* add user file to filemap and record meta data */ char* user_file_relative = user_rel_files[j]; scr_filemap_add_file(map, dset_id, my_rank, user_file_relative); scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j); scr_filemap_set_meta(map, dset_id, my_rank, user_file_relative, meta); } /* write meta data for xor file and add it to the filemap */ scr_filemap_add_file(map, dset_id, my_rank, xor_files[0]); unsigned long full_chunk_filesize = scr_file_size(xor_files[0]); int missing_complete = 1; scr_meta* meta_chunk = scr_meta_new(); scr_meta_set_filename(meta_chunk, xor_files[0]); scr_meta_set_filetype(meta_chunk, SCR_META_FILE_XOR); scr_meta_set_filesize(meta_chunk, full_chunk_filesize); /* TODO: remove this from meta file, for now it's needed in scr_index.c */ scr_meta_set_ranks(meta_chunk, num_ranks); scr_meta_set_complete(meta_chunk, missing_complete); 
scr_filemap_set_meta(map, dset_id, my_rank, xor_files[0], meta_chunk); /* set expected number of files for the missing rank */ int expected_num_files = scr_filemap_num_files(map, dset_id, my_rank); scr_filemap_set_expected_files(map, dset_id, my_rank, expected_num_files); /* compute, check, and store crc values with files */ for (j=0; j < num_files[0]; j++) { /* compute crc on user file */ char* user_file_relative = user_rel_files[j]; if (scr_compute_crc(map, dset_id, my_rank, user_file_relative) != SCR_SUCCESS) { /* the crc check failed, so delete the file */ scr_file_unlink(user_files[j]); rc = 1; } } if (scr_compute_crc(map, dset_id, my_rank, xor_files[0]) != SCR_SUCCESS) { /* the crc check failed, so delete the file */ scr_file_unlink(xor_files[0]); rc = 1; } /* store flush descriptor */ scr_filemap_set_flushdesc(map, dset_id, my_rank, flushdesc); /* write filemap for this rank */ scr_path* path_map = scr_path_from_str(".scr"); scr_path_append_strf(path_map, "fmap.%d.scr", my_rank); if (scr_filemap_write(path_map, map) != SCR_SUCCESS) { rc = 1; } scr_path_delete(&path_map); /* delete the map */ scr_filemap_delete(&map); scr_meta_delete(&meta_chunk); /* delete the flush/scavenge descriptor */ scr_hash_delete(&flushdesc); scr_free(&offset); for (i=0; i < total_num_files; i++) { scr_free(&user_rel_files[i]); scr_free(&user_files[i]); } scr_free(&user_filesizes); scr_free(&user_rel_files); scr_free(&user_files); scr_free(&user_fds); for (i=0; i < xor_set_size; i++) { scr_hash_delete(&xor_headers[i]); } for (i=0; i < xor_set_size; i++) { scr_free(&xor_files[i]); } scr_free(&xor_headers); scr_free(&xor_fds); scr_free(&xor_files); scr_free(&offsets); scr_free(&num_files); scr_free(&buffer_B); scr_free(&buffer_A); scr_path_delete(&path_dset); return rc; }
int main(int argc, char* argv[]) { /* print usage if not enough arguments were given */ if (argc < 2) { printf("Usage: scr_inspect_cache <cntldir>\n"); return 1; } scr_path* scr_master_map_file = scr_path_from_str(strdup(argv[1])); /* get my hostname */ if (gethostname(scr_my_hostname, sizeof(scr_my_hostname)) != 0) { scr_err("scr_inspect_cache: Call to gethostname failed @ %s:%d", __FILE__, __LINE__ ); return 1; } /* read in the master map */ scr_hash* hash = scr_hash_new(); scr_hash_read_path(scr_master_map_file, hash); /* create an empty filemap */ scr_filemap* map = scr_filemap_new(); /* for each filemap listed in the master map */ scr_hash_elem* elem; for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap")); elem != NULL; elem = scr_hash_elem_next(elem)) { /* get the filename of this filemap */ char* file = scr_hash_elem_key(elem); /* read in the filemap */ scr_filemap* tmp_map = scr_filemap_new(); scr_path* path_file = scr_path_from_str(file); scr_filemap_read(path_file, tmp_map); scr_path_delete(&path_file); /* merge it with local 0 filemap */ scr_filemap_merge(map, tmp_map); /* delete filemap */ scr_filemap_delete(&tmp_map); } /* scan each file for each rank of each dataset */ scr_hash_elem* dset_elem; for (dset_elem = scr_filemap_first_dataset(map); dset_elem != NULL; dset_elem = scr_hash_elem_next(dset_elem)) { /* get dataset id */ int dset = scr_hash_elem_key_int(dset_elem); scr_hash_elem* rank_elem; for (rank_elem = scr_filemap_first_rank_by_dataset(map, dset); rank_elem != NULL; rank_elem = scr_hash_elem_next(rank_elem)) { /* get rank id */ int rank = scr_hash_elem_key_int(rank_elem); int missing_file = 0; int expected = scr_filemap_get_expected_files(map, dset, rank); int num = scr_filemap_num_files(map, dset, rank); if (expected == num) { /* first time through the file list, check that we have each file */ scr_hash_elem* file_elem = NULL; for (file_elem = scr_filemap_first_file(map, dset, rank); file_elem != NULL; file_elem = 
scr_hash_elem_next(file_elem)) { /* get filename */ char* file = scr_hash_elem_key(file_elem); /* check that we can read the file */ if (! scr_bool_have_file(map, dset, rank, file)) { missing_file = 1; scr_dbg(1, "File is unreadable or incomplete: Dataset %d, Rank %d, File: %s", dset, rank, file ); } } } else { missing_file = 1; } /* TODO: print partner names */ /* if we're not missing a file for rank, print this info out */ if (! missing_file) { scr_hash* desc = scr_hash_new(); scr_filemap_get_desc(map, dset, rank, desc); char* type = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_TYPE); char* groups_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUPS); char* group_id_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_ID); char* group_size_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_SIZE); char* group_rank_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_RANK); if (type != NULL && groups_str != NULL && group_id_str != NULL && group_size_str != NULL && group_rank_str != NULL) { /* we already have a group id and rank, use that to rebuild the communicator */ int groups = atoi(groups_str); int group_id = atoi(group_id_str); int group_size = atoi(group_size_str); int group_rank = atoi(group_rank_str); printf("DSET=%d RANK=%d TYPE=%s GROUPS=%d GROUP_ID=%d GROUP_SIZE=%d GROUP_RANK=%d FILES=1\n", dset, rank, type, groups, group_id, group_size, group_rank ); } } } } scr_path_delete(&scr_master_map_file); return 0; }
/* verify the hash is a valid hash for a version 5 summary file
 *
 * checks that the version field equals SCR_SUMMARY_FILE_VERSION_5, that
 * exactly one checkpoint entry is present, that this checkpoint is marked
 * complete, and that its rank count equals scr_ranks_world
 *
 * returns SCR_SUCCESS if every check passes, SCR_FAILURE otherwise */
static int scr_summary_check_v5(scr_hash* hash)
{
  /* check that the summary file version is something we support */
  int version;
  if (scr_hash_util_get_int(hash, SCR_SUMMARY_KEY_VERSION, &version) != SCR_SUCCESS) {
    /* couldn't find version number */
    scr_err("Failed to read version number in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }
  if (version != SCR_SUMMARY_FILE_VERSION_5) {
    /* invalid version number; both %d conversions need a matching
     * argument ahead of the file/line pair */
    scr_err("Found version number %d when %d was expected in summary file @ %s:%d",
      version, SCR_SUMMARY_FILE_VERSION_5, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that we have exactly one checkpoint */
  scr_hash* ckpt_hash = scr_hash_get(hash, SCR_SUMMARY_5_KEY_CKPT);
  if (scr_hash_size(ckpt_hash) != 1) {
    scr_err("More than one checkpoint found in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* get the first (and only) checkpoint id */
  char* ckpt_str = scr_hash_elem_get_first_val(hash, SCR_SUMMARY_5_KEY_CKPT);
  scr_hash* ckpt = scr_hash_get(ckpt_hash, ckpt_str);

  /* check that the complete string is set and is set to 1 */
  int complete;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
    /* could not find complete value (assume it's incomplete) */
    return SCR_FAILURE;
  }
  if (complete != 1) {
    /* checkpoint is marked as incomplete */
    return SCR_FAILURE;
  }

  /* read in the number of ranks for this checkpoint */
  int ranks;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_RANKS, &ranks) != SCR_SUCCESS) {
    scr_err("Failed to read number of ranks in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that the number of ranks matches the number we're currently running with */
  if (ranks != scr_ranks_world) {
    scr_err("Number of ranks %d that wrote checkpoint does not match current number of ranks %d @ %s:%d",
      ranks, scr_ranks_world, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  return SCR_SUCCESS;
}
/* fetch files from parallel file system
 *
 * every process calls this together (it synchronizes on scr_comm_world):
 * the summary data under fetch_path is read, this process's files are
 * copied into the cache directory and recorded in map, and the redundancy
 * scheme is applied
 *
 *   map           - filemap updated with the fetched files
 *   fetch_path    - dataset directory to fetch from
 *   dataset_id    - OUT: dataset id, written only when the fetch succeeds
 *   checkpoint_id - OUT: checkpoint id, written only when the fetch succeeds
 *
 * returns SCR_FAILURE if the summary cannot be read, the ids are invalid,
 * or any process fails to copy its files; otherwise returns the result of
 * scr_reddesc_apply */
static int scr_fetch_files(
  scr_filemap* map,
  scr_path* fetch_path,
  int* dataset_id,
  int* checkpoint_id)
{
  /* get fetch directory as string */
  char* fetch_dir = scr_path_strdup(fetch_path);

  /* this may take a while, so tell user what we're doing */
  if (scr_my_rank_world == 0) {
    scr_dbg(1, "Attempting fetch from %s", fetch_dir);
  }

  /* make sure all processes make it this far before progressing */
  MPI_Barrier(scr_comm_world);

  /* start timer; the values are only read on rank 0, but are initialized
   * everywhere so no rank ever holds an indeterminate value */
  time_t timestamp_start = 0;
  double time_start = 0.0;
  if (scr_my_rank_world == 0) {
    timestamp_start = scr_log_seconds();
    time_start = MPI_Wtime();
  }

  /* log the fetch attempt */
  if (scr_my_rank_world == 0) {
    if (scr_log_enable) {
      time_t now = scr_log_seconds();
      scr_log_event("FETCH STARTED", fetch_dir, NULL, &now, NULL);
    }
  }

  /* allocate a new hash to get a list of files to fetch */
  scr_hash* file_list = scr_hash_new();

  /* read the summary file */
  if (scr_fetch_summary(fetch_dir, file_list) != SCR_SUCCESS) {
    if (scr_my_rank_world == 0) {
      scr_dbg(1, "Failed to read summary file @ %s:%d", __FILE__, __LINE__);
      if (scr_log_enable) {
        double time_end = MPI_Wtime();
        double time_diff = time_end - time_start;
        time_t now = scr_log_seconds();
        scr_log_event("FETCH FAILED", fetch_dir, NULL, &now, &time_diff);
      }
    }
    scr_hash_delete(&file_list);
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* get a pointer to the dataset */
  scr_dataset* dataset = scr_hash_get(file_list, SCR_KEY_DATASET);

  /* get the dataset id */
  int id;
  if (scr_dataset_get_id(dataset, &id) != SCR_SUCCESS) {
    if (scr_my_rank_world == 0) {
      scr_dbg(1, "Invalid id in summary file @ %s:%d", __FILE__, __LINE__);
      if (scr_log_enable) {
        double time_end = MPI_Wtime();
        double time_diff = time_end - time_start;
        time_t now = scr_log_seconds();
        scr_log_event("FETCH FAILED", fetch_dir, NULL, &now, &time_diff);
      }
    }
    scr_hash_delete(&file_list);
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* get the checkpoint id for this dataset */
  int ckpt_id;
  if (scr_dataset_get_ckpt(dataset, &ckpt_id) != SCR_SUCCESS) {
    /* eventually, we'll support reading of non-checkpoint datasets,
     * but we don't yet */
    scr_err("Failed to read checkpoint id from dataset @ %s:%d",
      __FILE__, __LINE__
    );
    scr_hash_delete(&file_list);
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* delete any existing files for this dataset id (do this before
   * filemap_read) */
  scr_cache_delete(map, id);

  /* get the redundancy descriptor for this id */
  scr_reddesc* c = scr_reddesc_for_checkpoint(ckpt_id, scr_nreddescs, scr_reddescs);

  /* store our redundancy descriptor hash in the filemap */
  scr_hash* my_desc_hash = scr_hash_new();
  scr_reddesc_store_to_hash(c, my_desc_hash);
  scr_filemap_set_desc(map, id, scr_my_rank_world, my_desc_hash);
  scr_hash_delete(&my_desc_hash);

  /* write the filemap out before creating the directory */
  scr_filemap_write(scr_map_file, map);

  /* create the cache directory */
  scr_cache_dir_create(c, id);

  /* get the cache directory */
  char cache_dir[SCR_MAX_FILENAME];
  scr_cache_dir_get(c, id, cache_dir);

  /* now we can finally fetch the actual files */
  int success = 1;
  if (scr_fetch_data(file_list, cache_dir, map) != SCR_SUCCESS) {
    success = 0;
  }

  /* free the hash holding the summary file data */
  scr_hash_delete(&file_list);

  /* check that all processes copied their file successfully */
  if (! scr_alltrue(success)) {
    /* someone failed, so let's delete the partial checkpoint */
    scr_cache_delete(map, id);

    if (scr_my_rank_world == 0) {
      scr_dbg(1, "One or more processes failed to read its files @ %s:%d",
        __FILE__, __LINE__
      );
      if (scr_log_enable) {
        double time_end = MPI_Wtime();
        double time_diff = time_end - time_start;
        time_t now = scr_log_seconds();
        scr_log_event("FETCH FAILED", fetch_dir, &id, &now, &time_diff);
      }
    }
    scr_free(&fetch_dir);
    return SCR_FAILURE;
  }

  /* apply redundancy scheme */
  double bytes_copied = 0.0;
  int rc = scr_reddesc_apply(map, c, id, &bytes_copied);
  if (rc == SCR_SUCCESS) {
    /* record dataset and checkpoint ids */
    *dataset_id = id;
    *checkpoint_id = ckpt_id;

    /* update our flush file to indicate this checkpoint is in cache
     * as well as the parallel file system */
    /* TODO: should we place SCR_FLUSH_KEY_LOCATION_PFS before
     * scr_reddesc_apply? */
    scr_flush_file_location_set(id, SCR_FLUSH_KEY_LOCATION_CACHE);
    scr_flush_file_location_set(id, SCR_FLUSH_KEY_LOCATION_PFS);
    scr_flush_file_location_unset(id, SCR_FLUSH_KEY_LOCATION_FLUSHING);
  } else {
    /* something went wrong, so delete this checkpoint from the cache;
     * use the filemap we were given, like every other call in this
     * function, rather than the global map */
    scr_cache_delete(map, id);
  }

  /* stop timer, compute bandwidth, and report performance */
  double total_bytes = bytes_copied;
  if (scr_my_rank_world == 0) {
    double time_end = MPI_Wtime();
    double time_diff = time_end - time_start;
    double bw = total_bytes / (1024.0 * 1024.0 * time_diff);
    scr_dbg(1, "scr_fetch_files: %f secs, %e bytes, %f MB/s, %f MB/s per proc",
      time_diff, total_bytes, bw, bw/scr_ranks_world
    );

    /* log data on the fetch to the database */
    if (scr_log_enable) {
      time_t now = scr_log_seconds();
      if (rc == SCR_SUCCESS) {
        scr_log_event("FETCH SUCCEEDED", fetch_dir, &id, &now, &time_diff);
      } else {
        scr_log_event("FETCH FAILED", fetch_dir, &id, &now, &time_diff);
      }

      /* refresh the cache directory name in the existing buffer instead
       * of declaring a second buffer that shadows it */
      scr_cache_dir_get(c, id, cache_dir);
      scr_log_transfer("FETCH", fetch_dir, cache_dir, &id,
        &timestamp_start, &time_diff, &total_bytes
      );
    }
  }

  /* free fetch directory string */
  scr_free(&fetch_dir);

  return rc;
}
/* read contents of summary file
 *
 * rank 0 verifies that summary_dir is readable and loads
 * <summary_dir>/.scr/summary.scr; its status and the header hash are
 * broadcast to all ranks.  The rank2file map is then read by the ranks
 * that scr_fetch_rank2file_map marks as readers, and the per-rank entries
 * are passed on with scr_hash_exchange_direction.
 *
 * on success file_list holds SCR_KEY_PATH for the dataset directory, a
 * copy of the dataset hash, and the entries received for this process,
 * with SCR_KEY_PATH set on each file
 *
 * returns SCR_SUCCESS only if all ranks succeed, SCR_FAILURE otherwise */
static int scr_fetch_summary(
  const char* summary_dir,
  scr_hash* file_list)
{
  /* start from success; any failed step below flips this */
  int rc = SCR_SUCCESS;

  /* only rank 0 probes the summary directory */
  if (scr_my_rank_world == 0 &&
      scr_file_is_readable(summary_dir) != SCR_SUCCESS)
  {
    scr_err("Failed to access summary directory %s @ %s:%d",
      summary_dir, __FILE__, __LINE__
    );
    rc = SCR_FAILURE;
  }

  /* share rank 0's verdict; nothing has been allocated yet, so a plain
   * return is enough on failure */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    return rc;
  }

  /* remember where the dataset lives */
  scr_hash_util_set_str(file_list, SCR_KEY_PATH, summary_dir);

  /* path to the dataset directory and to its hidden .scr subdirectory */
  scr_path* dataset_path = scr_path_from_str(summary_dir);
  scr_path* meta_path    = scr_path_dup(dataset_path);
  scr_path_append_str(meta_path, ".scr");
  scr_path_reduce(meta_path);

  /* every rank needs a header hash to receive the broadcast,
   * but only rank 0 fills it from disk */
  scr_hash* header = scr_hash_new();
  if (scr_my_rank_world == 0) {
    scr_path* summary_path = scr_path_dup(meta_path);
    scr_path_append_str(summary_path, "summary.scr");
    const char* summary_file = scr_path_strdup(summary_path);

    int fd = scr_open(summary_file, O_RDONLY);
    if (fd < 0) {
      scr_err("Failed to open summary file %s @ %s:%d",
        summary_file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    } else {
      /* a negative byte count signals a failed read */
      ssize_t header_size = scr_hash_read_fd(summary_file, fd, header);
      if (header_size < 0) {
        rc = SCR_FAILURE;
      }

      /* TODO: check that the version is correct */

      scr_close(summary_file, fd);
    }

    scr_free(&summary_file);
    scr_path_delete(&summary_path);
  }

  /* share the outcome of the read */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    goto cleanup;
  }

  /* hand the header to everyone */
  scr_hash_bcast(header, 0, scr_comm_world);

  /* store a private copy of the dataset description in the file list */
  scr_hash* dataset_hash = scr_hash_new();
  scr_dataset* dataset = scr_hash_get(header, SCR_SUMMARY_6_KEY_DATASET);
  scr_hash_merge(dataset_hash, dataset);
  scr_hash_set(file_list, SCR_SUMMARY_6_KEY_DATASET, dataset_hash);

  /* path to the top-level rank2file map */
  scr_path* rank2file_path = scr_path_dup(meta_path);
  scr_path_append_str(rank2file_path, "rank2file.scr");

  /* reader state: whether this rank reads, from which file, and where;
   * initially rank 0 is the sole reader, starting at offset 0 */
  int is_reader = 0;
  char* map_file = NULL;
  unsigned long map_offset = 0;
  if (scr_my_rank_world == 0) {
    is_reader  = 1;
    map_file   = scr_path_strdup(rank2file_path);
    map_offset = 0;
  }

  /* let the helper update the reader state (who reads which file/offset) */
  if (scr_fetch_rank2file_map(dataset_path, 1, &is_reader, &map_file, &map_offset)
      != SCR_SUCCESS)
  {
    rc = SCR_FAILURE;
  }

  /* hashes used to pass file lists between ranks */
  scr_hash* send = scr_hash_new();
  scr_hash* recv = scr_hash_new();

  /* readers load their section of the map */
  if (is_reader) {
    int fd = scr_open(map_file, O_RDONLY);
    if (fd < 0) {
      scr_err("Failed to open rank2file map %s @ %s:%d",
        map_file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    } else {
      /* read the hash stored at our offset */
      scr_hash* section = scr_hash_new();
      scr_lseek(map_file, fd, map_offset, SEEK_SET);
      ssize_t nbytes = scr_hash_read_fd(map_file, fd, section);
      if (nbytes < 0) {
        scr_err("Failed to read rank2file map file %s @ %s:%d",
          map_file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* the rank count in the file must match the current run */
      int nranks = 0;
      scr_hash_util_get_int(section, SCR_SUMMARY_6_KEY_RANKS, &nranks);
      if (nranks != scr_ranks_world) {
        scr_err("Invalid number of ranks in %s, got %d expected %d @ %s:%d",
          map_file, nranks, scr_ranks_world, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* swap the empty send hash for the per-rank data pulled out of
       * the section, then drop what is left of the section */
      scr_hash_delete(&send);
      send = scr_hash_extract(section, SCR_SUMMARY_6_KEY_RANK);
      scr_hash_delete(&section);

      scr_close(map_file, fd);
    }

    /* the file name string is no longer needed */
    scr_free(&map_file);
  }

  /* stop here unless every rank got through the read */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

  /* pass the per-rank entries on to their destination ranks */
  scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT);

  /* walk what we received; the key of each element is the sending rank,
   * which is irrelevant, the payload is the element's hash */
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(recv);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    scr_hash* payload = scr_hash_elem_hash(elem);

    /* a payload without a file section is an error */
    scr_hash* file_hash = scr_hash_get(payload, SCR_SUMMARY_6_KEY_FILE);
    if (file_hash == NULL) {
      rc = SCR_FAILURE;
      continue;
    }

    /* TODO: parse summary file format */
    scr_hash_merge(file_list, payload);
  }

  /* tag every file of ours with the directory it lives in */
  if (rc == SCR_SUCCESS) {
    scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
    for (elem = scr_hash_elem_first(files);
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* file name as recorded in the summary data */
      char* name = scr_hash_elem_key(elem);

      /* dataset directory + file name, minus the last component,
       * gives the directory holding the file */
      scr_path* path_full = scr_path_dup(dataset_path);
      scr_path_append_str(path_full, name);
      scr_path_dirname(path_full);
      char* dir = scr_path_strdup(path_full);

      /* store that directory with the file's entry */
      scr_hash_util_set_str(scr_hash_elem_hash(elem), SCR_KEY_PATH, dir);

      scr_free(&dir);
      scr_path_delete(&path_full);
    }
  }

  /* final agreement across ranks; either way we fall into the cleanup */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
  }

cleanup_hashes:
  /* release the exchange hashes and the rank2file path */
  scr_hash_delete(&recv);
  scr_hash_delete(&send);
  scr_path_delete(&rank2file_path);

cleanup:
  /* release the header and the directory paths */
  scr_hash_delete(&header);
  scr_path_delete(&meta_path);
  scr_path_delete(&dataset_path);

  return rc;
}
static int scr_fetch_rank2file_map( const scr_path* dataset_path, int depth, int* ptr_valid, char** ptr_file, unsigned long* ptr_offset) { int rc = SCR_SUCCESS; /* get local variables so we don't have to deference everything */ int valid = *ptr_valid; char* file = *ptr_file; unsigned long offset = *ptr_offset; /* create a hash to hold section of file */ scr_hash* hash = scr_hash_new(); /* if we can read from file do it */ if (valid) { /* open file if we haven't already */ int fd = scr_open(file, O_RDONLY); if (fd >= 0) { /* read our segment from the file */ scr_lseek(file, fd, offset, SEEK_SET); ssize_t read_rc = scr_hash_read_fd(file, fd, hash); if (read_rc < 0) { scr_err("Failed to read from %s @ %s:%d", file, __FILE__, __LINE__ ); rc = SCR_FAILURE; } /* close the file */ scr_close(file, fd); } else { scr_err("Failed to open rank2file map %s @ %s:%d", file, __FILE__, __LINE__ ); rc = SCR_FAILURE; } } /* check for read errors */ if (! scr_alltrue(rc == SCR_SUCCESS)) { rc = SCR_FAILURE; goto cleanup; } /* create hashes to exchange data */ scr_hash* send = scr_hash_new(); scr_hash* recv = scr_hash_new(); /* copy rank data into send hash */ if (valid) { scr_hash* rank_hash = scr_hash_get(hash, SCR_SUMMARY_6_KEY_RANK); scr_hash_merge(send, rank_hash); } /* exchange hashes */ scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT); /* see if anyone sent us anything */ int newvalid = 0; char* newfile = NULL; unsigned long newoffset = 0; scr_hash_elem* elem = scr_hash_elem_first(recv); if (elem != NULL) { /* got something, so now we'll read in the next step */ newvalid = 1; /* get file name we should read */ scr_hash* elem_hash = scr_hash_elem_hash(elem); char* value; if (scr_hash_util_get_str(elem_hash, SCR_SUMMARY_6_KEY_FILE, &value) == SCR_SUCCESS) { /* return string of full path to file to caller */ scr_path* newpath = scr_path_dup(dataset_path); scr_path_append_str(newpath, value); newfile = scr_path_strdup(newpath); 
scr_path_delete(&newpath); } else { rc = SCR_FAILURE; } /* get offset we should start reading from */ if (scr_hash_util_get_bytecount(elem_hash, SCR_SUMMARY_6_KEY_OFFSET, &newoffset) != SCR_SUCCESS) { rc = SCR_FAILURE; } } /* free the send and receive hashes */ scr_hash_delete(&recv); scr_hash_delete(&send); /* get level id, and broadcast it from rank 0, * which we assume to be a reader in all steps */ int level_id = -1; if (valid) { if (scr_hash_util_get_int(hash, SCR_SUMMARY_6_KEY_LEVEL, &level_id) != SCR_SUCCESS) { rc = SCR_FAILURE; } } MPI_Bcast(&level_id, 1, MPI_INT, 0, scr_comm_world); /* check for read errors */ if (! scr_alltrue(rc == SCR_SUCCESS)) { rc = SCR_FAILURE; goto cleanup; } /* set parameters for output or next iteration, * we already took care of updating ptr_fd earlier */ if (valid) { scr_free(ptr_file); } *ptr_valid = newvalid; *ptr_file = newfile; *ptr_offset = newoffset; /* recurse if we still have levels to read */ if (level_id > 1) { rc = scr_fetch_rank2file_map(dataset_path, depth+1, ptr_valid, ptr_file, ptr_offset); } cleanup: /* free the hash */ scr_hash_delete(&hash); return rc; }
/* fetch files listed in hash into specified cache directory and
 * update filemap, returns SCR_SUCCESS if successful
 *
 *   file_list - hash holding the dataset (SCR_KEY_DATASET), the files to
 *               fetch (SCR_KEY_FILE), and the dataset path (SCR_KEY_PATH)
 *   dir       - cache directory the files are copied into
 *   map       - filemap that records each fetched file and its meta data
 *
 * files carrying SCR_SUMMARY_6_KEY_NOFETCH are skipped and not counted.
 * The filemap is written to scr_map_file before each file is fetched and
 * again once the expected file count has been set.
 *
 * returns SCR_FAILURE if the dataset id or a file size cannot be read
 * or if any file fails to fetch */
static int scr_fetch_files_list(
  const scr_hash* file_list,
  const char* dir,
  scr_filemap* map)
{
  /* assume we'll succeed in fetching our files */
  int rc = SCR_SUCCESS;

  /* assume we don't have any files to fetch */
  int my_num_files = 0;

  /* get dataset id; without it we cannot record anything in the filemap,
   * and using it unset would read an indeterminate value */
  int id;
  scr_dataset* dataset = scr_hash_get(file_list, SCR_KEY_DATASET);
  if (scr_dataset_get_id(dataset, &id) != SCR_SUCCESS) {
    scr_err("Failed to read dataset id from summary data @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* now iterate through the file list and fetch each file */
  scr_hash_elem* file_elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (file_elem = scr_hash_elem_first(files);
       file_elem != NULL;
       file_elem = scr_hash_elem_next(file_elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(file_elem);

    /* get a pointer to the hash for this file */
    scr_hash* hash = scr_hash_elem_hash(file_elem);

    /* check whether we are supposed to fetch this file */
    /* TODO: this is a hacky way to avoid reading a redundancy file
     * back in under the assumption that it's an original file, which
     * breaks our redundancy computation due to a name conflict on
     * the file names */
    scr_hash_elem* no_fetch_hash = scr_hash_elem_get(hash, SCR_SUMMARY_6_KEY_NOFETCH);
    if (no_fetch_hash != NULL) {
      continue;
    }

    /* increment our file count */
    my_num_files++;

    /* build the destination file name */
    scr_path* path_newfile = scr_path_from_str(file);
    scr_path_basename(path_newfile);
    scr_path_prepend_str(path_newfile, dir);
    char* newfile = scr_path_strdup(path_newfile);

    /* add the file to our filemap and write it to disk before creating
     * the file, this way we have a record that it may exist before we
     * actually start to fetch it */
    scr_filemap_add_file(map, id, scr_my_rank_world, newfile);
    scr_filemap_write(scr_map_file, map);

    /* get the file size */
    unsigned long filesize = 0;
    if (scr_hash_util_get_unsigned_long(hash, SCR_KEY_SIZE, &filesize) != SCR_SUCCESS) {
      scr_err("Failed to read file size from summary data @ %s:%d",
        __FILE__, __LINE__
      );
      rc = SCR_FAILURE;

      /* free path and string */
      scr_free(&newfile);
      scr_path_delete(&path_newfile);

      break;
    }

    /* check for a complete flag */
    int complete = 1;
    if (scr_hash_util_get_int(hash, SCR_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
      /* in summary file, the absence of a complete flag on a file
       * implies the file is complete */
      complete = 1;
    }

    /* create a new meta data object for this file */
    scr_meta* meta = scr_meta_new();

    /* set the meta data; carry over the complete flag read above so a
     * file the summary marks incomplete is not recorded as complete */
    scr_meta_set_filename(meta, newfile);
    scr_meta_set_filetype(meta, SCR_META_FILE_USER);
    scr_meta_set_filesize(meta, filesize);
    scr_meta_set_complete(meta, complete);
    /* TODODSET: move the ranks field elsewhere, for now it's needed
     * by scr_index.c */
    scr_meta_set_ranks(meta, scr_ranks_world);

    /* get the crc, if set, and add it to the meta data */
    uLong crc;
    if (scr_hash_util_get_crc32(hash, SCR_KEY_CRC, &crc) == SCR_SUCCESS) {
      scr_meta_set_crc32(meta, crc);
    }

    /* fetch file from containers if they are defined, otherwise fetch
     * the native file */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* get source path */
      char* from_dir;
      if (scr_hash_util_get_str(file_list, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        /* fetch file from containers */
        if (scr_fetch_file_from_containers(newfile, meta, segments, from_dir) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to find base dataset directory in file list */
        rc = SCR_FAILURE;
      }
    } else {
      /* fetch native file, lookup directory for this file */
      char* from_dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        if (scr_fetch_file(newfile, from_dir, meta) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to read source directory, mark file as incomplete */
        scr_meta_set_complete(meta, 0);
        rc = SCR_FAILURE;
      }
    }

    /* TODODSET: want to write out filemap before we start to fetch
     * each file? */

    /* record the meta data (including the complete flag) in the filemap */
    scr_filemap_set_meta(map, id, scr_my_rank_world, newfile, meta);

    /* free the meta data object */
    scr_meta_delete(&meta);

    /* free path and string */
    scr_free(&newfile);
    scr_path_delete(&path_newfile);
  }

  /* set the expected number of files for this dataset */
  scr_filemap_set_expected_files(map, id, scr_my_rank_world, my_num_files);
  scr_filemap_write(scr_map_file, map);

  return rc;
}