/* writes the specified command to the transfer file */ int set_transfer_file_state(char* s, int done) { /* get a hash to store file data */ scr_hash* hash = scr_hash_new(); /* attempt to read the file transfer file */ int fd = -1; if (scr_hash_lock_open_read(scr_transfer_file, &fd, hash) == SCR_SUCCESS) { /* set the state */ scr_hash_util_set_str(hash, SCR_TRANSFER_KEY_STATE, s); /* set the flag if we're done */ if (done) { scr_hash_set_kv(hash, SCR_TRANSFER_KEY_FLAG, SCR_TRANSFER_KEY_FLAG_DONE); } /* write the hash back to the file */ scr_hash_write_close_unlock(scr_transfer_file, &fd, hash); } /* delete the hash */ scr_hash_delete(&hash); return SCR_SUCCESS; }
/* searches for name and returns a character pointer to its value if set, * returns NULL if not found */ char* scr_param_get(char* name) { char* value = NULL; /* see if this parameter is one which is restricted from user */ scr_hash* no_user = scr_hash_get(scr_no_user_hash, name); /* if parameter is set in environment, return that value */ if (no_user == NULL && getenv(name) != NULL) { /* we don't just return the getenv value directly because that causes * segfaults on some systems, so instead we add it to a hash and return * the pointer into the hash */ /* try to lookup the value for this name in case we've already cached it */ if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) { /* it's not in the hash yet, so add it */ char* tmp_value = strdup(getenv(name)); scr_hash_util_set_str(scr_env_hash, name, tmp_value); scr_free(&tmp_value); /* now issue our lookup again */ if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) { /* it's an error if we don't find it this time */ scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d", name, __FILE__, __LINE__ ); } } return value; } /* otherwise, if parameter is set in user configuration file, * return that value */ value = scr_hash_elem_get_first_val(scr_user_hash, name); if (no_user == NULL && value != NULL) { return value; } /* otherwise, if parameter is set in system configuration file, * return that value */ value = scr_hash_elem_get_first_val(scr_system_hash, name); if (value != NULL) { return value; } /* parameter not found, return NULL */ return NULL; }
/* Reads the summary information for the dataset stored in summary_dir and
 * fills in file_list for the calling process.
 *
 * Steps, in order:
 *   - rank 0 checks that summary_dir is readable; the result is broadcast,
 *   - summary_dir is recorded in file_list under SCR_KEY_PATH,
 *   - rank 0 reads <summary_dir>/.scr/summary.scr into a header hash,
 *     which is then broadcast to all ranks,
 *   - the dataset entry of the header is copied into file_list under
 *     SCR_SUMMARY_6_KEY_DATASET,
 *   - the rank2file map under <summary_dir>/.scr is read by the ranks that
 *     scr_fetch_rank2file_map marks as valid readers and the per-rank
 *     entries are exchanged,
 *   - received entries are merged into file_list and each file listed
 *     under SCR_KEY_FILE gets an SCR_KEY_PATH entry.
 *
 * Returns SCR_SUCCESS or SCR_FAILURE.  This function contains collective
 * calls on scr_comm_world (MPI_Bcast, scr_hash_bcast, and presumably
 * scr_alltrue / scr_hash_exchange_direction -- confirm), so every rank in
 * scr_comm_world must call it. */
static int scr_fetch_summary(
  const char* summary_dir,
  scr_hash* file_list)
{
  /* start out assuming success, any step that fails below
   * flips rc to SCR_FAILURE */
  int rc = SCR_SUCCESS;

  /* check whether summary directory exists and is readable,
   * only rank 0 touches the file system here */
  if (scr_my_rank_world == 0) {
    /* check that we can access the directory */
    if (scr_file_is_readable(summary_dir) != SCR_SUCCESS) {
      scr_err("Failed to access summary directory %s @ %s:%d",
        summary_dir, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }
  }

  /* broadcast success code from rank 0,
   * nothing has been allocated yet so a plain return is fine */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    return rc;
  }

  /* add path to file list */
  scr_hash_util_set_str(file_list, SCR_KEY_PATH, summary_dir);

  /* build path to the ".scr" meta data directory inside the dataset */
  scr_path* dataset_path = scr_path_from_str(summary_dir);
  scr_path* meta_path = scr_path_dup(dataset_path);
  scr_path_append_str(meta_path, ".scr");
  scr_path_reduce(meta_path);

  /* rank 0 reads the summary file */
  scr_hash* header = scr_hash_new();
  if (scr_my_rank_world == 0) {
    /* build path to summary file */
    scr_path* summary_path = scr_path_dup(meta_path);
    scr_path_append_str(summary_path, "summary.scr");
    const char* summary_file = scr_path_strdup(summary_path);

    /* open file for reading */
    int fd = scr_open(summary_file, O_RDONLY);
    if (fd >= 0) {
      /* read summary hash */
      ssize_t header_size = scr_hash_read_fd(summary_file, fd, header);
      if (header_size < 0) {
        rc = SCR_FAILURE;
      }

      /* TODO: check that the version is correct */

      /* close the file */
      scr_close(summary_file, fd);
    } else {
      scr_err("Failed to open summary file %s @ %s:%d",
        summary_file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }

    /* free summary path and string */
    scr_free(&summary_file);
    scr_path_delete(&summary_path);
  }

  /* broadcast success code from rank 0,
   * on failure only header and the two paths need to be released,
   * which is what the cleanup label does */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    goto cleanup;
  }

  /* broadcast the summary hash */
  scr_hash_bcast(header, 0, scr_comm_world);

  /* extract and record the dataset in file list,
   * dataset_hash is handed to file_list via scr_hash_set and is not
   * deleted in this function */
  scr_hash* dataset_hash = scr_hash_new();
  scr_dataset* dataset = scr_hash_get(header, SCR_SUMMARY_6_KEY_DATASET);
  scr_hash_merge(dataset_hash, dataset);
  scr_hash_set(file_list, SCR_SUMMARY_6_KEY_DATASET, dataset_hash);

  /* build path to rank2file map */
  scr_path* rank2file_path = scr_path_dup(meta_path);
  scr_path_append_str(rank2file_path, "rank2file.scr");

  /* fetch file names and offsets containing file hash data,
   * valid, file, and offset are in/out arguments of
   * scr_fetch_rank2file_map */
  int valid = 0;
  char* file = NULL;
  unsigned long offset = 0;
  if (scr_my_rank_world == 0) {
    /* rank 0 is only valid reader to start with */
    valid = 1;
    file = scr_path_strdup(rank2file_path);
    offset = 0;
  }
  if (scr_fetch_rank2file_map(dataset_path, 1, &valid, &file, &offset) != SCR_SUCCESS) {
    rc = SCR_FAILURE;
  }

  /* create hashes to exchange data */
  scr_hash* send = scr_hash_new();
  scr_hash* recv = scr_hash_new();

  /* read data from file */
  if (valid) {
    /* open file if necessary */
    int fd = scr_open(file, O_RDONLY);
    if (fd >= 0) {
      /* create hash to hold file contents */
      scr_hash* save = scr_hash_new();

      /* read hash from file, starting at the offset assigned to us */
      scr_lseek(file, fd, offset, SEEK_SET);
      ssize_t readsize = scr_hash_read_fd(file, fd, save);
      if (readsize < 0) {
        scr_err("Failed to read rank2file map file %s @ %s:%d",
          file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* check that the number of ranks match,
       * ranks stays 0 if the key is missing, which also fails the check
       * unless scr_ranks_world is 0 */
      int ranks = 0;
      scr_hash_util_get_int(save, SCR_SUMMARY_6_KEY_RANKS, &ranks);
      if (ranks != scr_ranks_world) {
        scr_err("Invalid number of ranks in %s, got %d expected %d @ %s:%d",
          file, ranks, scr_ranks_world, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* delete current send hash, set it to values from file,
       * delete file hash */
      scr_hash_delete(&send);
      send = scr_hash_extract(save, SCR_SUMMARY_6_KEY_RANK);
      scr_hash_delete(&save);

      /* close the file */
      scr_close(file, fd);
    } else {
      scr_err("Failed to open rank2file map %s @ %s:%d",
        file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }

    /* delete file name string */
    /* NOTE(review): file is only freed when valid is set; if
     * scr_fetch_rank2file_map can return valid == 0 with file still
     * allocated, that string leaks -- confirm against that function */
    scr_free(&file);
  }

  /* check that everyone read the data ok */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

  /* scatter to groups */
  scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT);

  /* iterate over the ranks that sent data to us, and set up our
   * list of files */
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(recv);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* the key is the source rank, which we don't care about,
     * the info we need is in the element hash */
    scr_hash* elem_hash = scr_hash_elem_hash(elem);

    /* get pointer to file hash, it is only used to verify that the
     * entry has a file section, the whole element hash is merged */
    scr_hash* file_hash = scr_hash_get(elem_hash, SCR_SUMMARY_6_KEY_FILE);
    if (file_hash != NULL) {
      /* TODO: parse summary file format */
      scr_hash_merge(file_list, elem_hash);
    } else {
      rc = SCR_FAILURE;
    }
  }

  /* fill in file list parameters */
  if (rc == SCR_SUCCESS) {
    /* add PATH entry for each of our files
     * (no container setting is checked here) */
    scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
    for (elem = scr_hash_elem_first(files);
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the file name,
       * NOTE(review): this local shadows the outer `file` variable */
      char* file = scr_hash_elem_key(elem);

      /* combine the file name with the summary directory to build a
       * full path to the file */
      scr_path* path_full = scr_path_dup(dataset_path);
      scr_path_append_str(path_full, file);

      /* subtract off last component to get just the path */
      scr_path_dirname(path_full);
      char* path = scr_path_strdup(path_full);

      /* record path in file list */
      scr_hash* hash = scr_hash_elem_hash(elem);
      scr_hash_util_set_str(hash, SCR_KEY_PATH, path);

      /* free the path and string */
      scr_free(&path);
      scr_path_delete(&path_full);
    }
  }

  /* check that everyone built their file list ok,
   * the goto is redundant since the label follows immediately */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

cleanup_hashes:
  /* delete send and receive hashes */
  scr_hash_delete(&recv);
  scr_hash_delete(&send);

  /* free path for rank2file map */
  scr_path_delete(&rank2file_path);

cleanup:
  /* free the header hash */
  scr_hash_delete(&header);

  /* free paths for meta data directory and dataset directory */
  scr_path_delete(&meta_path);
  scr_path_delete(&dataset_path);

  return rc;
}
/* Copies the files of dataset id to a partner process.
 *
 * Each process sends its own files to the rank state->rhs_rank and
 * receives files from the rank state->lhs_rank (both ranks are relative to
 * c->comm, and are taken from the partner state stored in c->copy_state).
 * Before any file is moved, the filemap is updated with the number of
 * files expected from the left-hand partner, a flush descriptor naming
 * that partner's host, and the partner's redundancy descriptor, and the
 * filemap is written to scr_map_file.
 *
 * Files are then exchanged one per loop iteration.  The two sides may hold
 * different numbers of files, so an iteration may send only, receive only,
 * or do both.
 *
 *   map - filemap to read our file list from and to record partner files in
 *   c   - redundancy descriptor supplying the communicator and partner state
 *   id  - dataset id
 *
 * Returns SCR_SUCCESS, or SCR_FAILURE if any call to scr_swap_files
 * failed.  All files are still attempted after a failure. */
static int scr_reddesc_apply_partner(
  scr_filemap* map,
  const scr_reddesc* c,
  int id)
{
  int rc = SCR_SUCCESS;

  /* get pointer to partner state structure */
  scr_reddesc_partner* state = (scr_reddesc_partner*) c->copy_state;

  /* get a list of our files */
  int numfiles = 0;
  char** files = NULL;
  scr_filemap_list_files(map, id, scr_my_rank_world, &numfiles, &files);

  /* first, determine how many files we'll be sending and receiving
   * with our partners */
  MPI_Status status;
  int send_num = numfiles;
  int recv_num = 0;
  MPI_Sendrecv(
    &send_num, 1, MPI_INT, state->rhs_rank, 0,
    &recv_num, 1, MPI_INT, state->lhs_rank, 0,
    c->comm, &status
  );

  /* record how many files our partner will send */
  scr_filemap_set_expected_files(map, id, state->lhs_rank_world, recv_num);

  /* remember which node our partner is on (needed for scavenge) */
  scr_hash* flushdesc = scr_hash_new();
  scr_filemap_get_flushdesc(map, id, state->lhs_rank_world, flushdesc);
  scr_hash_util_set_int(flushdesc, SCR_SCAVENGE_KEY_PRESERVE,  scr_preserve_directories);
  scr_hash_util_set_int(flushdesc, SCR_SCAVENGE_KEY_CONTAINER, scr_use_containers);
  scr_hash_util_set_str(flushdesc, SCR_SCAVENGE_KEY_PARTNER,   state->lhs_hostname);
  scr_filemap_set_flushdesc(map, id, state->lhs_rank_world, flushdesc);
  scr_hash_delete(&flushdesc);

  /* record partner's redundancy descriptor hash */
  scr_hash* lhs_desc_hash = scr_hash_new();
  scr_hash* my_desc_hash  = scr_hash_new();
  scr_reddesc_store_to_hash(c, my_desc_hash);
  scr_hash_sendrecv(my_desc_hash, state->rhs_rank, lhs_desc_hash, state->lhs_rank, c->comm);
  scr_filemap_set_desc(map, id, state->lhs_rank_world, lhs_desc_hash);
  scr_hash_delete(&my_desc_hash);
  scr_hash_delete(&lhs_desc_hash);

  /* store this info in our filemap before we receive any files */
  scr_filemap_write(scr_map_file, map);

  /* define directory to receive partner file in */
  char* dir = scr_cache_dir_get(c, id);

  /* for each potential file, step through a call to swap */
  while (send_num > 0 || recv_num > 0) {
    /* assume we won't send or receive in this step */
    int send_rank = MPI_PROC_NULL;
    int recv_rank = MPI_PROC_NULL;

    /* if we have a file left to send,
     * get the filename and destination rank,
     * files are sent in list order: index 0 first */
    char* file = NULL;
    if (send_num > 0) {
      int i = numfiles - send_num;
      file = files[i];
      send_rank = state->rhs_rank;
      send_num--;
    }

    /* if we have a file left to receive, get the rank */
    if (recv_num > 0) {
      recv_rank = state->lhs_rank;
      recv_num--;
    }

    /* exchange file names with partners,
     * the receive buffer starts out as an empty string so it never holds
     * indeterminate (or stale, from an earlier iteration) contents in
     * iterations where nothing is received */
    char file_partner[SCR_MAX_FILENAME] = "";
    scr_swap_file_names(
      file, send_rank, file_partner, sizeof(file_partner), recv_rank,
      dir, c->comm
    );

    /* if we'll receive a file, record the name of our partner's
     * file in the filemap */
    if (recv_rank != MPI_PROC_NULL) {
      scr_filemap_add_file(map, id, state->lhs_rank_world, file_partner);
      scr_filemap_write(scr_map_file, map);
    }

    /* get meta data of file we're sending
     * (file is NULL in iterations where we only receive) */
    scr_meta* send_meta = scr_meta_new();
    scr_filemap_get_meta(map, id, scr_my_rank_world, file, send_meta);

    /* exchange files with partners */
    scr_meta* recv_meta = scr_meta_new();
    if (scr_swap_files(COPY_FILES, file, send_meta, send_rank, file_partner, recv_meta, recv_rank, c->comm) != SCR_SUCCESS) {
      rc = SCR_FAILURE;
    }

    /* record meta data only for a file we actually received,
     * otherwise file_partner does not name a partner file and
     * recv_meta carries nothing worth storing */
    if (recv_rank != MPI_PROC_NULL) {
      scr_filemap_set_meta(map, id, state->lhs_rank_world, file_partner, recv_meta);
    }

    /* free meta data for these files */
    scr_meta_delete(&recv_meta);
    scr_meta_delete(&send_meta);
  }

  /* free cache directory string */
  scr_free(&dir);

  /* write out the updated filemap */
  scr_filemap_write(scr_map_file, map);

  /* free our list of files */
  scr_free(&files);

  return rc;
}