/* Searches for the parameter called name and returns a character pointer
 * to its value, or NULL if no value is found.
 *
 * Sources are consulted in this order:
 *   1. the environment,
 *   2. the user configuration hash (scr_user_hash),
 *   3. the system configuration hash (scr_system_hash).
 * The first two are honored only when name has no entry in
 * scr_no_user_hash. */
char* scr_param_get(char* name)
{
  /* a parameter with an entry in scr_no_user_hash is restricted from
   * the user */
  int user_allowed = (scr_hash_get(scr_no_user_hash, name) == NULL);

  /* the environment takes precedence over everything else */
  if (user_allowed) {
    /* TODO: need to strdup here to be safe? */
    char* env_value = getenv(name);
    if (env_value != NULL) {
      return env_value;
    }
  }

  /* next in line is the user configuration file */
  char* user_value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (user_allowed && user_value != NULL) {
    return user_value;
  }

  /* last is the system configuration file; handing back the lookup
   * result directly covers both the found case and the not-found
   * (NULL) case */
  return scr_hash_elem_get_first_val(scr_system_hash, name);
}
/* searches for name and returns a character pointer to its value if set, * returns NULL if not found */ char* scr_param_get(char* name) { char* value = NULL; /* see if this parameter is one which is restricted from user */ scr_hash* no_user = scr_hash_get(scr_no_user_hash, name); /* if parameter is set in environment, return that value */ if (no_user == NULL && getenv(name) != NULL) { /* we don't just return the getenv value directly because that causes * segfaults on some systems, so instead we add it to a hash and return * the pointer into the hash */ /* try to lookup the value for this name in case we've already cached it */ if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) { /* it's not in the hash yet, so add it */ char* tmp_value = strdup(getenv(name)); scr_hash_util_set_str(scr_env_hash, name, tmp_value); scr_free(&tmp_value); /* now issue our lookup again */ if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) { /* it's an error if we don't find it this time */ scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d", name, __FILE__, __LINE__ ); } } return value; } /* otherwise, if parameter is set in user configuration file, * return that value */ value = scr_hash_elem_get_first_val(scr_user_hash, name); if (no_user == NULL && value != NULL) { return value; } /* otherwise, if parameter is set in system configuration file, * return that value */ value = scr_hash_elem_get_first_val(scr_system_hash, name); if (value != NULL) { return value; } /* parameter not found, return NULL */ return NULL; }
int main (int argc, char *argv[]) { /* process command line arguments */ struct arglist args; if (!processArgs(argc, argv, &args)) { return 1; } /* create a new hash to hold the file data */ scr_hash* data = scr_hash_new(); if (args.list) { /* if the user wants to list the values, just read the file, print the values, and exit */ scr_halt_read(args.file, data); } else { /* otherwise, we must be setting something */ if (args.set_checkpoints) { printf("Setting CheckpointsLeft\n"); } else if (args.unset_checkpoints) { printf("Unsetting CheckpointsLeft\n"); } if (args.set_after) { printf("Setting ExitAfter\n"); } else if (args.unset_after) { printf("Unsetting ExitAfter\n"); } if (args.set_before) { printf("Setting ExitBefore\n"); } else if (args.unset_before) { printf("Unsetting ExitBefore\n"); } if (args.set_seconds) { printf("Setting HaltSeconds\n"); } else if (args.unset_seconds) { printf("Unsetting HaltSeconds\n"); } if (args.set_reason) { printf("Setting ExitReason\n"); } else if (args.unset_reason) { printf("Unsetting ExitReason\n"); } printf("\n"); scr_halt_sync_and_set(args.file, &args, data); } /* print the current settings */ time_t secs; scr_hash* key = NULL; char* value = NULL; printf("Halt file settings for %s:\n", args.file); int have_one = 0; int exit_before = -1; int halt_seconds = -1; value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_EXIT_REASON); if (value != NULL ) { printf(" ExitReason: %s\n", value); have_one = 1; } value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_CHECKPOINTS); if (value != NULL) { int checkpoints_left = atoi(value); printf(" CheckpointsLeft: %d\n", checkpoints_left); have_one = 1; } value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_EXIT_AFTER); if (value != NULL) { secs = (time_t) atoi(value); printf(" ExitAfter: %s", asctime(localtime(&secs))); have_one = 1; } value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_EXIT_BEFORE); if (value != NULL) { exit_before = atoi(value); secs = (time_t) exit_before; printf(" 
ExitBefore: %s", asctime(localtime(&secs))); have_one = 1; } value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_SECONDS); if (value != NULL) { halt_seconds = atoi(value); printf(" HaltSeconds: %d\n", halt_seconds); have_one = 1; } if (halt_seconds != -1 && exit_before != -1) { secs = (time_t) exit_before - halt_seconds; printf(" ExitBefore - HaltSeconds: %s", asctime(localtime(&secs))); have_one = 1; } if (!have_one) { printf(" None\n"); } /* delete the hash holding the file data */ scr_hash_delete(data); return 0; }
/* read the transfer file and set our global variables to match */ scr_hash* read_transfer_file() { char* value = NULL; /* get a new hash to store the file data */ scr_hash* hash = scr_hash_new(); /* open transfer file with lock */ scr_hash_read_with_lock(scr_transfer_file, hash); /* read in our allowed bandwidth value */ value = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_BW); if (value != NULL) { double bw; if (scr_atod(value, &bw) == SCR_SUCCESS) { /* got a new bandwidth value, set our global variable */ bytes_per_second = bw; } else { /* could not interpret bandwidth value */ scr_err("scr_transfer: Ignoring invalid BW value in %s @ %s:%d", scr_transfer_file, __FILE__, __LINE__ ); } } else { /* couldn't find a BW field, so disable this limit */ bytes_per_second = 0.0; } /* read in our allowed percentage of runtime value */ value = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_PERCENT); if (value != NULL) { double percent; if (scr_atod(value, &percent) == SCR_SUCCESS) { /* got a new bandwidth value, set our global variable */ percent_runtime = percent / 100.0; } else { /* could not interpret bandwidth value */ scr_err("scr_transfer: Ignoring invalid PERCENT value in %s @ %s:%d", scr_transfer_file, __FILE__, __LINE__ ); } } else { /* couldn't find a PERCENT field, so disable this limit */ percent_runtime = 0.0; } /* check for DONE flag */ int done = 0; scr_hash* done_hash = scr_hash_get_kv(hash, SCR_TRANSFER_KEY_FLAG, SCR_TRANSFER_KEY_FLAG_DONE); if (done_hash != NULL) { done = 1; } /* check for latest command */ state = STOPPED; value = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_COMMAND); if (value != NULL) { if (strcmp(value, SCR_TRANSFER_KEY_COMMAND_EXIT) == 0) { /* close files and exit */ keep_running = 0; } else if (strcmp(value, SCR_TRANSFER_KEY_COMMAND_STOP) == 0) { /* just stop, nothing else to do here */ } else if (strcmp(value, SCR_TRANSFER_KEY_COMMAND_RUN) == 0) { /* found the RUN command, if the DONE flag is not set, * set our state 
to running and update the transfer file */ if (!done) { state = RUNNING; set_transfer_file_state(SCR_TRANSFER_KEY_STATE_RUN, 0); } } else { scr_err("scr_transfer: Unknown command %s in %s @ %s:%d", value, scr_transfer_file, __FILE__, __LINE__ ); } } /* ensure that our current state is always recorded in the file * (the file may have been deleted since we last wrote our state * to it) */ value = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_STATE); if (value == NULL) { if (state == STOPPED) { set_transfer_file_state(SCR_TRANSFER_KEY_STATE_STOP, 0); } else if (state == RUNNING) { set_transfer_file_state(SCR_TRANSFER_KEY_STATE_RUN, 0); } else { scr_err("scr_transfer: Unknown state %d @ %s:%d", state, __FILE__, __LINE__ ); } } return hash; }
int main (int argc, char *argv[]) { /* process command line arguments */ struct arglist args; if (!process_args(argc, argv, &args)) { return 1; } /* determine the number of bytes we need to hold the full name of the nodes file */ int filelen = snprintf(NULL, 0, "%s/nodes.scr", args.dir); filelen++; /* add one for the terminating NUL char */ /* allocate space to store the filename */ char* file = NULL; if (filelen > 0) { file = (char*) malloc(filelen); } if (file == NULL) { scr_err("%s: Failed to allocate storage to store nodes file name @ %s:%d", PROG, __FILE__, __LINE__ ); return 1; } /* build the full file name */ int n = snprintf(file, filelen, "%s/nodes.scr", args.dir); if (n >= filelen) { scr_err("%s: Flush file name is too long (need %d bytes, %d byte buffer) @ %s:%d", PROG, n, filelen, __FILE__, __LINE__ ); free(file); return 1; } /* assume we'll fail */ int rc = 1; /* create a new hash to hold the file data */ scr_hash* hash = scr_hash_new(); /* read in our nodes file */ if (scr_hash_read(file, hash) != SCR_SUCCESS) { /* failed to read the nodes file */ goto cleanup; } /* lookup the value associated with the NODES key */ char* nodes_str = scr_hash_elem_get_first_val(hash, SCR_NODES_KEY_NODES); if (nodes_str != NULL) { printf("%s\n", nodes_str); rc = 0; } else { printf("0\n"); } cleanup: /* delete the hash holding the nodes file data */ scr_hash_delete(hash); /* free off our file name storage */ if (file != NULL) { free(file); file = NULL; } /* return appropriate exit code */ return rc; }
int main(int argc, char* argv[]) { /* print usage if not enough arguments were given */ if (argc < 2) { printf("Usage: scr_inspect_cache <cntldir>\n"); return 1; } scr_path* scr_master_map_file = scr_path_from_str(strdup(argv[1])); /* get my hostname */ if (gethostname(scr_my_hostname, sizeof(scr_my_hostname)) != 0) { scr_err("scr_inspect_cache: Call to gethostname failed @ %s:%d", __FILE__, __LINE__ ); return 1; } /* read in the master map */ scr_hash* hash = scr_hash_new(); scr_hash_read_path(scr_master_map_file, hash); /* create an empty filemap */ scr_filemap* map = scr_filemap_new(); /* for each filemap listed in the master map */ scr_hash_elem* elem; for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap")); elem != NULL; elem = scr_hash_elem_next(elem)) { /* get the filename of this filemap */ char* file = scr_hash_elem_key(elem); /* read in the filemap */ scr_filemap* tmp_map = scr_filemap_new(); scr_path* path_file = scr_path_from_str(file); scr_filemap_read(path_file, tmp_map); scr_path_delete(&path_file); /* merge it with local 0 filemap */ scr_filemap_merge(map, tmp_map); /* delete filemap */ scr_filemap_delete(&tmp_map); } /* scan each file for each rank of each dataset */ scr_hash_elem* dset_elem; for (dset_elem = scr_filemap_first_dataset(map); dset_elem != NULL; dset_elem = scr_hash_elem_next(dset_elem)) { /* get dataset id */ int dset = scr_hash_elem_key_int(dset_elem); scr_hash_elem* rank_elem; for (rank_elem = scr_filemap_first_rank_by_dataset(map, dset); rank_elem != NULL; rank_elem = scr_hash_elem_next(rank_elem)) { /* get rank id */ int rank = scr_hash_elem_key_int(rank_elem); int missing_file = 0; int expected = scr_filemap_get_expected_files(map, dset, rank); int num = scr_filemap_num_files(map, dset, rank); if (expected == num) { /* first time through the file list, check that we have each file */ scr_hash_elem* file_elem = NULL; for (file_elem = scr_filemap_first_file(map, dset, rank); file_elem != NULL; file_elem = 
scr_hash_elem_next(file_elem)) { /* get filename */ char* file = scr_hash_elem_key(file_elem); /* check that we can read the file */ if (! scr_bool_have_file(map, dset, rank, file)) { missing_file = 1; scr_dbg(1, "File is unreadable or incomplete: Dataset %d, Rank %d, File: %s", dset, rank, file ); } } } else { missing_file = 1; } /* TODO: print partner names */ /* if we're not missing a file for rank, print this info out */ if (! missing_file) { scr_hash* desc = scr_hash_new(); scr_filemap_get_desc(map, dset, rank, desc); char* type = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_TYPE); char* groups_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUPS); char* group_id_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_ID); char* group_size_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_SIZE); char* group_rank_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_RANK); if (type != NULL && groups_str != NULL && group_id_str != NULL && group_size_str != NULL && group_rank_str != NULL) { /* we already have a group id and rank, use that to rebuild the communicator */ int groups = atoi(groups_str); int group_id = atoi(group_id_str); int group_size = atoi(group_size_str); int group_rank = atoi(group_rank_str); printf("DSET=%d RANK=%d TYPE=%s GROUPS=%d GROUP_ID=%d GROUP_SIZE=%d GROUP_RANK=%d FILES=1\n", dset, rank, type, groups, group_id, group_size, group_rank ); } } } } scr_path_delete(&scr_master_map_file); return 0; }
/* Verifies that hash is a valid hash for a version 5 summary file.
 *
 * The checks, in order:
 *   - the version entry exists and equals SCR_SUMMARY_FILE_VERSION_5,
 *   - exactly one checkpoint is listed,
 *   - that checkpoint has its complete value set to 1,
 *   - that checkpoint records a rank count equal to scr_ranks_world.
 *
 * Returns SCR_SUCCESS if all checks pass, SCR_FAILURE otherwise. */
static int scr_summary_check_v5(scr_hash* hash)
{
  /* check that the summary file version is something we support */
  int version;
  if (scr_hash_util_get_int(hash, SCR_SUMMARY_KEY_VERSION, &version) != SCR_SUCCESS) {
    /* couldn't find version number */
    scr_err("Failed to read version number in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }
  if (version != SCR_SUMMARY_FILE_VERSION_5) {
    /* invalid version number; the two %d conversions need the found and
     * the expected version as arguments */
    scr_err("Found version number %d when %d was expected in summary file @ %s:%d",
      version, SCR_SUMMARY_FILE_VERSION_5, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that we have exactly one checkpoint (this test also rejects a
   * file with no checkpoint, although the message only mentions the
   * more-than-one case) */
  scr_hash* ckpt_hash = scr_hash_get(hash, SCR_SUMMARY_5_KEY_CKPT);
  if (scr_hash_size(ckpt_hash) != 1) {
    scr_err("More than one checkpoint found in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* get the first (and only) checkpoint id */
  char* ckpt_str = scr_hash_elem_get_first_val(hash, SCR_SUMMARY_5_KEY_CKPT);
  scr_hash* ckpt = scr_hash_get(ckpt_hash, ckpt_str);

  /* check that the complete string is set and is set to 1 */
  int complete;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
    /* could not find complete value (assume it's incomplete) */
    return SCR_FAILURE;
  }
  if (complete != 1) {
    /* checkpoint is marked as incomplete */
    return SCR_FAILURE;
  }

  /* read in the number of ranks for this checkpoint */
  int ranks;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_RANKS, &ranks) != SCR_SUCCESS) {
    scr_err("Failed to read number of ranks in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that the number of ranks matches the number we're currently
   * running with */
  if (ranks != scr_ranks_world) {
    scr_err("Number of ranks %d that wrote checkpoint does not match current number of ranks %d @ %s:%d",
      ranks, scr_ranks_world, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  return SCR_SUCCESS;
}