Exemplo n.º 1
0
/* Looks up the parameter called name and returns a pointer to its value.
 *
 * Sources are consulted in this order, and the first one that defines the
 * parameter wins:
 *   1. the environment
 *   2. the user configuration hash (scr_user_hash)
 *   3. the system configuration hash (scr_system_hash)
 * Sources 1 and 2 are ignored when name has an entry in scr_no_user_hash.
 *
 * Returns NULL if no source defines the parameter. */
char* scr_param_get(char* name)
{
  /* parameters listed in scr_no_user_hash may not be set by the user */
  int user_may_set = (scr_hash_get(scr_no_user_hash, name) == NULL);

  /* the environment has the highest precedence */
  if (user_may_set) {
    /* TODO: need to strdup here to be safe? */
    char* env_value = getenv(name);
    if (env_value != NULL) {
      return env_value;
    }
  }

  /* next in line is the user configuration file */
  char* user_value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (user_may_set && user_value != NULL) {
    return user_value;
  }

  /* finally fall back to the system configuration file,
   * this lookup yields NULL when the parameter is not set there either */
  return scr_hash_elem_get_first_val(scr_system_hash, name);
}
Exemplo n.º 2
0
/* Looks up the parameter called name and returns a pointer to its value.
 *
 * Sources are consulted in this order, and the first one that defines the
 * parameter wins:
 *   1. the environment (the value is copied into scr_env_hash and the
 *      pointer returned refers to that copy)
 *   2. the user configuration hash (scr_user_hash)
 *   3. the system configuration hash (scr_system_hash)
 * Sources 1 and 2 are ignored when name has an entry in scr_no_user_hash.
 *
 * Returns NULL if no source defines the parameter. */
char* scr_param_get(char* name)
{
  /* parameters listed in scr_no_user_hash may not be set by the user */
  int user_may_set = (scr_hash_get(scr_no_user_hash, name) == NULL);

  /* the environment has the highest precedence */
  char* env_value = user_may_set ? getenv(name) : NULL;
  if (env_value != NULL) {
    /* handing back the getenv pointer directly causes segfaults on some
     * systems, so the value is stored in scr_env_hash and the caller gets
     * a pointer into that hash instead */
    char* cached = NULL;

    /* an earlier call may already have cached this variable */
    if (scr_hash_util_get_str(scr_env_hash, name, &cached) == SCR_SUCCESS) {
      return cached;
    }

    /* first request for this variable, so insert a copy into the cache */
    char* copy = strdup(env_value);
    scr_hash_util_set_str(scr_env_hash, name, copy);
    scr_free(&copy);

    /* the lookup must succeed now that the value has been inserted */
    if (scr_hash_util_get_str(scr_env_hash, name, &cached) != SCR_SUCCESS) {
      scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d",
        name, __FILE__, __LINE__
      );
    }

    return cached;
  }

  /* next in line is the user configuration file */
  char* user_value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (user_may_set && user_value != NULL) {
    return user_value;
  }

  /* finally fall back to the system configuration file,
   * this lookup yields NULL when the parameter is not set there either */
  return scr_hash_elem_get_first_val(scr_system_hash, name);
}
Exemplo n.º 3
0
/* prints label followed by secs formatted with asctime (which supplies the
 * trailing newline); prints a placeholder rather than passing NULL to
 * asctime when localtime cannot convert the value */
static void print_halt_time(const char* label, time_t secs)
{
  struct tm* parts = localtime(&secs);
  if (parts != NULL) {
    printf("%s%s", label, asctime(parts));
  } else {
    printf("%s(invalid time)\n", label);
  }
}

/* Lists or updates the settings stored in a halt file.
 *
 * With args.list set, the file is only read.  Otherwise the requested
 * set/unset operations are announced and applied through
 * scr_halt_sync_and_set.  In both cases the resulting settings are printed.
 *
 * Returns 1 if the command line could not be processed, 0 otherwise. */
int main (int argc, char *argv[])
{
  /* process command line arguments */
  struct arglist args;
  if (!processArgs(argc, argv, &args)) {
    return 1;
  }

  /* create a new hash to hold the file data */
  scr_hash* data = scr_hash_new();

  if (args.list) {
    /* if the user wants to list the values, just read the file,
     * the values are printed below */
    scr_halt_read(args.file, data);
  } else {
    /* otherwise, we must be setting something */
    if (args.set_checkpoints) {
      printf("Setting CheckpointsLeft\n");
    } else if (args.unset_checkpoints) {
      printf("Unsetting CheckpointsLeft\n");
    }

    if (args.set_after) {
      printf("Setting ExitAfter\n");
    } else if (args.unset_after) {
      printf("Unsetting ExitAfter\n");
    }

    if (args.set_before) {
      printf("Setting ExitBefore\n");
    } else if (args.unset_before) {
      printf("Unsetting ExitBefore\n");
    }

    if (args.set_seconds) {
      printf("Setting HaltSeconds\n");
    } else if (args.unset_seconds) {
      printf("Unsetting HaltSeconds\n");
    }

    if (args.set_reason) {
      printf("Setting ExitReason\n");
    } else if (args.unset_reason) {
      printf("Unsetting ExitReason\n");
    }

    printf("\n");

    scr_halt_sync_and_set(args.file, &args, data);
  }

  /* print the current settings */
  char* value = NULL;
  printf("Halt file settings for %s:\n", args.file);
  int have_one = 0;
  int exit_before = -1;  /* -1 means the value is not set in the file */
  int halt_seconds = -1; /* -1 means the value is not set in the file */

  value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_EXIT_REASON);
  if (value != NULL) {
    printf("  ExitReason:      %s\n", value);
    have_one = 1;
  }

  value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_CHECKPOINTS);
  if (value != NULL) {
    int checkpoints_left = atoi(value);
    printf("  CheckpointsLeft: %d\n", checkpoints_left);
    have_one = 1;
  }

  value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_EXIT_AFTER);
  if (value != NULL) {
    print_halt_time("  ExitAfter:       ", (time_t) atoi(value));
    have_one = 1;
  }

  value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_EXIT_BEFORE);
  if (value != NULL) {
    exit_before = atoi(value);
    print_halt_time("  ExitBefore:      ", (time_t) exit_before);
    have_one = 1;
  }

  value = scr_hash_elem_get_first_val(data, SCR_HALT_KEY_SECONDS);
  if (value != NULL) {
    halt_seconds = atoi(value);
    printf("  HaltSeconds:     %d\n", halt_seconds);
    have_one = 1;
  }

  /* when both values are set, also show the time obtained by subtracting
   * HaltSeconds from ExitBefore */
  if (halt_seconds != -1 && exit_before != -1) {
    print_halt_time("  ExitBefore - HaltSeconds: ",
                    (time_t) exit_before - halt_seconds);
    have_one = 1;
  }

  if (!have_one) {
    printf("  None\n");
  }

  /* delete the hash holding the file data */
  scr_hash_delete(data);

  return 0;
}
Exemplo n.º 4
0
/* Reads the transfer file into a new hash and updates the globals it
 * controls: bytes_per_second, percent_runtime, state, and keep_running.
 * Also writes the current state back to the transfer file when the file
 * has no STATE entry or when a RUN command is accepted.
 *
 * Returns the hash holding the contents read from the transfer file. */
scr_hash* read_transfer_file()
{
  /* load the transfer file (read under a lock) into a fresh hash */
  scr_hash* hash = scr_hash_new();
  scr_hash_read_with_lock(scr_transfer_file, hash);

  /* bandwidth limit */
  char* bw_str = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_BW);
  if (bw_str == NULL) {
    /* no BW field present, so this limit is disabled */
    bytes_per_second = 0.0;
  } else {
    double bw;
    if (scr_atod(bw_str, &bw) != SCR_SUCCESS) {
      /* unparsable value, keep whatever limit is currently in effect */
      scr_err("scr_transfer: Ignoring invalid BW value in %s @ %s:%d",
              scr_transfer_file, __FILE__, __LINE__
      );
    } else {
      bytes_per_second = bw;
    }
  }

  /* percentage-of-runtime limit */
  char* percent_str = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_PERCENT);
  if (percent_str == NULL) {
    /* no PERCENT field present, so this limit is disabled */
    percent_runtime = 0.0;
  } else {
    double percent;
    if (scr_atod(percent_str, &percent) != SCR_SUCCESS) {
      /* unparsable value, keep whatever limit is currently in effect */
      scr_err("scr_transfer: Ignoring invalid PERCENT value in %s @ %s:%d",
              scr_transfer_file, __FILE__, __LINE__
      );
    } else {
      /* the file stores a percentage, the global holds a fraction */
      percent_runtime = percent / 100.0;
    }
  }

  /* note whether the DONE flag is present */
  int done = (scr_hash_get_kv(hash, SCR_TRANSFER_KEY_FLAG, SCR_TRANSFER_KEY_FLAG_DONE) != NULL) ? 1 : 0;

  /* act on the latest command, the state is STOPPED unless a RUN command
   * says otherwise */
  state = STOPPED;
  char* command = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_COMMAND);
  if (command != NULL) {
    if (strcmp(command, SCR_TRANSFER_KEY_COMMAND_EXIT) == 0) {
      /* EXIT: clear the keep_running flag */
      keep_running = 0;
    } else if (strcmp(command, SCR_TRANSFER_KEY_COMMAND_STOP) == 0) {
      /* STOP: state is already STOPPED, nothing more to do */
    } else if (strcmp(command, SCR_TRANSFER_KEY_COMMAND_RUN) == 0) {
      /* RUN: only switch to running (and record that in the transfer
       * file) if the DONE flag is not set */
      if (!done) {
        state = RUNNING;
        set_transfer_file_state(SCR_TRANSFER_KEY_STATE_RUN, 0);
      }
    } else {
      scr_err("scr_transfer: Unknown command %s in %s @ %s:%d",
              command, scr_transfer_file, __FILE__, __LINE__
      );
    }
  }

  /* the file may have been deleted since our state was last written to it,
   * so record the current state again whenever no STATE entry is found */
  char* recorded_state = scr_hash_elem_get_first_val(hash, SCR_TRANSFER_KEY_STATE);
  if (recorded_state == NULL) {
    if (state == STOPPED) {
      set_transfer_file_state(SCR_TRANSFER_KEY_STATE_STOP, 0);
    } else if (state == RUNNING) {
      set_transfer_file_state(SCR_TRANSFER_KEY_STATE_RUN, 0);
    } else {
      scr_err("scr_transfer: Unknown state %d @ %s:%d",
              state, __FILE__, __LINE__
      );
    }
  }

  return hash;
}
Exemplo n.º 5
0
/* Prints the value stored under the NODES key of <args.dir>/nodes.scr.
 *
 * Prints the value and returns 0 when the key is found.  Prints "0" and
 * returns 1 when the file was read but has no NODES entry.  Returns 1
 * without printing a value when the arguments are invalid, the file name
 * cannot be built, or the file cannot be read. */
int main (int argc, char *argv[])
{
  /* process command line arguments */
  struct arglist args;
  if (!process_args(argc, argv, &args)) {
    return 1;
  }

  /* determine the number of bytes we need to hold the full name of the nodes file */
  int filelen = snprintf(NULL, 0, "%s/nodes.scr", args.dir);
  filelen++; /* add one for the terminating NUL char */

  /* allocate space to store the filename,
   * filelen is not positive if the sizing call to snprintf failed */
  char* file = NULL;
  if (filelen > 0) {
    file = (char*) malloc(filelen);
  }
  if (file == NULL) {
    scr_err("%s: Failed to allocate storage to store nodes file name @ %s:%d",
            PROG, __FILE__, __LINE__
    );
    return 1;
  }

  /* build the full file name, snprintf reports an output error with a
   * negative value and truncation with a value >= the buffer size */
  int n = snprintf(file, filelen, "%s/nodes.scr", args.dir);
  if (n < 0 || n >= filelen) {
    scr_err("%s: Failed to build nodes file name (snprintf returned %d, %d byte buffer) @ %s:%d",
            PROG, n, filelen, __FILE__, __LINE__
    );
    free(file);
    return 1;
  }

  /* assume we'll fail */
  int rc = 1;

  /* create a new hash to hold the file data */
  scr_hash* hash = scr_hash_new();

  /* read in our nodes file, nothing is printed if the read fails */
  if (scr_hash_read(file, hash) == SCR_SUCCESS) {
    /* lookup the value associated with the NODES key */
    char* nodes_str = scr_hash_elem_get_first_val(hash, SCR_NODES_KEY_NODES);
    if (nodes_str != NULL) {
      printf("%s\n", nodes_str);
      rc = 0;
    } else {
      printf("0\n");
    }
  }

  /* delete the hash holding the nodes file data */
  scr_hash_delete(hash);

  /* free off our file name storage */
  free(file);
  file = NULL;

  /* return appropriate exit code */
  return rc;
}
Exemplo n.º 6
0
/* Inspects the cache described by the master map file named in argv[1].
 *
 * Every filemap listed under the "Filemap" key of the master map is read
 * and merged into one map.  For each (dataset, rank) pair in the merged map
 * whose expected file count matches and whose files all pass
 * scr_bool_have_file, one line with the dataset, rank, and group fields of
 * its descriptor is printed.
 *
 * Returns 1 if no argument was given or gethostname fails, 0 otherwise. */
int main(int argc, char* argv[])
{
  /* print usage if not enough arguments were given */
  if (argc < 2) {
    printf("Usage: scr_inspect_cache <cntldir>\n");
    return 1;
  }

  /* get my hostname, done before anything is allocated so that the
   * early return has nothing to release */
  if (gethostname(scr_my_hostname, sizeof(scr_my_hostname)) != 0) {
    scr_err("scr_inspect_cache: Call to gethostname failed @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* build the path of the master map file, the argument is passed as is
   * because a path built from a string is deleted independently of that
   * string (see the filemap loop below), so a duplicate would be leaked */
  scr_path* scr_master_map_file = scr_path_from_str(argv[1]);

  /* read in the master map */
  /* NOTE(review): hash is never released; the matching delete call is not
   * visible in this section of the file, so it is left as is */
  scr_hash* hash = scr_hash_new();
  scr_hash_read_path(scr_master_map_file, hash);

  /* create an empty filemap */
  scr_filemap* map = scr_filemap_new();

  /* for each filemap listed in the master map */
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap"));
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename of this filemap */
    char* file = scr_hash_elem_key(elem);

    /* read in the filemap */
    scr_filemap* tmp_map = scr_filemap_new();
    scr_path* path_file = scr_path_from_str(file);
    scr_filemap_read(path_file, tmp_map);
    scr_path_delete(&path_file);

    /* merge it with the combined filemap */
    scr_filemap_merge(map, tmp_map);

    /* delete filemap */
    scr_filemap_delete(&tmp_map);
  }

  /* scan each file for each rank of each dataset */
  scr_hash_elem* dset_elem;
  for (dset_elem = scr_filemap_first_dataset(map);
       dset_elem != NULL;
       dset_elem = scr_hash_elem_next(dset_elem))
  {
    /* get dataset id */
    int dset = scr_hash_elem_key_int(dset_elem);

    scr_hash_elem* rank_elem;
    for (rank_elem = scr_filemap_first_rank_by_dataset(map, dset);
         rank_elem != NULL;
         rank_elem = scr_hash_elem_next(rank_elem))
    {
      /* get rank id */
      int rank = scr_hash_elem_key_int(rank_elem);

      int missing_file = 0;
      int expected = scr_filemap_get_expected_files(map, dset, rank);
      int num      = scr_filemap_num_files(map, dset, rank);
      if (expected == num) {
        /* the count matches, now check that we have each file,
         * every bad file is reported, so the loop does not stop early */
        scr_hash_elem* file_elem = NULL;
        for (file_elem = scr_filemap_first_file(map, dset, rank);
             file_elem != NULL;
             file_elem = scr_hash_elem_next(file_elem))
        {
          /* get filename */
          char* file = scr_hash_elem_key(file_elem);

          /* check that we can read the file */
          if (! scr_bool_have_file(map, dset, rank, file)) {
              missing_file = 1;
              scr_dbg(1, "File is unreadable or incomplete: Dataset %d, Rank %d, File: %s",
                dset, rank, file
              );
          }
        }
      } else {
        missing_file = 1;
      }

      /* TODO: print partner names */
      /* if we're not missing a file for rank, print this info out */
      if (! missing_file) {
        /* NOTE(review): desc is never released, so one hash is leaked per
         * printed rank; left as is for the same reason as hash above */
        scr_hash* desc = scr_hash_new();
        scr_filemap_get_desc(map, dset, rank, desc);
        char* type           = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_TYPE);
        char* groups_str     = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUPS);
        char* group_id_str   = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_ID);
        char* group_size_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_SIZE);
        char* group_rank_str = scr_hash_elem_get_first_val(desc, SCR_CONFIG_KEY_GROUP_RANK);
        if (type != NULL && groups_str != NULL && group_id_str != NULL && group_size_str != NULL && group_rank_str != NULL) {
          /* all descriptor fields are present, so print them */
          int groups     = atoi(groups_str);
          int group_id   = atoi(group_id_str);
          int group_size = atoi(group_size_str);
          int group_rank = atoi(group_rank_str);
          printf("DSET=%d RANK=%d TYPE=%s GROUPS=%d GROUP_ID=%d GROUP_SIZE=%d GROUP_RANK=%d FILES=1\n",
            dset, rank, type, groups, group_id, group_size, group_rank
          );
        }
      }
    }
  }

  /* release the merged filemap and the master map path */
  scr_filemap_delete(&map);
  scr_path_delete(&scr_master_map_file);

  return 0;
}
Exemplo n.º 7
0
/* Verifies that hash is a valid hash for a version 5 summary file.
 *
 * The hash must carry version SCR_SUMMARY_FILE_VERSION_5 and contain
 * exactly one checkpoint, which must be marked complete and must have been
 * written by scr_ranks_world ranks.
 *
 * Returns SCR_SUCCESS if every check passes, SCR_FAILURE otherwise. */
static int scr_summary_check_v5(scr_hash* hash)
{
  /* check that the summary file version is something we support */
  int version;
  if (scr_hash_util_get_int(hash, SCR_SUMMARY_KEY_VERSION, &version) != SCR_SUCCESS) {
    /* couldn't find version number */
    scr_err("Failed to read version number in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  if (version != SCR_SUMMARY_FILE_VERSION_5) {
    /* invalid version number, the format string takes both the version
     * found and the version expected ahead of the file and line */
    scr_err("Found version number %d when %d was expected in summary file @ %s:%d",
      version, SCR_SUMMARY_FILE_VERSION_5, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that we have exactly one checkpoint,
   * this rejects both an empty list and a list with several entries */
  scr_hash* ckpt_hash = scr_hash_get(hash, SCR_SUMMARY_5_KEY_CKPT);
  if (scr_hash_size(ckpt_hash) != 1) {
    scr_err("Expected exactly one checkpoint in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* get the first (and only) checkpoint id */
  char* ckpt_str = scr_hash_elem_get_first_val(hash, SCR_SUMMARY_5_KEY_CKPT);
  scr_hash* ckpt = scr_hash_get(ckpt_hash, ckpt_str);

  /* check that the complete string is set and is set to 1 */
  int complete;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
    /* could not find complete value (assume it's incomplete) */
    return SCR_FAILURE;
  }
  if (complete != 1) {
    /* checkpoint is marked as incomplete */
    return SCR_FAILURE;
  }

  /* read in the number of ranks for this checkpoint */
  int ranks;
  if (scr_hash_util_get_int(ckpt, SCR_SUMMARY_5_KEY_RANKS, &ranks) != SCR_SUCCESS) {
    scr_err("Failed to read number of ranks in summary file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that the number of ranks matches the number we're currently running with */
  if (ranks != scr_ranks_world) {
    scr_err("Number of ranks %d that wrote checkpoint does not match current number of ranks %d @ %s:%d",
      ranks, scr_ranks_world, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  return SCR_SUCCESS;
}