Beispiel #1
0
/* Recursively create directory `dir` along with any missing parent
 * directories.
 *
 *   dir  - path of the directory to create
 *   mode - permission bits handed to mkdir() for every directory created
 *
 * Returns SCR_SUCCESS if the directory was created or mkdir() reports that
 * it already exists (EEXIST), and SCR_FAILURE otherwise. */
int scr_mkdir(const char* dir, mode_t mode)
{
  /* strdup(NULL) is undefined, so reject a missing path up front */
  if (dir == NULL) {
    scr_err("NULL directory name passed to scr_mkdir @ %s:%d",
            __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  int rc = SCR_SUCCESS;

  /* With dirname, either the original string may be modified or the
   * function may return a pointer to static storage which will be
   * overwritten by the next call to dirname, so we strdup both the
   * argument and the returned string. */
  char* dircopy = strdup(dir);
  char* path    = NULL;
  if (dircopy != NULL) {
    /* leading path = full path - basename */
    path = strdup(dirname(dircopy));
  }
  if (dircopy == NULL || path == NULL) {
    scr_err("Failed to allocate memory to create directory %s @ %s:%d",
            dir, __FILE__, __LINE__
    );
    rc = SCR_FAILURE;
    goto cleanup;
  }

  /* if we can read path or path=="." or path=="/", then there's nothing
   * to do for the parent, otherwise, try to create it first */
  if (access(path, R_OK) < 0 &&
      strcmp(path, ".") != 0 &&
      strcmp(path, "/") != 0)
  {
    rc = scr_mkdir(path, mode);
  }

  /* if we can write to path, try to create subdir within path */
  if (access(path, W_OK) == 0 && rc == SCR_SUCCESS) {
    if (mkdir(dir, mode) < 0) {
      /* capture errno right away, later library calls may overwrite it */
      int saved_errno = errno;

      /* don't complain about mkdir for a directory that already exists */
      if (saved_errno != EEXIST) {
        scr_err("Creating directory: mkdir(%s, %x) path=%s errno=%d %s @ %s:%d",
                dir, (unsigned int) mode, path, saved_errno,
                strerror(saved_errno), __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }
    }
  } else {
    scr_err("Cannot write to directory: %s @ %s:%d",
            path, __FILE__, __LINE__
    );
    rc = SCR_FAILURE;
  }

cleanup:
  /* free our dup'ed strings and return error code */
  scr_free(&dircopy);
  scr_free(&path);
  return rc;
}
Beispiel #2
0
/* Releases a hash element together with the key string and the child hash
 * it holds. Passing NULL is a no-op. Always returns SCR_SUCCESS. */
static int scr_hash_elem_delete(scr_hash_elem* elem)
{
  /* nothing to release for a NULL element */
  if (elem == NULL) {
    return SCR_SUCCESS;
  }

  /* the key was strdup'ed when the element was built */
  scr_free(&(elem->key));

  /* drop the hash attached to this element */
  scr_hash_delete(&elem->hash);
  elem->hash = NULL;

  /* last of all, release the element structure */
  scr_free(&elem);

  return SCR_SUCCESS;
}
Beispiel #3
0
/* Shuts down logging: closes the database connection when database
 * logging is enabled and releases the stored connection settings.
 * Always returns SCR_SUCCESS. */
int scr_log_finalize()
{
  /* only an enabled database has a connection to close */
  if (scr_db_enable)
  {
    scr_mysql_disconnect();
  }

  /* release the connection settings: host, user, password, database name */
  scr_free(&scr_db_host);
  scr_free(&scr_db_user);
  scr_free(&scr_db_pass);
  scr_free(&scr_db_name);

  return SCR_SUCCESS;
}
Beispiel #4
0
/* Tears down the interface: ends curses mode if it was started, then
 * releases each interface object that exists and clears its pointer. */
void if_shutdown(void)
{
    /* Leave curses mode, but only if it was ever entered */
    if (curses_initialized) {
        swin_endwin();
    }

    /* Status bar window */
    if (status_win != NULL) {
        swin_delwin(status_win);
        status_win = NULL;
    }

    /* GDB scroller */
    if (gdb_scroller != NULL) {
        scr_free(gdb_scroller);
        gdb_scroller = NULL;
    }

    /* Source viewer */
    if (src_viewer != NULL) {
        source_free(src_viewer);
        src_viewer = NULL;
    }

    /* Vertical separator window */
    if (vseparator_win != NULL) {
        swin_delwin(vseparator_win);
        vseparator_win = NULL;
    }

    /* Line-number input buffer */
    if (G_line_number != NULL) {
        ibuf_free(G_line_number);
        G_line_number = NULL;
    }
}
Beispiel #5
0
/* Look up `name` in `table`, insert it if it doesn't exist, and return
 * its id through `id`.
 *
 *   table - name of the table to query, inserted into the SQL as-is
 *   name  - value to look up, quoted via scr_mysql_quote_string
 *   id    - output, filled in by scr_mysql_read_id
 *
 * Returns the result of scr_mysql_read_id on the normal path, SCR_FAILURE
 * if the name cannot be quoted or the query cannot be built, and
 * SCR_SUCCESS when built without HAVE_LIBMYSQLCLIENT. */
int scr_mysql_read_write_id(const char* table, const char* name, unsigned long* id)
{
  int rc = SCR_SUCCESS;

#ifdef HAVE_LIBMYSQLCLIENT
  /* if the value is already in the database, return its id */
  rc = scr_mysql_read_id(table, name, id);
  if (rc == SCR_SUCCESS) {
    return SCR_SUCCESS;
  }

  /* didn't find the value in the db, so let's add it */

  /* escape parameter */
  char* qname = scr_mysql_quote_string(name);

  /* check that we got valid strings for each of our parameters */
  if (qname == NULL) {
    scr_err("Failed to escape and quote one or more arguments @ %s:%d",
            __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* construct the query */
  char query[1024];
  int n = snprintf(query, sizeof(query),
    "INSERT IGNORE INTO `%s` (`id`,`name`) VALUES (NULL, %s) ;",
    table, qname
  );

  /* free the strings as they are now encoded into the query */
  scr_free(&qname);

  /* snprintf returns a negative value on an output error; handle it
   * explicitly rather than relying on a signed/unsigned comparison */
  if (n < 0) {
    scr_err("Failed to format query @ %s:%d",
            __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that the query was not truncated; the casts make the
   * arguments match the %lu conversions */
  if ((size_t) n >= sizeof(query)) {
    scr_err("Insufficient buffer space (%lu bytes) to build query (%lu bytes) @ %s:%d",
            (unsigned long) sizeof(query), (unsigned long) n, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* execute the query */
  if (scr_db_debug >= 1) {
    scr_dbg(0, "%s", query);
  }
  if (mysql_real_query(&scr_mysql, query, (unsigned int) strlen(query))) {
    /* report the error but don't return failure, since another process
     * may have just beat us to the punch */
    scr_err("Insert failed, query = (%s), error = (%s) @ %s:%d",
            query, mysql_error(&scr_mysql), __FILE__, __LINE__
    );
  }

  /* alright, now we should be able to read the id */
  rc = scr_mysql_read_id(table, name, id);

#endif
  return rc;
}
Beispiel #6
0
/* Reads the version 6 summary file <dir>/.scr/summary.scr into
 * summary_hash.
 *
 * Returns SCR_SUCCESS only if the file is readable, parses as a hash and
 * carries version SCR_SUMMARY_FILE_VERSION_6; SCR_FAILURE otherwise
 * (including when summary_hash is NULL). */
static int scr_summary_read_v6(const scr_path* dir, scr_hash* summary_hash)
{
  /* the caller must hand us a hash to fill in */
  if (summary_hash == NULL) {
    return SCR_FAILURE;
  }

  /* compose <dir>/.scr/summary.scr */
  scr_path* summary_path = scr_path_dup(dir);
  scr_path_append_str(summary_path, ".scr");
  scr_path_append_str(summary_path, "summary.scr");
  char* summary_file = scr_path_strdup(summary_path);

  /* stays at failure unless every step below succeeds */
  int rc = SCR_FAILURE;
  int version;

  if (scr_file_is_readable(summary_file) != SCR_SUCCESS) {
    /* probe readability first so that scr_hash_read does not print
     * an error for a file that simply is not there; fail quietly */
  } else if (scr_hash_read(summary_file, summary_hash) != SCR_SUCCESS) {
    scr_err("Reading summary file %s @ %s:%d",
      summary_file, __FILE__, __LINE__
    );
  } else if (scr_hash_util_get_int(summary_hash, SCR_SUMMARY_KEY_VERSION, &version) != SCR_SUCCESS) {
    scr_err("Failed to read version from summary file %s @ %s:%d",
      summary_file, __FILE__, __LINE__
    );
  } else if (version != SCR_SUMMARY_FILE_VERSION_6) {
    scr_err("Summary file %s is version %d instead of version %d @ %s:%d",
      summary_file, version, SCR_SUMMARY_FILE_VERSION_6, __FILE__, __LINE__
    );
  } else {
    /* the summary file was read as a hash and has the expected version */
    rc = SCR_SUCCESS;
  }

  /* release the file name string and the path object */
  scr_free(&summary_file);
  scr_path_delete(&summary_path);

  return rc;
}
Beispiel #7
0
/* Tears down the interface: ends curses mode if it was started and
 * releases every window object that exists. */
void if_shutdown(void)
{
    /* Leave curses mode, but only if it was ever entered */
    if (curses_initialized) {
        endwin();
    }

    /* Status bar windows */
    if (status_win) {
        delwin(status_win);
    }
    if (tty_status_win) {
        delwin(tty_status_win);
    }

    /* Scroller windows for gdb and the tty */
    if (gdb_win) {
        scr_free(gdb_win);
    }
    if (tty_win) {
        scr_free(tty_win);
    }

    /* Source window */
    if (src_win) {
        source_free(src_win);
    }
}
Beispiel #8
0
/* read config files and store contents */
int scr_param_init()
{
  /* allocate storage and read in config files if we haven't already */
  if (scr_param_ref_count == 0) {
    /* allocate hash object to hold names we cannot read from the
     * environment */
    scr_no_user_hash = scr_hash_new();
    scr_hash_set(scr_no_user_hash, "SCR_CNTL_BASE", scr_hash_new());

    /* allocate hash object to store values from user config file,
     * if specified */
    char* user_file = user_config_path();
    if (user_file != NULL) {
      scr_user_hash = scr_hash_new();
      scr_config_read(user_file, scr_user_hash);
    }
    scr_free(&user_file);

    /* allocate hash object to store values from system config file */
    scr_system_hash = scr_hash_new();
    scr_config_read(scr_config_file, scr_system_hash);

    /* initialize our hash to cache lookups to getenv */
    scr_env_hash = scr_hash_new();

    /* warn user if he set any parameters in his environment or user
     * config file which aren't permitted */
    scr_hash_elem* elem;
    for (elem = scr_hash_elem_first(scr_no_user_hash);
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the parameter name */
      char* key = scr_hash_elem_key(elem);

      char* env_val = getenv(key);
      scr_hash* env_hash = scr_hash_get(scr_user_hash, key);

      /* check whether this is set in the environment */
      if (env_val != NULL || env_hash != NULL) {
        scr_err("%s cannot be set in the environment or user configuration file, ignoring setting",
          key
        );
      }
    }
  }

  /* increment our reference count */
  scr_param_ref_count++;

  return SCR_SUCCESS;
}
Beispiel #9
0
/* Exchanges file names with partner ranks. Sends file_send to rank_send
 * and, when a name is expected from rank_recv, writes into file_recv
 * (at most size_recv bytes) the path formed from dir_recv and the
 * basename of the received name. A side with nothing to send or receive
 * uses MPI_PROC_NULL. Always returns SCR_SUCCESS. */
int scr_swap_file_names(
  const char* file_send, int rank_send,
        char* file_recv, size_t size_recv, int rank_recv,
  const char* dir_recv, MPI_Comm comm)
{
  /* we only send when we have a real partner and a non-empty name */
  int sending = (rank_send != MPI_PROC_NULL) &&
                (file_send != NULL) &&
                (file_send[0] != '\0');
  if (! sending) {
    /* nothing to send, make sure to use PROC_NULL in sendrecv call */
    rank_send = MPI_PROC_NULL;
  }

  /* we only receive when we have a real partner and a non-empty
   * directory to place the file in */
  int receiving = (rank_recv != MPI_PROC_NULL) &&
                  (dir_recv != NULL) &&
                  (dir_recv[0] != '\0');
  if (! receiving) {
    /* nothing to recv, make sure to use PROC_NULL in sendrecv call */
    rank_recv = MPI_PROC_NULL;
  }

  /* swap names with our partners; the received pointer starts out NULL
   * in case we end up receiving from MPI_PROC_NULL */
  char* partner_file = NULL;
  scr_str_sendrecv(file_send, rank_send, &partner_file, rank_recv, comm);

  if (receiving) {
    /* the local copy lives at dir_recv/<basename of partner's file> */
    scr_path* local_path = scr_path_from_str(partner_file);
    scr_path_basename(local_path);
    scr_path_prepend_str(local_path, dir_recv);
    scr_path_strcpy(file_recv, size_recv, local_path);
    scr_path_delete(&local_path);

    /* the received name is no longer needed */
    scr_free(&partner_file);
  }

  return SCR_SUCCESS;
}
Beispiel #10
0
/* Searches for parameter `name` and returns a pointer to its value, or
 * NULL if it is not set anywhere. Lookup order: environment, user config
 * file, system config file. The first two are skipped for names listed
 * in scr_no_user_hash. */
char* scr_param_get(char* name)
{
  /* names listed in scr_no_user_hash may not be set by the user */
  scr_hash* no_user = scr_hash_get(scr_no_user_hash, name);
  int user_may_set = (no_user == NULL);

  /* environment takes precedence when the user is allowed to set it */
  if (user_may_set && getenv(name) != NULL) {
    /* returning the getenv pointer directly segfaults on some systems,
     * so the value is copied into scr_env_hash and the pointer into
     * the hash is returned instead */
    char* cached = NULL;
    if (scr_hash_util_get_str(scr_env_hash, name, &cached) == SCR_SUCCESS) {
      /* already cached by an earlier call */
      return cached;
    }

    /* first lookup of this name: copy the value into the cache */
    char* env_copy = strdup(getenv(name));
    scr_hash_util_set_str(scr_env_hash, name, env_copy);
    scr_free(&env_copy);

    /* the value must be found now that we have just stored it */
    if (scr_hash_util_get_str(scr_env_hash, name, &cached) != SCR_SUCCESS) {
      scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d",
        name, __FILE__, __LINE__
      );
    }

    return cached;
  }

  /* next choice: the user configuration file */
  char* user_value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (user_may_set && user_value != NULL) {
    return user_value;
  }

  /* last choice: the system configuration file, NULL when not found */
  return scr_hash_elem_get_first_val(scr_system_hash, name);
}
Beispiel #11
0
/* Deletes the hash that *ptr_hash points to: every element is unlinked
 * and deleted, then the hash itself is released through scr_free.
 * A NULL ptr_hash or NULL *ptr_hash is a no-op.
 * Always returns SCR_SUCCESS. */
int scr_hash_delete(scr_hash** ptr_hash)
{
  /* nothing to do without a hash */
  if (ptr_hash == NULL || *ptr_hash == NULL) {
    return SCR_SUCCESS;
  }

  /* unlink and delete elements from the front until none remain */
  scr_hash* list = *ptr_hash;
  while (! LIST_EMPTY(list)) {
    scr_hash_elem* head = LIST_FIRST(list);
    LIST_REMOVE(head, pointers);
    scr_hash_elem_delete(head);
  }

  /* release the hash object itself */
  scr_free(ptr_hash);

  return SCR_SUCCESS;
}
Beispiel #12
0
/* Copies a file from src_dir to dst_file. The source path is src_dir
 * joined with the basename of dst_file. When scr_crc_on_flush is set,
 * the CRC32 computed during the copy is compared against the CRC32
 * recorded in meta, if one is recorded.
 *
 * Returns the result of scr_file_copy, or SCR_FAILURE on CRC mismatch. */
static int scr_fetch_file(
  const char* dst_file,
  const char* src_dir,
  const scr_meta* meta)
{
  /* source path = src_dir + basename(dst_file) */
  scr_path* path_src_file = scr_path_from_str(dst_file);
  scr_path_basename(path_src_file);
  scr_path_prepend_str(path_src_file, src_dir);
  char* src_file = scr_path_strdup(path_src_file);

  /* copy the file, asking for a CRC only when CRC checking is enabled */
  uLong computed_crc;
  uLong* crc_out = scr_crc_on_flush ? &computed_crc : NULL;
  int rc = scr_file_copy(src_file, dst_file, scr_file_buf_size, crc_out);

  /* compare against the CRC stored in meta, when there is one */
  uLong expected_crc;
  int have_expected = (scr_meta_get_crc32(meta, &expected_crc) == SCR_SUCCESS);
  if (have_expected &&
      rc == SCR_SUCCESS &&
      scr_crc_on_flush &&
      computed_crc != expected_crc)
  {
    rc = SCR_FAILURE;
    scr_err("CRC32 mismatch detected when fetching file from %s to %s @ %s:%d",
      src_file, dst_file, __FILE__, __LINE__
    );

    /* TODO: would be good to log this mismatch as an event, but right
     * now only rank 0 can write log entries */
  }

  /* release the source file string and its path object */
  scr_free(&src_file);
  scr_path_delete(&path_src_file);

  return rc;
}
Beispiel #13
0
/* Builds the path to the user config file and returns it as a newly
 * allocated string (to be freed with scr_free).
 *
 * SCR_CONF_FILE is used verbatim when set. Otherwise the file name
 * SCR_CONFIG_FILE_USER is appended to SCR_PREFIX, or to the current
 * working directory when SCR_PREFIX is not set. Returns NULL if the
 * prefix string cannot be duplicated. */
static char* user_config_path()
{
  /* an explicit SCR_CONF_FILE wins over everything else */
  const char* conf_env = getenv("SCR_CONF_FILE");
  if (conf_env != NULL) {
    return strdup(conf_env);
  }

  /* otherwise pick the prefix directory to look in */
  char* prefix = NULL;
  const char* prefix_env = getenv("SCR_PREFIX");
  if (prefix_env == NULL) {
    /* SCR_PREFIX is not set, default to the current working directory */
    char cwd[SCR_MAX_FILENAME];
    if (scr_getcwd(cwd, sizeof(cwd)) != SCR_SUCCESS) {
      scr_abort(-1, "Problem reading current working directory @ %s:%d",
        __FILE__, __LINE__
      );
    }
    prefix = strdup(cwd);
  } else {
    /* user set SCR_PREFIX, take a copy of it */
    prefix = strdup(prefix_env);
  }

  /* without a prefix directory there is no path to build */
  if (prefix == NULL) {
    return NULL;
  }

  /* prefix directory + user config file name */
  scr_path* config_path = scr_path_from_str(prefix);
  scr_path_append_str(config_path, SCR_CONFIG_FILE_USER);
  char* file = scr_path_strdup(config_path);
  scr_path_delete(&config_path);

  /* the prefix copy has served its purpose */
  scr_free(&prefix);

  return file;
}
Beispiel #14
0
/* Searches for parameter `name` and returns a newly allocated hash
 * holding its value, or NULL if it is not set anywhere. Lookup order:
 * environment, user config file, system config file. The first two are
 * skipped for names listed in scr_no_user_hash. */
scr_hash* scr_param_get_hash(char* name)
{
  /* names listed in scr_no_user_hash may not be set by the user */
  scr_hash* no_user = scr_hash_get(scr_no_user_hash, name);

  /* environment takes precedence when the user is allowed to set it */
  if (no_user == NULL && getenv(name) != NULL) {
    /* the result is a hash with the environment value as its one key;
     * a private copy of the value is used while setting it */
    scr_hash* env_result = scr_hash_new();
    char* env_copy = strdup(getenv(name));
    scr_hash_set(env_result, env_copy, scr_hash_new());
    scr_free(&env_copy);
    return env_result;
  }

  /* try the user configuration file, and fall back to the system
   * configuration file when the user value is absent or not permitted */
  scr_hash* found = scr_hash_get(scr_user_hash, name);
  if (no_user != NULL || found == NULL) {
    found = scr_hash_get(scr_system_hash, name);
  }

  /* parameter not found anywhere */
  if (found == NULL) {
    return NULL;
  }

  /* hand back a copy so the caller owns what it receives */
  scr_hash* copy = scr_hash_new();
  scr_hash_merge(copy, found);
  return copy;
}
Beispiel #15
0
/* Reads the version 5 summary file <dir>/summary.scr into summary_hash.
 *
 * Returns SCR_SUCCESS if the file is readable and parses as a hash,
 * SCR_FAILURE otherwise (including when summary_hash is NULL). */
static int scr_summary_read_v5(const scr_path* dir, scr_hash* summary_hash)
{
  /* the caller must hand us a hash to fill in */
  if (summary_hash == NULL) {
    return SCR_FAILURE;
  }

  /* compose <dir>/summary.scr */
  scr_path* summary_path = scr_path_dup(dir);
  scr_path_append_str(summary_path, "summary.scr");
  char* summary_file = scr_path_strdup(summary_path);

  /* stays at failure unless every step below succeeds */
  int rc = SCR_FAILURE;

  if (scr_file_is_readable(summary_file) != SCR_SUCCESS) {
    /* probe readability first so that the hash reader does not print
     * an error for a file that simply is not there; fail quietly */
  } else if (scr_hash_read_path(summary_path, summary_hash) != SCR_SUCCESS) {
    scr_err("Reading summary file %s @ %s:%d",
      summary_file, __FILE__, __LINE__
    );
  } else {
    /* the summary file was read as a hash */
    rc = SCR_SUCCESS;
  }

  /* release the file name string and the path object */
  scr_free(&summary_file);
  scr_path_delete(&summary_path);

  return rc;
}
Beispiel #16
0
/*
 * Poll standard input and every open terminal for up to one second.
 * Returns 1 when standard input reports a poll flag other than POLLIN,
 * and 0 otherwise.
 */
static int pollterms(void)
{
	struct pollfd ufds[NTERMS + 1];
	int term_idx[NTERMS + 1];	/* term_idx[k]: terminal polled in slot k */
	int nfds = 1;
	int t, slot;

	/* slot 0 is always standard input */
	ufds[0].fd = 0;
	ufds[0].events = POLLIN;

	/* one further slot per open terminal */
	for (t = 0; t < NTERMS; t++) {
		if (TERMOPEN(t)) {
			ufds[nfds].fd = terms[t].fd;
			ufds[nfds].events = POLLIN;
			term_idx[nfds] = t;
			nfds++;
		}
	}

	/* timeout or error: nothing to handle */
	if (poll(ufds, nfds, 1000) < 1)
		return 0;

	/* any flag other than POLLIN on standard input */
	if (ufds[0].revents & (POLLFLAGS & ~POLLIN))
		return 1;
	if (ufds[0].revents & POLLIN)
		directkey();

	for (slot = 1; slot < nfds; slot++) {
		int idx = term_idx[slot];

		if (!(ufds[slot].revents & POLLFLAGS))
			continue;
		peepterm(idx);
		if (ufds[slot].revents & POLLIN) {
			term_read();
		} else {
			/* flagged without readable data: close this terminal */
			scr_free(idx);
			term_end();
			if (cmdmode)
				exitit = 1;
		}
		peepback(idx);
	}
	return 0;
}
Beispiel #17
0
/* Computes the CRC32 of the file named by the single command line
 * argument and prints it in hexadecimal on stdout.
 *
 * Returns 0 on success, and 1 if the argument count is wrong, memory
 * cannot be allocated, or the CRC32 cannot be computed. */
int main(int argc, char* argv[])
{
  /* we need one and only one parameter, the file name */
  if (argc != 2) {
    scr_err("Expected exactly one file name argument @ %s:%d",
            __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the filename */
  char* filename = strdup(argv[1]);
  if (filename == NULL) {
    scr_err("Failed to allocate memory for file name @ %s:%d",
            __FILE__, __LINE__
    );
    return 1;
  }

  int rc = 0;

  /* compute the crc32 of the file and print it */
  uLong crc = crc32(0L, Z_NULL, 0);
  if (scr_crc32(filename, &crc) != SCR_SUCCESS) {
    scr_err("Failed to compute CRC32 for file %s @ file %s:%d",
            filename, __FILE__, __LINE__
    );
    rc = 1;
  } else {
    printf("%lx\n", (unsigned long) crc);
  }

  /* free off the string we strdup'ed at the start, on every path */
  scr_free(&filename);

  return rc;
}
Beispiel #18
0
/* Rebuilds the flush file so that it agrees with the cache: dataset ids
 * in the flush file which are not in cache are removed, and datasets in
 * cache that are not in the flush file are added and marked as located
 * in cache. Only the process with scr_my_rank_world == 0 does any work.
 *
 * Both id lists are walked in step; the walk relies on each list being
 * in ascending order. Always returns SCR_SUCCESS. */
int scr_flush_file_rebuild(const scr_filemap* map)
{
  /* only rank 0 maintains the flush file */
  if (scr_my_rank_world != 0) {
    return SCR_SUCCESS;
  }

  /* read the flush file */
  scr_hash* hash = scr_hash_new();
  scr_hash_read_path(scr_flush_file, hash);

  /* get ordered list of dataset ids in flush file; the outputs start
   * out empty so they are well defined even if the helper leaves them
   * untouched */
  int flush_ndsets = 0;
  int* flush_dsets = NULL;
  scr_hash* flush_dsets_hash = scr_hash_get(hash, SCR_FLUSH_KEY_DATASET);
  scr_hash_list_int(flush_dsets_hash, &flush_ndsets, &flush_dsets);

  /* get ordered list of dataset ids in cache */
  int cache_ndsets = 0;
  int* cache_dsets = NULL;
  scr_filemap_list_datasets(map, &cache_ndsets, &cache_dsets);

  /* walk both ordered lists in step */
  int flush_index = 0;
  int cache_index = 0;
  while (flush_index < flush_ndsets && cache_index < cache_ndsets) {
    /* get next smallest index from flush file and cache */
    int flush_dset = flush_dsets[flush_index];
    int cache_dset = cache_dsets[cache_index];

    if (flush_dset < cache_dset) {
      /* dataset exists in flush file but not in cache,
       * delete it from the flush file */
      scr_hash_unset_kv_int(hash, SCR_FLUSH_KEY_DATASET, flush_dset);
      flush_index++;
    } else if (cache_dset < flush_dset) {
      /* dataset exists in cache but not flush file,
       * add it to the flush file */
      scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset);
      scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
      cache_index++;
    } else {
      /* dataset exists in cache and the flush file,
       * ensure that it is listed as being in the cache */
      scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset);
      scr_hash_unset_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
      scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
      flush_index++;
      cache_index++;
    }
  }

  /* leftover flush file entries have no dataset in cache,
   * delete them from the flush file */
  while (flush_index < flush_ndsets) {
    int flush_dset = flush_dsets[flush_index];
    scr_hash_unset_kv_int(hash, SCR_FLUSH_KEY_DATASET, flush_dset);
    flush_index++;
  }

  /* leftover cache datasets are missing from the flush file,
   * add them to the flush file */
  while (cache_index < cache_ndsets) {
    int cache_dset = cache_dsets[cache_index];
    scr_hash* dset_hash = scr_hash_set_kv_int(hash, SCR_FLUSH_KEY_DATASET, cache_dset);
    scr_hash_set_kv(dset_hash, SCR_FLUSH_KEY_LOCATION, SCR_FLUSH_KEY_LOCATION_CACHE);
    cache_index++;
  }

  /* free our list of cache dataset ids */
  scr_free(&cache_dsets);

  /* free our list of flush file dataset ids */
  scr_free(&flush_dsets);

  /* write the hash back to the flush file */
  scr_hash_write_path(scr_flush_file, hash);

  /* delete the hash */
  scr_hash_delete(&hash);

  return SCR_SUCCESS;
}
Beispiel #19
0
/* Dispatches one key press according to the current input focus.
 *
 * The configured cgdb mode key is handled first: outside CGDB focus it
 * releases any in-progress status bar buffer and switches focus to CGDB;
 * in CGDB focus it is ignored. Any other key is routed by focus: CGDB
 * focus handles its own bindings here and forwards the rest to
 * source_input; the other focuses hand the key to their input handler.
 *
 * key - the key code to handle
 *
 * Returns the result of tty_input, gdb_input or status_bar_input for
 * those focuses, and 0 on every other path. */
int internal_if_input(int key)
{
    /* Case-insensitivity setting passed to the regex search calls below */
    int regex_icase = cgdbrc_get(CGDBRC_IGNORECASE)->variant.int_val;

    /* Normally, CGDB_KEY_ESC, but can be configured by the user */
    int cgdb_mode_key = cgdbrc_get(CGDBRC_CGDB_MODE_KEY)->variant.int_val;

    /* The cgdb mode key, puts the debugger into command mode */
    if (focus != CGDB && key == cgdb_mode_key) {
        /* Depending on which cgdb was in, it can free some memory here that
         * it was previously using. */
        if (focus == CGDB_STATUS_BAR && sbc_kind == SBC_NORMAL) {
            ibuf_free(cur_sbc);
            cur_sbc = NULL;
        } else if (focus == CGDB_STATUS_BAR && sbc_kind == SBC_REGEX) {
            /* Abandon the regex being typed and put the selected line back
             * to the one saved when the search was started */
            ibuf_free(regex_cur);
            regex_cur = NULL;
            free(src_win->cur->buf.cur_line);
            src_win->cur->buf.cur_line = NULL;
            src_win->cur->sel_rline = orig_line_regex;
            src_win->cur->sel_line = orig_line_regex;
        }
        if_set_focus(CGDB);
        return 0;
    }
    /* If you are already in cgdb mode, the cgdb mode key does nothing */
    else if (key == cgdb_mode_key)
        return 0;

    /* Check for global keystrokes */
    switch (focus) {
        case CGDB:
            switch (key) {
                case 'i':
                    if_set_focus(GDB);
                    return 0;
                case 'I':
                    if_set_focus(TTY);
                    return 0;
                case ':':
                    /* Set the type of the command the user is typing in the status bar */
                    sbc_kind = SBC_NORMAL;
                    if_set_focus(CGDB_STATUS_BAR);
                    /* Since the user is about to type in a command, allocate a buffer 
                     * in which this command can be stored. */
                    cur_sbc = ibuf_init();
                    return 0;
                case '/':
                case '?':
                    /* Start a regex search; only done when a source file
                     * is current. '/' sets the direction flag, '?' clears it */
                    if (src_win->cur != NULL) {
                        regex_cur = ibuf_init();
                        regex_direction_cur = ('/' == key);
                        orig_line_regex = src_win->cur->sel_line;

                        sbc_kind = SBC_REGEX;
                        if_set_focus(CGDB_STATUS_BAR);

                        /* Capturing regular expressions */
                        source_search_regex_init(src_win);

                        /* Initialize the function for finding a regex and tell user */
                        if_draw();
                    }
                    return 0;
                case 'n':
                    /* Repeat the last search in its original direction.
                     * Note the break (not return): the key is also passed
                     * to source_input after the inner switch */
                    source_search_regex(src_win, ibuf_get(regex_last), 2,
                            regex_direction_last, regex_icase);
                    if_draw();
                    break;
                case 'N':
                    /* Repeat the last search in the opposite direction;
                     * also falls through to source_input via break */
                    source_search_regex(src_win, ibuf_get(regex_last), 2,
                            !regex_direction_last, regex_icase);
                    if_draw();
                    break;
                case 'T':
                    /* Toggle the tty window and move focus accordingly */
                    if (tty_win_on) {
                        tty_win_on = 0;
                        focus = CGDB;
                    } else {
                        tty_win_on = 1;
                        focus = TTY;
                    }

                    if_layout();

                    break;
                case CGDB_KEY_CTRL_T:
                    /* Request a new tty; on success drop the current tty
                     * window and lay the interface out again */
                    if (tgdb_tty_new(tgdb) == -1) {
                        /* Error */
                    } else {
                        scr_free(tty_win);
                        tty_win = NULL;
                        if_layout();
                    }

                    break;
                case CGDB_KEY_F1:
                    if_display_help();
                    return 0;
                case CGDB_KEY_F5:
                    /* Issue GDB run command */
                {
                    tgdb_request_ptr request_ptr;

                    request_ptr =
                            tgdb_request_run_debugger_command(tgdb, TGDB_RUN);
                    handle_request(tgdb, request_ptr);
                }
                    return 0;
                case CGDB_KEY_F6:
                    /* Issue GDB continue command */
                {
                    tgdb_request_ptr request_ptr;

                    request_ptr =
                            tgdb_request_run_debugger_command(tgdb,
                            TGDB_CONTINUE);
                    handle_request(tgdb, request_ptr);
                }
                    return 0;
                case CGDB_KEY_F7:
                    /* Issue GDB finish command */
                {
                    tgdb_request_ptr request_ptr;

                    request_ptr =
                            tgdb_request_run_debugger_command(tgdb,
                            TGDB_FINISH);
                    handle_request(tgdb, request_ptr);
                }
                    return 0;
                case CGDB_KEY_F8:
                    /* Issue GDB next command */
                {
                    tgdb_request_ptr request_ptr;

                    request_ptr =
                            tgdb_request_run_debugger_command(tgdb, TGDB_NEXT);
                    handle_request(tgdb, request_ptr);
                }
                    return 0;
                case CGDB_KEY_F10:
                    /* Issue GDB step command */
                {
                    tgdb_request_ptr request_ptr;

                    request_ptr =
                            tgdb_request_run_debugger_command(tgdb, TGDB_STEP);
                    handle_request(tgdb, request_ptr);
                }
                    return 0;
                case CGDB_KEY_CTRL_L:
                    if_layout();
                    return 0;
            }
            /* Keys not returned above (unmatched keys and the cases that
             * end in break) are forwarded to the source window */
            source_input(src_win, key);
            return 0;
            break;
        case TTY:
            return tty_input(key);
        case GDB:
            return gdb_input(key);
        case FILE_DLG:
        {
            /* Receives the file name from filedlg_recv_char; static, so
             * the storage persists between calls */
            static char filedlg_file[MAX_LINE];
            int ret = filedlg_recv_char(fd, key, filedlg_file);

            /* The user cancelled */
            if (ret == -1) {
                if_set_focus(CGDB);
                return 0;
                /* Needs more data */
            } else if (ret == 0) {
                return 0;
                /* The user picked a file */
            } else if (ret == 1) {
                tgdb_request_ptr request_ptr;

                request_ptr = tgdb_request_filename_pair(tgdb, filedlg_file);
                handle_request(tgdb, request_ptr);
                if_set_focus(CGDB);
                return 0;
            }
        }
            return 0;
        case CGDB_STATUS_BAR:
            return status_bar_input(src_win, key);
    }

    /* Never gets here */
    return 0;
}
Beispiel #20
0
/* Transfers redundancy descriptors for dataset `id`: every process
 * offers the descriptors it holds, keyed by rank, through
 * scr_hash_exchange. The first descriptor received is recorded in the
 * filemap for our own rank and used to fill in `red`.
 *
 * Returns SCR_FAILURE if any process found an out-of-range rank or some
 * process received no descriptor, SCR_SUCCESS otherwise. */
static int scr_distribute_reddescs(scr_filemap* map, int id, scr_reddesc* red)
{
  /* descriptors we hold and can offer to other ranks */
  scr_hash* outgoing = scr_hash_new();

  /* ranks for which we hold data of this dataset */
  int  nranks = 0;
  int* ranks = NULL;
  scr_filemap_list_ranks_by_dataset(map, id, &nranks, &ranks);

  /* collect the redundancy descriptor of each of those ranks, if we
   * have it, and note any rank id outside of the world */
  int bad_rank_seen = 0;
  int idx;
  for (idx = 0; idx < nranks; idx++) {
    int rank = ranks[idx];

    /* the rank must lie within [0, scr_ranks_world) */
    if (rank < 0 || rank >= scr_ranks_world) {
      scr_err("Invalid rank id %d in world of %d @ %s:%d",
        rank, scr_ranks_world, __FILE__, __LINE__
      );
      bad_rank_seen = 1;
    }

    /* fetch the descriptor stored for this rank */
    scr_hash* rank_desc = scr_hash_new();
    scr_filemap_get_desc(map, id, rank, rank_desc);

    /* an empty descriptor is discarded, anything else goes into the
     * outgoing hash under the rank id */
    if (scr_hash_size(rank_desc) <= 0) {
      scr_hash_delete(&rank_desc);
    } else {
      scr_hash_setf(outgoing, rank_desc, "%d", rank);
    }
  }

  /* the rank list is no longer needed */
  scr_free(&ranks);

  /* give up if any process came across an invalid rank */
  if (! scr_alltrue(bad_rank_seen == 0)) {
    scr_hash_delete(&outgoing);
    return SCR_FAILURE;
  }

  /* exchange descriptors with other ranks, incoming ones land in an
   * initially empty hash */
  scr_hash* incoming = scr_hash_new();
  scr_hash_exchange(outgoing, incoming, scr_comm_world);

  /* every process must have received at least one descriptor */
  int received = scr_hash_size(incoming);
  if (! scr_alltrue(received > 0)) {
    scr_hash_delete(&incoming);
    scr_hash_delete(&outgoing);
    scr_dbg(2, "Cannot find process that has my redundancy descriptor @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* all received descriptors should be the same, so take the first */
  scr_hash_elem* first_elem = scr_hash_elem_first(incoming);
  scr_hash* my_desc = scr_hash_elem_hash(first_elem);

  /* store the descriptor for our own rank and write the filemap out */
  scr_filemap_set_desc(map, id, scr_my_rank_world, my_desc);
  scr_filemap_write(scr_map_file, map);

  /* TODO: at this point, we could delete descriptors for other
   * ranks for this checkpoint */

  /* build our redundancy descriptor struct from the map */
  scr_reddesc_create_from_filemap(map, id, scr_my_rank_world, red);

  /* release the receive and send hashes */
  scr_hash_delete(&incoming);
  scr_hash_delete(&outgoing);

  return SCR_SUCCESS;
}
Beispiel #21
0
/* Moves all files of the specified dataset in the cache to make them
 * accessible to the new rank mapping.
 *
 *   map - filemap describing files in cache; updated in memory and
 *         rewritten to scr_map_file as files are renamed, sent,
 *         received, or deleted
 *   red - redundancy descriptor, used to select the cache directory for
 *         each file and (via copy_type) to decide whether a rank that
 *         cannot locate its files is an immediate failure
 *   id  - id of the dataset whose files are being distributed
 *
 * Every process in scr_comm_world must call this function together,
 * since it calls MPI_Allreduce and scr_hash_exchange on that communicator.
 *
 * Returns SCR_FAILURE if any process found an out-of-range rank in its
 * filemap, if the scheme is not XOR and some process cannot find its files,
 * or if a local rename or file swap failed; returns SCR_SUCCESS otherwise.
 * Success only means the transfer completed without error, not that this
 * process ended up with all of its files. */
static int scr_distribute_files(scr_filemap* map, const scr_reddesc* red, int id)
{
  int i, round;
  int rc = SCR_SUCCESS;

  /* TODO: mark dataset as being distributed in filemap,
   * because if we fail in the middle of a distribute,
   * we can't trust the contents of the files anymore,
   * at which point it should be deleted */

  /* clean out any incomplete files before we start */
  scr_cache_clean(map);

  /* for this dataset, get list of ranks we have data for */
  int  nranks = 0;
  int* ranks = NULL;
  scr_filemap_list_ranks_by_dataset(map, id, &nranks, &ranks);

  /* walk backwards through the list of ranks, and set our start index
   * to the rank which is the first rank that is equal to or higher
   * than our own rank -- when we assign round ids below, this offsetting
   * helps distribute the load */
  int start_index = 0;
  int invalid_rank_found = 0;
  for (i = nranks-1; i >= 0; i--) {
    int rank = ranks[i];

    /* pick the first rank whose rank id is equal to or higher than our own */
    if (rank >= scr_my_rank_world) {
      start_index = i;
    }

    /* while we're at it, check that the rank is within range */
    if (rank < 0 || rank >= scr_ranks_world) {
      scr_err("Invalid rank id %d in world of %d @ %s:%d",
        rank, scr_ranks_world, __FILE__, __LINE__
      );
      invalid_rank_found = 1;
    }
  }

  /* check that we didn't find an invalid rank on any process */
  if (! scr_alltrue(invalid_rank_found == 0)) {
    scr_free(&ranks);
    return SCR_FAILURE;
  }

  /* allocate array to record the rank we can send to in each round */
  int* have_rank_by_round = (int*) SCR_MALLOC(sizeof(int) * nranks);
  int* send_flag_by_round = (int*) SCR_MALLOC(sizeof(int) * nranks);

  /* check that we have all of the files for each rank,
   * and determine the round we can send them */
  scr_hash* send_hash = scr_hash_new();
  scr_hash* recv_hash = scr_hash_new();
  for (round = 0; round < nranks; round++) {
    /* get the rank id */
    int index = (start_index + round) % nranks;
    int rank = ranks[index];

    /* record the rank indexed by the round number */
    have_rank_by_round[round] = rank;

    /* assume we won't be sending to this rank in this round */
    send_flag_by_round[round] = 0;

    /* if we have files for this rank, specify the round we can
     * send those files in */
    if (scr_bool_have_files(map, id, rank)) {
      scr_hash_setf(send_hash, NULL, "%d %d", rank, round);
    }
  }
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* search for the minimum round we can get our files */
  int retrieve_rank  = -1;
  int retrieve_round = -1;
  scr_hash_elem* elem = NULL;
  for (elem = scr_hash_elem_first(recv_hash);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the rank id */
    int rank = scr_hash_elem_key_int(elem);

    /* get the round id in which that rank can send to us */
    scr_hash* round_hash = scr_hash_elem_hash(elem);
    scr_hash_elem* round_elem = scr_hash_elem_first(round_hash);
    char* round_str = scr_hash_elem_key(round_elem);
    int offered_round = atoi(round_str);

    /* record this round and rank number if it's less than the current round */
    if (offered_round < retrieve_round || retrieve_round == -1) {
      retrieve_round = offered_round;
      retrieve_rank  = rank;
    }
  }

  /* done with the round hashes, free them off */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  /* free off our list of ranks */
  scr_free(&ranks);

  /* for some redundancy schemes, we know at this point whether we
   * can recover all files */
  int can_get_files = (retrieve_rank != -1);
  if (red->copy_type != SCR_COPY_XOR && !scr_alltrue(can_get_files)) {
    /* print a debug message indicating which rank is missing files */
    if (! can_get_files) {
      scr_dbg(2, "Cannot find process that has my checkpoint files @ %s:%d",
        __FILE__, __LINE__
      );
    }

    /* release the per-round arrays before bailing out,
     * otherwise they leak on this failure path */
    scr_free(&send_flag_by_round);
    scr_free(&have_rank_by_round);
    return SCR_FAILURE;
  }

  /* get the maximum retrieve round */
  int max_rounds = 0;
  MPI_Allreduce(
    &retrieve_round, &max_rounds, 1, MPI_INT, MPI_MAX, scr_comm_world
  );

  /* tell destination which round we'll take our files in */
  send_hash = scr_hash_new();
  recv_hash = scr_hash_new();
  if (retrieve_rank != -1) {
    scr_hash_setf(send_hash, NULL, "%d %d", retrieve_rank, retrieve_round);
  }
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* determine which ranks want to fetch their files from us */
  for(elem = scr_hash_elem_first(recv_hash);
      elem != NULL;
      elem = scr_hash_elem_next(elem))
  {
    /* get the round id the requesting rank picked */
    scr_hash* round_hash = scr_hash_elem_hash(elem);
    scr_hash_elem* round_elem = scr_hash_elem_first(round_hash);
    char* round_str = scr_hash_elem_key(round_elem);
    int wanted_round = atoi(round_str);

    /* record whether this rank wants its files from us,
     * ignoring any round id that falls outside of our arrays */
    if (wanted_round >= 0 && wanted_round < nranks) {
      send_flag_by_round[wanted_round] = 1;
    }
  }

  /* done with the round hashes, free them off */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  /* run through rounds and exchange files */
  for (round = 0; round <= max_rounds; round++) {
    /* assume we don't need to send or receive any files this round */
    int send_rank = MPI_PROC_NULL;
    int recv_rank = MPI_PROC_NULL;
    int send_num  = 0;
    int recv_num  = 0;

    /* check whether I can potentially send to anyone in this round */
    if (round < nranks) {
      /* have someone's files, check whether they are asking
       * for them this round */
      if (send_flag_by_round[round]) {
        /* need to send files this round, remember to whom and how many */
        int dst_rank = have_rank_by_round[round];
        send_rank = dst_rank;
        send_num  = scr_filemap_num_files(map, id, dst_rank);
      }
    }

    /* if I'm supposed to get my files this round, set the recv_rank */
    if (retrieve_round == round) {
      recv_rank = retrieve_rank;
    }

    /* TODO: another special case is to just move files if the
     * processes are on the same node */

    /* if i'm sending to myself, just move (rename) each file */
    if (send_rank == scr_my_rank_world) {
      /* get our file list */
      int numfiles = 0;
      char** files = NULL;
      scr_filemap_list_files(map, id, send_rank, &numfiles, &files);

      /* TODO: sort files in reverse order by size */

      /* iterate over and rename each file */
      for (i=0; i < numfiles; i++) {
        /* get the current file name */
        char* file = files[i];

        /* lookup meta data for this file */
        scr_meta* meta = scr_meta_new();
        scr_filemap_get_meta(map, id, send_rank, file, meta);

        /* get the path for this file based on its type
         * and dataset id */
        char* dir = NULL;
        if (scr_meta_check_filetype(meta, SCR_META_FILE_USER) == SCR_SUCCESS) {
          dir = scr_cache_dir_get(red, id);
        } else {
          dir = scr_cache_dir_hidden_get(red, id);
        }

        /* build the new file name */
        scr_path* path_newfile = scr_path_from_str(file);
        scr_path_basename(path_newfile);
        scr_path_prepend_str(path_newfile, dir);
        char* newfile = scr_path_strdup(path_newfile);

        /* if the new file name is different from the old name, rename it */
        if (strcmp(file, newfile) != 0) {
          /* record the new filename to our map and write it to disk */
          scr_filemap_add_file(map, id, send_rank, newfile);
          scr_filemap_set_meta(map, id, send_rank, newfile, meta);
          scr_filemap_write(scr_map_file, map);

          /* rename the file */
          scr_dbg(2, "Round %d: rename(%s, %s)", round, file, newfile);
          int tmp_rc = rename(file, newfile);
          if (tmp_rc != 0) {
            /* TODO: to cross mount points, if tmp_rc == EXDEV,
             * open new file, copy, and delete orig */
            scr_err("Moving checkpoint file: rename(%s, %s) %s errno=%d @ %s:%d",
              file, newfile, strerror(errno), errno, __FILE__, __LINE__
            );
            rc = SCR_FAILURE;
          }

          /* remove the old name from the filemap and write it to disk */
          scr_filemap_remove_file(map, id, send_rank, file);
          scr_filemap_write(scr_map_file, map);
        }

        /* free the path and string */
        scr_free(&newfile);
        scr_path_delete(&path_newfile);

        /* free directory string */
        scr_free(&dir);

        /* free meta data */
        scr_meta_delete(&meta);
      }

      /* free the list of filename pointers */
      scr_free(&files);
    } else {
      /* if we have files for this round, but the corresponding
       * rank doesn't need them, delete the files */
      if (round < nranks && send_rank == MPI_PROC_NULL) {
        int dst_rank = have_rank_by_round[round];
        scr_unlink_rank(map, id, dst_rank);
      }

      /* sending to and/or receiving from another node */
      if (send_rank != MPI_PROC_NULL || recv_rank != MPI_PROC_NULL) {
        /* have someone to send to or receive from */
        int have_outgoing = 0;
        int have_incoming = 0;
        if (send_rank != MPI_PROC_NULL) {
          have_outgoing = 1;
        }
        if (recv_rank != MPI_PROC_NULL) {
          have_incoming = 1;
        }

        /* first, determine how many files I will be receiving and
         * tell how many I will be sending */
        MPI_Request request[2];
        MPI_Status  status[2];
        int num_req = 0;
        if (have_incoming) {
          MPI_Irecv(
            &recv_num, 1, MPI_INT, recv_rank, 0,
            scr_comm_world, &request[num_req]
          );
          num_req++;
        }
        if (have_outgoing) {
          MPI_Isend(
            &send_num, 1, MPI_INT, send_rank, 0,
            scr_comm_world, &request[num_req]
          );
          num_req++;
        }
        if (num_req > 0) {
          MPI_Waitall(num_req, request, status);
        }

        /* record how many files I will receive (need to distinguish
         * between 0 files and not knowing) */
        if (have_incoming) {
          scr_filemap_set_expected_files(map, id, scr_my_rank_world, recv_num);
        }

        /* turn off send or receive flags if the file count is 0,
         * nothing else to do */
        if (send_num == 0) {
          have_outgoing = 0;
          send_rank = MPI_PROC_NULL;
        }
        if (recv_num == 0) {
          have_incoming = 0;
          recv_rank = MPI_PROC_NULL;
        }

        /* TODO: since we overwrite files in place in order to avoid
         * running out of storage space, we should sort files in order
         * of descending size for the next step */

        /* get our file list for the destination */
        int numfiles = 0;
        char** files = NULL;
        if (have_outgoing) {
          scr_filemap_list_files(map, id, send_rank, &numfiles, &files);
        }

        /* while we have a file to send or receive ... */
        while (have_incoming || have_outgoing) {
          /* get the filename; send_num counts down, so this walks
           * the file list from the front */
          char* file = NULL;
          scr_meta* send_meta = NULL;
          if (have_outgoing) {
            file = files[numfiles - send_num];
            send_meta = scr_meta_new();
            scr_filemap_get_meta(map, id, send_rank, file, send_meta);
          }

          /* exchange meta data so we can determine type of incoming file */
          scr_meta* recv_meta = scr_meta_new();
          scr_hash_sendrecv(send_meta, send_rank, recv_meta, recv_rank, scr_comm_world);

          /* get the path for this file based on its type and dataset id */
          char* dir = NULL;
          if (have_incoming) {
            if (scr_meta_check_filetype(recv_meta, SCR_META_FILE_USER) == SCR_SUCCESS) {
              dir = scr_cache_dir_get(red, id);
            } else {
              dir = scr_cache_dir_hidden_get(red, id);
            }
          }

          /* exchange file names with partners,
           * building full path of incoming file */
          char file_partner[SCR_MAX_FILENAME];
          scr_swap_file_names(
            file, send_rank, file_partner, sizeof(file_partner), recv_rank,
            dir, scr_comm_world
          );

          /* free directory string */
          scr_free(&dir);

          /* free incoming meta data (we'll get this again later) */
          scr_meta_delete(&recv_meta);

          /* if we'll receive a file, record the name of our file
           * in the filemap and write it to disk */
          recv_meta = NULL;
          if (recv_rank != MPI_PROC_NULL) {
            recv_meta = scr_meta_new();
            scr_filemap_add_file(map, id, scr_my_rank_world, file_partner);
            scr_filemap_write(scr_map_file, map);
          }

          /* either sending or receiving a file this round, since we move files,
           * it will be deleted or overwritten */
          if (scr_swap_files(MOVE_FILES, file, send_meta, send_rank,
              file_partner, recv_meta, recv_rank, scr_comm_world) != SCR_SUCCESS)
          {
            scr_err("Swapping files: %s to %d, %s from %d @ %s:%d",
                    file, send_rank, file_partner, recv_rank, __FILE__, __LINE__
            );
            rc = SCR_FAILURE;
          }

          /* if we received a file, record its meta data and decrement
           * our receive count */
          if (have_incoming) {
            /* record meta data for the file we received */
            scr_filemap_set_meta(map, id, scr_my_rank_world, file_partner, recv_meta);
            scr_meta_delete(&recv_meta);

            /* decrement receive count */
            recv_num--;
            if (recv_num == 0) {
              have_incoming = 0;
              recv_rank = MPI_PROC_NULL;
            }
          }

          /* if we sent a file, remove it from the filemap and decrement
           * our send count */
          if (have_outgoing) {
            /* remove file from the filemap */
            scr_filemap_remove_file(map, id, send_rank, file);
            scr_meta_delete(&send_meta);

            /* decrement our send count */
            send_num--;
            if (send_num == 0) {
              have_outgoing = 0;
              send_rank = MPI_PROC_NULL;
            }
          }

          /* update filemap on disk */
          scr_filemap_write(scr_map_file, map);
        }

        /* free our file list */
        scr_free(&files);
      }
    }
  }

  /* if we have more rounds than max rounds, delete the remainder of our files */
  for (round = max_rounds+1; round < nranks; round++) {
    /* have someone's files for this round, so delete them */
    int dst_rank = have_rank_by_round[round];
    scr_unlink_rank(map, id, dst_rank);
  }

  scr_free(&send_flag_by_round);
  scr_free(&have_rank_by_round);

  /* write out new filemap */
  scr_filemap_write(scr_map_file, map);

  /* clean out any incomplete files */
  scr_cache_clean(map);

  /* TODO: if the exchange or redundancy rebuild failed,
   * we should also delete any *good* files we received */

  /* return whether distribute succeeded, it does not ensure we have
   * all of our files, only that the transfer completed without failure */
  return rc;
}
Beispiel #22
0
/* since on a restart we may end up with more or fewer ranks on a node than the
 * previous run, rely on the master to read in and distribute the filemap to
 * other ranks on the node */
int scr_scatter_filemaps(scr_filemap* my_map)
{
  /* TODO: if the control directory is on a device shared by lots of procs,
   * we should read and distribute this data in a more scalable way */

  /* allocate empty send hash */
  scr_hash* send_hash = scr_hash_new();

  /* if i'm the master on this node, read in all filemaps */
  if (scr_storedesc_cntl->rank == 0) {
    /* create an empty filemap */
    scr_filemap* all_map = scr_filemap_new();

    /* read in the master map */
    scr_hash* hash = scr_hash_new();
    scr_hash_read_path(scr_master_map_file, hash);

    /* for each filemap listed in the master map */
    scr_hash_elem* elem;
    for (elem = scr_hash_elem_first(scr_hash_get(hash, "Filemap"));
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the filename of this filemap */
      char* file = scr_hash_elem_key(elem);

      /* TODO MEMFS: mount storage for each filemap */

      /* read in the filemap */
      scr_filemap* tmp_map = scr_filemap_new();
      scr_path* path_file = scr_path_from_str(file);
      scr_filemap_read(path_file, tmp_map);
      scr_path_delete(&path_file);

      /* merge it with the all_map */
      scr_filemap_merge(all_map, tmp_map);

      /* delete filemap */
      scr_filemap_delete(&tmp_map);

      /* TODO: note that if we fail after unlinking this file but before
       * writing out the new file, we'll lose information */

      /* delete the file */
      scr_file_unlink(file);
    }

    /* free the hash object */
    scr_hash_delete(&hash);

    /* write out new local 0 filemap */
    if (scr_filemap_num_ranks(all_map) > 0) {
      scr_filemap_write(scr_map_file, all_map);
    }

    /* get global rank of each rank */
    int* ranks = (int*) SCR_MALLOC(scr_storedesc_cntl->ranks * sizeof(int));
    MPI_Gather(
      &scr_my_rank_world, 1, MPI_INT, ranks, 1, MPI_INT,
      0, scr_storedesc_cntl->comm
    );

    /* for each rank, send them their own file data if we have it */
    int i;
    for (i=0; i < scr_storedesc_cntl->ranks; i++) {
      int rank = ranks[i];
      if (scr_filemap_have_rank(all_map, rank)) {
        /* extract the filemap for this rank */
        scr_filemap* tmp_map = scr_filemap_extract_rank(all_map, rank);

        /* get a reference to the hash object that we'll send to this rank,
         * and merge this filemap into it */
        scr_hash* tmp_hash = scr_hash_getf(send_hash, "%d", i);
        if (tmp_hash == NULL) {
          /* if we don't find an existing entry in the send_hash,
           * create an empty hash and insert it */
          scr_hash* empty_hash = scr_hash_new();
          scr_hash_setf(send_hash, empty_hash, "%d", i);
          tmp_hash = empty_hash;
        }
        scr_hash_merge(tmp_hash, tmp_map);

        /* delete the filemap for this rank */
        scr_filemap_delete(&tmp_map);
      }
    }

    /* free our rank list */
    scr_free(&ranks);

    /* now just round robin the remainder across the set (load balancing) */
    int num;
    int* remaining_ranks = NULL;
    scr_filemap_list_ranks(all_map, &num, &remaining_ranks);

    int j = 0;
    while (j < num) {
      /* pick a rank in to send to */
      i = j % scr_storedesc_cntl->ranks;

      /* extract the filemap for this rank */
      scr_filemap* tmp_map = scr_filemap_extract_rank(all_map, remaining_ranks[j]);

      /* get a reference to the hash object that we'll send to this rank,
       * and merge this filemap into it */
      scr_hash* tmp_hash = scr_hash_getf(send_hash, "%d", i);
      if (tmp_hash == NULL) {
        /* if we don't find an existing entry in the send_hash,
         * create an empty hash and insert it */
        scr_hash* empty_hash = scr_hash_new();
        scr_hash_setf(send_hash, empty_hash, "%d", i);
        tmp_hash = empty_hash;
      }
      scr_hash_merge(tmp_hash, tmp_map);

      /* delete the filemap for this rank */
      scr_filemap_delete(&tmp_map);
      j++;
    }

    scr_free(&remaining_ranks);

    /* delete the filemap */
    scr_filemap_delete(&all_map);

    /* write out the new master filemap */
    hash = scr_hash_new();
    char file[SCR_MAX_FILENAME];
    for (i=0; i < scr_storedesc_cntl->ranks; i++) {
      sprintf(file, "%s/filemap_%d.scrinfo", scr_cntl_prefix, i);
      scr_hash_set_kv(hash, "Filemap", file);
    }
    scr_hash_write_path(scr_master_map_file, hash);
    scr_hash_delete(&hash);
  } else {
    /* send our global rank to the master */
    MPI_Gather(
      &scr_my_rank_world, 1, MPI_INT, NULL, 1, MPI_INT,
      0, scr_storedesc_cntl->comm
    );
  }

  /* receive our filemap from master */
  scr_hash* recv_hash = scr_hash_new();
  scr_hash_exchange(send_hash, recv_hash, scr_storedesc_cntl->comm);

  /* merge map sent from master into our map */
  scr_hash* map_from_master = scr_hash_getf(recv_hash, "%d", 0);
  if (map_from_master != NULL) {
    scr_hash_merge(my_map, map_from_master);
  }

  /* write out our local filemap */
  if (scr_filemap_num_ranks(my_map) > 0) {
    scr_filemap_write(scr_map_file, my_map);
  }

  /* free off our send and receive hashes */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  return SCR_SUCCESS;
}
Beispiel #23
0
/* Broadcasts the dataset descriptor for dataset id from the lowest world
 * rank that holds a copy, and records it in the filemap entry of every
 * process.
 *
 *   map - filemap to search for a descriptor and to store the result in;
 *         it is rewritten to scr_map_file on success
 *   id  - id of the dataset whose descriptor is distributed
 *
 * Returns SCR_FAILURE if any process saw an out-of-range rank in its
 * filemap or if no process holds the descriptor, SCR_SUCCESS otherwise. */
static int scr_distribute_datasets(scr_filemap* map, int id)
{
  /* hash that carries the descriptor we offer and later receive */
  scr_hash* dset_hash = scr_hash_new();

  /* list the ranks for which we hold data of this dataset */
  int  count = 0;
  int* rank_list = NULL;
  scr_filemap_list_ranks_by_dataset(map, id, &count, &rank_list);

  /* scan ranks in order and stop at the first one that
   * comes with a non-empty dataset descriptor */
  int bad_rank   = 0;
  int found_dset = 0;
  int idx = 0;
  while (idx < count && !found_dset) {
    int rank = rank_list[idx];
    idx++;

    /* flag any rank id that falls outside of the world */
    if (rank < 0 || rank >= scr_ranks_world) {
      scr_err("Invalid rank id %d in world of %d @ %s:%d",
        rank, scr_ranks_world, __FILE__, __LINE__
      );
      bad_rank = 1;
    }

    /* fetch the dataset descriptor stored for this rank */
    scr_hash* desc = scr_hash_new();
    scr_filemap_get_dataset(map, id, rank, desc);

    /* keep a copy if it has entries, the temporary
     * hash is released either way */
    if (scr_hash_size(desc) > 0) {
      found_dset = 1;
      scr_hash_merge(dset_hash, desc);
    }
    scr_hash_delete(&desc);
  }

  /* the rank list is no longer needed */
  scr_free(&rank_list);

  /* give up if any process came across an invalid rank */
  if (! scr_alltrue(!bad_rank)) {
    scr_hash_delete(&dset_hash);
    return SCR_FAILURE;
  }

  /* offer our own rank if we hold the descriptor, otherwise offer a
   * value one past the last valid rank so that we never win the min */
  int candidate = found_dset ? scr_my_rank_world : scr_ranks_world;
  int lowest;
  MPI_Allreduce(&candidate, &lowest, 1, MPI_INT, MPI_MIN, scr_comm_world);

  /* nobody offered a valid rank, so nobody has the descriptor */
  if (lowest >= scr_ranks_world) {
    scr_hash_delete(&dset_hash);
    return SCR_FAILURE;
  }

  /* everyone but the source clears its hash before the bcast */
  if (lowest != scr_my_rank_world) {
    scr_hash_unset_all(dset_hash);
  }
  scr_hash_bcast(dset_hash, lowest, scr_comm_world);

  /* store the descriptor under our own rank and persist the filemap */
  scr_filemap_set_dataset(map, id, scr_my_rank_world, dset_hash);
  scr_filemap_write(scr_map_file, map);

  /* TODO: at this point, we could delete descriptors for other
   * ranks for this checkpoint */

  /* release the descriptor hash */
  scr_hash_delete(&dset_hash);

  return SCR_SUCCESS;
}
Beispiel #24
0
int main(int argc, char* argv[])
{
  int i, j;
  int index = 1;

  /* print usage if not enough arguments were given */
  if (argc < 2) {
    printf("Usage: scr_rebuild_xor <size> <root> <missing_xor_filename> <ordered_remaining_xor_filenames>\n");
    return 1;
  }

  /* TODO: want to pass this on command line? */
  /* get current working directory */
  char dsetdir[SCR_MAX_FILENAME];
  scr_getcwd(dsetdir, sizeof(dsetdir));

  /* create and reduce path for dataset */
  scr_path* path_dset = scr_path_from_str(dsetdir);
  scr_path_reduce(path_dset);

  /* allocate buffers */
  char* buffer_A = malloc(buffer_size * sizeof(char));
  char* buffer_B = malloc(buffer_size * sizeof(char));
  if (buffer_A == NULL || buffer_B == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the size of the XOR set */
  int xor_set_size = (int) strtol(argv[index++], (char **)NULL, 10);
  if (xor_set_size <= 0) {
    scr_err("Invalid XOR set size argument %s @ %s:%d",
      argv[index-1], __FILE__, __LINE__
    );
    return 1;
  }

  /* allocate memory for data structures based on the XOR set size */
  int*   num_files  = malloc(xor_set_size * sizeof(int));
  int*   offsets    = malloc(xor_set_size * sizeof(int));
  char** xor_files  = malloc(xor_set_size * sizeof(char*));
  int*   xor_fds    = malloc(xor_set_size * sizeof(int));
  scr_hash** xor_headers = malloc(xor_set_size * sizeof(scr_hash*));
  if (num_files == NULL || offsets == NULL || xor_files == NULL || xor_fds == NULL || xor_headers == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the rank of the missing process (the root) */
  int root = (int) strtol(argv[index++], (char **)NULL, 10);
  if (root < 0 || root >= xor_set_size) {
    scr_err("Invalid root argument %s @ %s:%d",
      argv[index-1], __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the missing xor filename */
  xor_files[0] = strdup(argv[index++]);
  if (xor_files[0] == NULL) {
    scr_err("Failed to dup XOR filename @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the xor filenames (expected to be in order of XOR segment number) */
  /* we order ranks so that root is index 0, the rank to the right of root is index 1, and so on */
  for (i=0; i < xor_set_size; i++) {
    xor_headers[i] = scr_hash_new();

    /* we'll get the XOR file name for root from the header stored in the XOR file of the partner */
    if (i == root) {
      continue;
    }

    /* adjust the index relative to root */
    j = i - root;
    if (j < 0) {
      j += xor_set_size;
    }

    /* copy the XOR file name */
    xor_files[j] = strdup(argv[index++]);
    if (xor_files[j] == NULL) {
      scr_err("Failed to dup XOR filename @ %s:%d",
        __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* open each of the xor files and read in the headers */
  for (i=1; i < xor_set_size; i++) {
    /* open each xor file for reading */
    xor_fds[i] = scr_open(xor_files[i], O_RDONLY);
    if (xor_fds[i] < 0) {
      scr_err("Opening xor segment file: scr_open(%s) errno=%d %s @ %s:%d",
        xor_files[i], errno, strerror(errno), __FILE__, __LINE__
      );
      return 1;
    }

    /* read the header from this xor file */
    if (scr_hash_read_fd(xor_files[i], xor_fds[i], xor_headers[i]) < 0) {
      scr_err("Failed to read XOR header from %s @ %s:%d",
        xor_files[i], __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* build header for missing XOR file */
  int partner_rank = -1;
  if (xor_set_size >= 2) {
    scr_hash_merge(xor_headers[0], xor_headers[1]);

    /* fetch our own file list from rank to our right */
    scr_hash* rhs_hash = scr_hash_get(xor_headers[1], SCR_KEY_COPY_XOR_PARTNER);
    scr_hash* current_hash = scr_hash_new();
    scr_hash_merge(current_hash, rhs_hash);
    scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT, current_hash);

    /* we are the partner to the rank to our left */
    scr_hash* lhs_hash = scr_hash_get(xor_headers[xor_set_size-1], SCR_KEY_COPY_XOR_CURRENT);
    scr_hash* partner_hash = scr_hash_new();
    scr_hash_merge(partner_hash, lhs_hash);
    scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_PARTNER, partner_hash);

    /* get global rank of partner */
    if (scr_hash_util_get_int(lhs_hash, SCR_KEY_COPY_XOR_RANK, &partner_rank) != SCR_SUCCESS) {
      scr_err("Failed to read partner rank from XOR file header in %s @ %s:%d",
        xor_files[xor_set_size-1], __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* get a pointer to the current hash for the missing rank */
  scr_hash* missing_current_hash = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT);

  /* read the rank */
  int my_rank = -1;
  if (scr_hash_util_get_int(missing_current_hash, SCR_KEY_COPY_XOR_RANK, &my_rank) != SCR_SUCCESS) {
    scr_err("Failed to read rank from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* get the dataset */
  scr_dataset* dataset = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_DATASET);

  /* read the dataset id */
  int dset_id = -1;
  if (scr_dataset_get_id(dataset, &dset_id) != SCR_SUCCESS) {
    scr_err("Failed to read dataset id from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* read the ranks */
  int num_ranks = -1;
  if (scr_hash_util_get_int(xor_headers[0], SCR_KEY_COPY_XOR_RANKS, &num_ranks) != SCR_SUCCESS) {
    scr_err("Failed to read ranks from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* get name of partner's fmap */
  scr_path* path_partner_map = scr_path_from_str(".scr");
  scr_path_append_strf(path_partner_map, "fmap.%d.scr", partner_rank);

  /* extract partner's flush descriptor */
  scr_hash* flushdesc = scr_hash_new();
  scr_filemap* partner_map = scr_filemap_new();
  scr_filemap_read(path_partner_map, partner_map);
  scr_filemap_get_flushdesc(partner_map, dset_id, partner_rank, flushdesc);
  scr_filemap_delete(&partner_map);

  /* delete partner map path */
  scr_path_delete(&path_partner_map);

  /* determine whether we should preserve user directories */
  int preserve_dirs = 0;
  scr_hash_util_get_int(flushdesc, SCR_SCAVENGE_KEY_PRESERVE, &preserve_dirs);

  /* read the chunk size */
  unsigned long chunk_size = 0;
  if (scr_hash_util_get_unsigned_long(xor_headers[0], SCR_KEY_COPY_XOR_CHUNK, &chunk_size) != SCR_SUCCESS) {
    scr_err("Failed to read chunk size from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* determine number of files each member wrote in XOR set */
  for (i=0; i < xor_set_size; i++) {
    /* record the number of files for this rank */
    scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT);
    if (scr_hash_util_get_int(current_hash, SCR_KEY_COPY_XOR_FILES, &num_files[i]) != SCR_SUCCESS) {
      scr_err("Failed to read number of files from %s @ %s:%d",
        xor_files[i], __FILE__, __LINE__
      );
      return 1;
    }
  }
  
  /* count the total number of files and set the offsets array */
  int total_num_files = 0;
  for (i=0; i < xor_set_size; i++) {
    offsets[i] = total_num_files;
    total_num_files += num_files[i];
  }

  /* allocate space for a file descriptor, file name pointer, and filesize for each user file */
  int* user_fds                 = (int*)           malloc(total_num_files * sizeof(int));
  char** user_files             = (char**)         malloc(total_num_files * sizeof(char*));
  char** user_rel_files         = (char**)         malloc(total_num_files * sizeof(char*));
  unsigned long* user_filesizes = (unsigned long*) malloc(total_num_files * sizeof(unsigned long));
  if (user_fds == NULL || user_files == NULL || user_rel_files == NULL || user_filesizes == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* get file name, file size, and open each of the user files that we have */
  for (i=0; i < xor_set_size; i++) {
    scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT);

    /* for each file belonging to this rank, get filename, filesize, and open file */
    for (j=0; j < num_files[i]; j++) {
      int offset = offsets[i] + j;

      /* get the meta data for this file */
      scr_meta* meta = scr_hash_get_kv_int(current_hash, SCR_KEY_COPY_XOR_FILE, j);
      if (meta == NULL) {
        scr_err("Failed to read meta data for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* record the filesize of this file */
      if (scr_meta_get_filesize(meta, &user_filesizes[offset]) != SCR_SUCCESS) {
        scr_err("Failed to read filesize field for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* get filename */
      char* origname;
      if (scr_meta_get_origname(meta, &origname) != SCR_SUCCESS) {
        scr_err("Failed to read original name for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* construct full path to user file */
      scr_path* path_user_full = scr_path_from_str(origname);
      if (preserve_dirs) {
        /* get original path of file */
        char* origpath;
        if (scr_meta_get_origpath(meta, &origpath) != SCR_SUCCESS) {
          scr_err("Failed to read original path for file %d in %s @ %s:%d",
            j, xor_files[i], __FILE__, __LINE__
          );
          return 1;
        }

        /* construct full path to file */
        scr_path_prepend_str(path_user_full, origpath);
      } else {
        /* construct full path to file */
        scr_path_prepend(path_user_full, path_dset);
      }

      /* reduce path to user file */
      scr_path_reduce(path_user_full);

      /* make a copy of the full path */
      user_files[offset] = scr_path_strdup(path_user_full);

      /* make a copy of relative path */
      scr_path* path_user_rel = scr_path_relative(path_dset, path_user_full);
      user_rel_files[offset] = scr_path_strdup(path_user_rel);
      scr_path_delete(&path_user_rel);

      /* free the full path */
      scr_path_delete(&path_user_full);

      /* open the file */
      if (i == 0) {
        /* create directory for file */
        scr_path* user_dir_path = scr_path_from_str(user_files[offset]);
        scr_path_reduce(user_dir_path);
        scr_path_dirname(user_dir_path);
        if (! scr_path_is_null(user_dir_path)) {
          char* user_dir = scr_path_strdup(user_dir_path);
          mode_t mode_dir = scr_getmode(1, 1, 1);
          if (scr_mkdir(user_dir, mode_dir) != SCR_SUCCESS) {
            scr_err("Failed to create directory for user file %s @ %s:%d",
              user_dir, __FILE__, __LINE__
            );
            return 1;
          }
          scr_free(&user_dir);
        }
        scr_path_delete(&user_dir_path);

        /* open missing file for writing */
        mode_t mode_file = scr_getmode(1, 1, 0);
        user_fds[offset] = scr_open(user_files[offset], O_WRONLY | O_CREAT | O_TRUNC, mode_file);
        if (user_fds[offset] < 0) {
          scr_err("Opening user file for writing: scr_open(%s) errno=%d %s @ %s:%d",
            user_files[offset], errno, strerror(errno), __FILE__, __LINE__
          );
          return 1;
        }
      } else {
        /* open existing file for reading */
        user_fds[offset] = scr_open(user_files[offset], O_RDONLY);
        if (user_fds[offset] < 0) {
          scr_err("Opening user file for reading: scr_open(%s) errno=%d %s @ %s:%d",
            user_files[offset], errno, strerror(errno), __FILE__, __LINE__
          );
          return 1;
        }
      }
    }
  }

  /* finally, open the xor file for the missing rank */
  mode_t mode_file = scr_getmode(1, 1, 0);
  xor_fds[0] = scr_open(xor_files[0], O_WRONLY | O_CREAT | O_TRUNC, mode_file);
  if (xor_fds[0] < 0) {
    scr_err("Opening xor file to be reconstructed: scr_open(%s) errno=%d %s @ %s:%d",
      xor_files[0], errno, strerror(errno), __FILE__, __LINE__
    );
    return 1;
  }

  int rc = 0;

  /* write the header to the XOR file of the missing rank */
  if (scr_hash_write_fd(xor_files[0], xor_fds[0], xor_headers[0]) < 0) {
    rc = 1;
  }

  /* this offset array records the current position we are in the logical file for each rank */
  unsigned long* offset = malloc(xor_set_size * sizeof(unsigned long));
  if (offset == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }
  for (i=0; i < xor_set_size; i++) {
    offset[i] = 0;
  }

  unsigned long write_pos = 0;
  int chunk_id;
  for (chunk_id = 0; chunk_id < xor_set_size && rc == 0; chunk_id++) {
    size_t nread = 0;
    while (nread < chunk_size && rc == 0) {
      /* read upto buffer_size bytes at a time */
      size_t count = chunk_size - nread;
      if (count > buffer_size) {
        count = buffer_size;
      }

      /* clear our buffer */
      memset(buffer_A, 0, count);

      /* read a segment from each rank and XOR it into our buffer */
      for (i=1; i < xor_set_size; i++) {
        /* read the next set of bytes for this chunk from my file into send_buf */
        if (chunk_id != ((i + root) % xor_set_size)) {
          /* read chunk from the logical file for this rank */
          if (scr_read_pad_n(num_files[i], &user_files[offsets[i]], &user_fds[offsets[i]],
                             buffer_B, count, offset[i], &user_filesizes[offsets[i]]) != SCR_SUCCESS)
          {
            /* our read failed, set the return code to an error */
            rc = 1;
            count = 0;
          }
          offset[i] += count;
        } else {
          /* read chunk from the XOR file for this rank */
          if (scr_read_attempt(xor_files[i], xor_fds[i], buffer_B, count) != count) {
            /* our read failed, set the return code to an error */
            rc = 1;
            count = 0;
          }
        }

        /* TODO: XORing with unsigned long would be faster here (if chunk size is multiple of this size) */
        /* merge the blocks via xor operation */
        for (j = 0; j < count; j++) {
          buffer_A[j] ^= buffer_B[j];
        }
      }

      /* at this point, we have the data from the missing rank, write it out */
      if (chunk_id != root) {
        /* write chunk to logical file for the missing rank */
        if (scr_write_pad_n(num_files[0], &user_files[0], &user_fds[0],
                            buffer_A, count, write_pos, &user_filesizes[0]) != SCR_SUCCESS)
        {
          /* our write failed, set the return code to an error */
          rc = 1;
        }
        write_pos += count;
      } else {
        /* write chunk to xor file for the missing rank */
        if (scr_write_attempt(xor_files[0], xor_fds[0], buffer_A, count) != count) {
          /* our write failed, set the return code to an error */
          rc = 1;
        }
      }

      nread += count;
    }
  }

  /* close each of the user files */
  for (i=0; i < total_num_files; i++) {
    if (scr_close(user_files[i], user_fds[i]) != SCR_SUCCESS) {
      rc = 1;
    }
  }

  /* close each of the XOR files */
  for (i=0; i < xor_set_size; i++) {
    if (scr_close(xor_files[i], xor_fds[i]) != SCR_SUCCESS) {
      rc = 1;
    }
  }

  /* if the write failed, delete the files we just wrote, and return an error */
  if (rc != 0) {
    for (j=0; j < num_files[0]; j++) {
      scr_file_unlink(user_files[j]);
    }
    scr_file_unlink(xor_files[0]);
    return 1;
  }

  /* check that filesizes are correct */
  unsigned long filesize;
  for (j=0; j < num_files[0]; j++) {
    filesize = scr_file_size(user_files[j]);
    if (filesize != user_filesizes[j]) {
      /* the filesize check failed, so delete the file */
      scr_file_unlink(user_files[j]);

      /* mark the file as incomplete */
      scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j);
      scr_meta_set_complete(meta, 0);

      rc = 1;
    }
  }
  /* TODO: we didn't record the filesize of the XOR file for the missing rank anywhere */

  /* create a filemap for this rank */
  scr_filemap* map = scr_filemap_new();
  if (map == NULL) {
    scr_err("Failed to allocate filemap @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* record the dataset information in the filemap */
  scr_filemap_set_dataset(map, dset_id, my_rank, dataset);

  /* write meta data for each of the user files and add each one to the filemap */
  for (j=0; j < num_files[0]; j++) {
    /* add user file to filemap and record meta data */
    char* user_file_relative = user_rel_files[j];
    scr_filemap_add_file(map, dset_id, my_rank, user_file_relative);
    scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j);
    scr_filemap_set_meta(map, dset_id, my_rank, user_file_relative, meta);
  }

  /* write meta data for xor file and add it to the filemap */
  scr_filemap_add_file(map, dset_id, my_rank, xor_files[0]);
  unsigned long full_chunk_filesize = scr_file_size(xor_files[0]);
  int missing_complete = 1;
  scr_meta* meta_chunk = scr_meta_new();
  scr_meta_set_filename(meta_chunk, xor_files[0]);
  scr_meta_set_filetype(meta_chunk, SCR_META_FILE_XOR);
  scr_meta_set_filesize(meta_chunk, full_chunk_filesize);
  /* TODO: remove this from meta file, for now it's needed in scr_index.c */
  scr_meta_set_ranks(meta_chunk, num_ranks);
  scr_meta_set_complete(meta_chunk, missing_complete);
  scr_filemap_set_meta(map, dset_id, my_rank, xor_files[0], meta_chunk);

  /* set expected number of files for the missing rank */
  int expected_num_files = scr_filemap_num_files(map, dset_id, my_rank);
  scr_filemap_set_expected_files(map, dset_id, my_rank, expected_num_files);

  /* compute, check, and store crc values with files */
  for (j=0; j < num_files[0]; j++) {
    /* compute crc on user file */
    char* user_file_relative = user_rel_files[j];
    if (scr_compute_crc(map, dset_id, my_rank, user_file_relative) != SCR_SUCCESS) {
      /* the crc check failed, so delete the file */
      scr_file_unlink(user_files[j]);
      rc = 1;
    }
  }
  if (scr_compute_crc(map, dset_id, my_rank, xor_files[0]) != SCR_SUCCESS) {
    /* the crc check failed, so delete the file */
    scr_file_unlink(xor_files[0]);
    rc = 1;
  }

  /* store flush descriptor */
  scr_filemap_set_flushdesc(map, dset_id, my_rank, flushdesc);

  /* write filemap for this rank */
  scr_path* path_map = scr_path_from_str(".scr");
  scr_path_append_strf(path_map, "fmap.%d.scr", my_rank);
  if (scr_filemap_write(path_map, map) != SCR_SUCCESS) {
    rc = 1;
  }
  scr_path_delete(&path_map);

  /* delete the map */
  scr_filemap_delete(&map);

  scr_meta_delete(&meta_chunk);

  /* delete the flush/scavenge descriptor */
  scr_hash_delete(&flushdesc);

  scr_free(&offset);

  for (i=0; i < total_num_files; i++) {
    scr_free(&user_rel_files[i]);
    scr_free(&user_files[i]);
  }

  scr_free(&user_filesizes);
  scr_free(&user_rel_files);
  scr_free(&user_files);
  scr_free(&user_fds);

  for (i=0; i < xor_set_size; i++) {
    scr_hash_delete(&xor_headers[i]);
  }

  for (i=0; i < xor_set_size; i++) {
    scr_free(&xor_files[i]);
  }

  scr_free(&xor_headers);
  scr_free(&xor_fds);
  scr_free(&xor_files);
  scr_free(&offsets);
  scr_free(&num_files);

  scr_free(&buffer_B);
  scr_free(&buffer_A);

  scr_path_delete(&path_dset);

  return rc;
}
Beispiel #25
0
/* Looks up the database id of a job, inserting a new record for it if needed.
 *
 * username - user name, translated to an id via scr_mysql_read_write_id
 * jobname  - job name, translated to an id via scr_mysql_read_write_id
 * start    - job start time in seconds since the unix epoch
 * jobid    - handed to scr_mysql_read_job to receive the id of the job
 *
 * Returns SCR_FAILURE if either name lookup fails, the start time cannot be
 * quoted, or the query does not fit in the local buffer; otherwise returns
 * the result of scr_mysql_read_job.  A failed INSERT is reported but not
 * treated as fatal, since another process may have inserted the same job.
 * When built without HAVE_LIBMYSQLCLIENT the function does nothing and
 * returns SCR_SUCCESS. */
int scr_mysql_register_job(const char* username, const char* jobname, unsigned long start, unsigned long* jobid)
{
  int rc = SCR_SUCCESS;

#ifdef HAVE_LIBMYSQLCLIENT
  /* lookup the id for our username */
  unsigned long username_id;
  rc = scr_mysql_read_write_id("usernames", username, &username_id);
  if (rc != SCR_SUCCESS) {
    scr_err("Failed to find username_id for %s @ %s:%d",
            username, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* lookup the id for our jobname */
  unsigned long jobname_id;
  rc = scr_mysql_read_write_id("jobnames", jobname, &jobname_id);
  if (rc != SCR_SUCCESS) {
    scr_err("Failed to find jobname_id for %s @ %s:%d",
            jobname, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* if this job already has a db id, return it */
  rc = scr_mysql_read_job(username_id, jobname_id, jobid);
  if (rc == SCR_SUCCESS) {
    return SCR_SUCCESS;
  }

  /* didn't find the job, so we need to insert a new record into the db */

  /* translate unix seconds since epoch into mysql datetime field */
  time_t start_time_t = (time_t) start;
  char* qsecs = scr_mysql_quote_seconds(&start_time_t);

  /* check that we got a valid string for the start time */
  if (qsecs == NULL) {
    scr_err("Failed to escape and quote one or more arguments @ %s:%d",
            __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* construct the query */
  char query[1024];
  int n = snprintf(query, sizeof(query),
    "INSERT IGNORE"
    " INTO `jobs`"
    " (`id`,`username_id`,`jobname_id`,`start`)"
    " VALUES"
    " (NULL, %lu, %lu, %s)"
    " ;",
    username_id, jobname_id, qsecs
  );

  /* free the string as it is now encoded into the query */
  scr_free(&qsecs);

  /* snprintf returns a negative value on an output error and the length the
   * full string would have had otherwise, so test the sign explicitly before
   * comparing against the (unsigned) buffer size */
  if (n < 0 || (size_t) n >= sizeof(query)) {
    scr_err("Insufficient buffer space (%lu bytes) to build query (%d bytes) @ %s:%d",
            (unsigned long) sizeof(query), n, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* execute the query */
  if (scr_db_debug >= 1) {
    scr_dbg(0, "%s", query);
  }
  if (mysql_real_query(&scr_mysql, query, (unsigned int) strlen(query))) {
    scr_err("Insert failed, query = (%s), error = (%s) @ %s:%d",
            query, mysql_error(&scr_mysql), __FILE__, __LINE__
    );
    /* don't return failure, since another process may have just beat us to the punch */
  }

  /* now the job should be in the db, so read again to get its id */
  rc = scr_mysql_read_job(username_id, jobname_id, jobid);

#endif
  return rc;
}
Beispiel #26
0
/* Walks the datasets held in cache and tries to distribute and rebuild each.
 *
 * A dataset that rebuilds is marked as residing in cache in the flush file,
 * and scr_dataset_id / scr_checkpoint_id are raised to its id when that id
 * is larger.  A dataset that fails to distribute or rebuild is deleted from
 * cache.  Rank 0 emits debug messages, log events, and the elapsed time.
 *
 * Returns SCR_SUCCESS if at least one dataset was rebuilt, and SCR_FAILURE
 * otherwise. */
int scr_cache_rebuild(scr_filemap* map)
{
  int rc = SCR_FAILURE;

  /* only rank 0 records the start time, since only rank 0 reports it */
  time_t log_begin   = 0;
  double wtime_begin = 0.0;
  if (scr_my_rank_world == 0) {
    log_begin   = scr_log_seconds();
    wtime_begin = MPI_Wtime();
  }

  /* becomes 1 once we try to distribute files for at least one dataset */
  int attempted = 0;

  /* drop incomplete files before looking at what we have */
  scr_cache_clean(map);

  /* fetch the ordered list of dataset ids in our cache */
  int  dset_count;
  int* dset_ids;
  scr_filemap_list_datasets(map, &dset_count, &dset_ids);

  /* TODO: put dataset selection logic into a function */

  /* TODO: also attempt to recover datasets which we were in the
   * middle of flushing */
  int cursor = 0;
  for (;;) {
    /* pick the smallest id across all processes (returned in id),
     * which also advances our cursor when appropriate */
    int id;
    scr_next_dataset(dset_count, dset_ids, &cursor, &id);

    /* no dataset left to try */
    if (id == -1) {
      break;
    }

    /* we are about to attempt at least one dataset */
    attempted = 1;

    /* announce the attempt */
    if (scr_my_rank_world == 0) {
      scr_dbg(1, "Attempting to distribute and rebuild dataset %d", id);
      if (scr_log_enable) {
        time_t stamp = scr_log_seconds();
        scr_log_event("REBUILD STARTED", NULL, &id, &stamp, NULL);
      }
    }

    /* spread the dataset descriptor, then the redundancy descriptor,
     * then the files, and finally try the rebuild itself */
    int rebuilt = 0;
    if (scr_distribute_datasets(map, id) == SCR_SUCCESS) {
      scr_reddesc reddesc;
      if (scr_distribute_reddescs(map, id, &reddesc) == SCR_SUCCESS) {
        /* make the cache directory for this dataset */
        scr_cache_dir_create(&reddesc, id);

        /* move files to the ranks that need them */
        scr_distribute_files(map, &reddesc, id);

        /* rebuild whatever is missing */
        if (scr_reddesc_recover(map, &reddesc, id) == SCR_SUCCESS) {
          rebuilt = 1;

          /* rebuilding any one dataset is enough to report success */
          rc = SCR_SUCCESS;

          /* keep scr_dataset_id at the largest id seen */
          if (scr_dataset_id < id) {
            scr_dataset_id = id;
          }

          /* TODO: dataset may not be a checkpoint */
          /* keep scr_checkpoint_id at the largest id seen */
          if (scr_checkpoint_id < id) {
            scr_checkpoint_id = id;
          }

          /* note in the flush file that this dataset lives in cache */
          scr_flush_file_location_set(id, SCR_FLUSH_KEY_LOCATION_CACHE);

          /* TODO: if storing flush file in control directory on each node,
           * if we find any process that has marked the dataset as flushed,
           * mark it as flushed in every flush file */

          /* TODO: would like to restore flushing status to datasets that
           * were in the middle of a flush, but we need to better manage
           * the transfer file to do this, so for now just forget about
           * flushing this dataset */
          scr_flush_file_location_unset(id, SCR_FLUSH_KEY_LOCATION_FLUSHING);
        }

        /* release the redundancy descriptor */
        scr_reddesc_free(&reddesc);
      }
    }

    if (rebuilt) {
      /* rebuild worked, log success */
      if (scr_my_rank_world == 0) {
        scr_dbg(1, "Rebuilt dataset %d", id);
        if (scr_log_enable) {
          time_t stamp = scr_log_seconds();
          scr_log_event("REBUILD SUCCEEDED", NULL, &id, &stamp, NULL);
        }
      }
    } else {
      /* distribute or rebuild failed, log it */
      if (scr_my_rank_world == 0) {
        scr_dbg(1, "Failed to rebuild dataset %d", id);
        if (scr_log_enable) {
          time_t stamp = scr_log_seconds();
          scr_log_event("REBUILD FAILED", NULL, &id, &stamp, NULL);
        }
      }

      /* TODO: there is a bug here, since scr_cache_delete needs to read
       * the redundancy descriptor from the filemap in order to delete the
       * cache directory, but we may have failed to distribute the reddescs
       * above so not every task has one */

      /* remove the dataset we could not rebuild from cache */
      scr_cache_delete(map, id);
    }
  }

  /* stop the timer and report how the restart went */
  if (scr_my_rank_world == 0) {
    double elapsed = MPI_Wtime() - wtime_begin;

    if (attempted) {
      if (rc != SCR_SUCCESS) {
        /* scr_checkpoint_id is not defined */
        scr_dbg(1, "Scalable restart failed, took %f secs", elapsed);
        if (scr_log_enable) {
          scr_log_event("RESTART FAILED", NULL, NULL, &log_begin, &elapsed);
        }
      } else {
        scr_dbg(1, "Scalable restart succeeded for checkpoint %d, took %f secs",
          scr_checkpoint_id, elapsed
        );
        if (scr_log_enable) {
          scr_log_event("RESTART SUCCEEDED", NULL, &scr_checkpoint_id, &log_begin, &elapsed);
        }
      }
    }
  }

  /* release the list of dataset ids */
  scr_free(&dset_ids);

  return rc;
}
Beispiel #27
0
/* Copies a file into the container files that hold its segments.
 *
 * file     - path of the source file; it is read front to back
 * meta     - meta data for the file; when scr_crc_on_flush is set, the crc32
 *            computed during the copy is checked against the value stored
 *            here, or stored here if no value is present yet
 * segments - hash of segments, sorted in ascending order before use; each
 *            entry supplies a container name, a byte offset into that
 *            container, and a segment length in bytes
 * dst_dir  - directory to which each container name is appended
 *
 * Copying stops at the first segment that fails.  Every container opened
 * here is closed, and every path string built here is freed, on both the
 * success and the failure paths.
 *
 * Returns SCR_SUCCESS if all segments were copied (and the crc32 check, if
 * any, passed), SCR_FAILURE otherwise. */
static int scr_flush_file_to_containers(
  const char* file,
  scr_meta* meta,
  scr_hash* segments,
  const char* dst_dir)
{
  /* check that we got something for a source file */
  if (file == NULL || strcmp(file, "") == 0) {
    scr_err("Invalid source file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that our other arguments are valid */
  if (meta == NULL || segments == NULL) {
    scr_err("Invalid metadata or segments @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* open the file for reading */
  int fd_src = scr_open(file, O_RDONLY);
  if (fd_src < 0) {
    scr_err("Opening file to copy: scr_open(%s) errno=%d %s @ %s:%d",
      file, errno, strerror(errno), __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

#if !defined(__APPLE__)
  /* hint to the kernel how the source file will be accessed.
   * NOTE(review): POSIX describes the advice argument as a single value, not
   * a bit mask, so OR-ing two advice constants may be rejected -- confirm
   * against the posix_fadvise specification. The return value is ignored. */
  posix_fadvise(fd_src, 0, 0, POSIX_FADV_DONTNEED | POSIX_FADV_SEQUENTIAL);
#endif

  /* get the buffer size we'll use to write to the file */
  unsigned long buf_size = scr_file_buf_size;

  /* allocate buffer to read in file chunks */
  char* buf = (char*) SCR_MALLOC(buf_size);

  /* initialize crc value, it is only consulted when scr_crc_on_flush is set */
  uLong crc = 0;
  if (scr_crc_on_flush) {
    crc = crc32(0L, Z_NULL, 0);
  }

  int rc = SCR_SUCCESS;

  /* write out each segment in order, stopping at the first failure since
   * the source descriptor is no longer positioned correctly after one */
  scr_hash_sort_int(segments, SCR_HASH_SORT_ASCENDING);
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(segments);
       elem != NULL && rc == SCR_SUCCESS;
       elem = scr_hash_elem_next(elem))
  {
    /* get the container info for this segment */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get the offset into the container and the length of the segment (both in bytes) */
    char* container_name;
    unsigned long container_offset, segment_length;
    if (scr_container_get_name_offset_length(hash,
      &container_name, &container_offset, &segment_length) != SCR_SUCCESS)
    {
      scr_err("Failed to get segment offset and length @ %s:%d",
              __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
      break;
    }

    /* build full name to destination file */
    scr_path* dst_path = scr_path_from_str(dst_dir);
    scr_path_append_str(dst_path, container_name);
    scr_path_reduce(dst_path);
    char* dst_file = scr_path_strdup(dst_path);

    /* open container file for writing -- we don't truncate here because more than one
     * process may be writing to the same file */
    int fd_container = scr_open(dst_file, O_WRONLY);
    if (fd_container < 0) {
      scr_err("Opening file for writing: scr_open(%s) errno=%d %s @ %s:%d",
        dst_file, errno, strerror(errno), __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    } else {
#if !defined(__APPLE__)
      /* same access hint as for the source file, see the note above */
      posix_fadvise(fd_container, 0, 0, POSIX_FADV_DONTNEED | POSIX_FADV_SEQUENTIAL);
#endif

      /* seek to offset within container */
      off_t pos = (off_t) container_offset;
      if (lseek(fd_container, pos, SEEK_SET) == (off_t)-1) {
        /* our seek failed, record the error and skip the copy */
        scr_err("Failed to seek to byte %lu in %s @ %s:%d",
          container_offset, dst_file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      } else {
        /* copy data from file into container in chunks */
        unsigned long remaining = segment_length;
        while (remaining > 0) {
          /* read / write up to buf_size bytes at a time from file */
          unsigned long count = remaining;
          if (count > buf_size) {
            count = buf_size;
          }

          /* attempt to read count bytes from file */
          int nread = scr_read_attempt(file, fd_src, buf, count);

          /* if we read some bytes, write them out */
          if (nread > 0) {
            /* optionally compute crc value as we go */
            if (scr_crc_on_flush) {
              crc = crc32(crc, (const Bytef*) buf, (uInt) nread);
            }

            /* write our nread bytes out */
            int nwrite = scr_write_attempt(dst_file, fd_container, buf, nread);

            /* check for a write error or a short write */
            if (nwrite != nread) {
              /* write had a problem, stop copying and return an error */
              rc = SCR_FAILURE;
              break;
            }

            /* subtract the bytes we've processed from the number remaining */
            remaining -= (unsigned long) nread;
          }

          /* treat a read error or a short read as a failure; the sign is
           * tested first so the comparison against count is well defined */
          if (nread < 0 || (unsigned long) nread < count) {
            rc = SCR_FAILURE;
            break;
          }
        }
      }

      /* close container, whether or not the copy worked */
      if (scr_close(dst_file, fd_container) != SCR_SUCCESS) {
        rc = SCR_FAILURE;
      }
    }

    /* free the container file name and path on every path out of the body */
    scr_free(&dst_file);
    scr_path_delete(&dst_path);
  }

  /* close the source file */
  if (scr_close(file, fd_src) != SCR_SUCCESS) {
    rc = SCR_FAILURE;
  }

  /* free buffer */
  scr_free(&buf);

  /* verify / set crc value */
  if (rc == SCR_SUCCESS && scr_crc_on_flush) {
    uLong crc2;
    if (scr_meta_get_crc32(meta, &crc2) == SCR_SUCCESS) {
      /* if a crc is already set in the meta data, check that we computed the same value */
      if (crc != crc2) {
        scr_err("CRC32 mismatch detected when flushing file %s @ %s:%d",
          file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }
    } else {
      /* if there is no crc set, let's set it now */
      scr_meta_set_crc32(meta, crc);
    }
  }

  return rc;
}
Beispiel #28
0
/* Flushes the files named in file_list, recording per-file status in data,
 * with rank 0 limiting how many other ranks flush at the same time.
 *
 * Rank 0 flushes its own files first, then lets at most scr_flush_width
 * other ranks run at once: each rank is sent a start flag carrying the
 * result so far and replies with its own result when done.  A rank that is
 * told an earlier flush failed skips its own flush and reports failure.
 *
 * Returns SCR_SUCCESS only if scr_alltrue reports that every rank
 * succeeded, SCR_FAILURE otherwise. */
static int scr_flush_data(scr_hash* file_list, scr_hash* data)
{
  int my_result = SCR_SUCCESS;

  if (scr_my_rank_world != 0) {
    /* wait until rank 0 tells us it is our turn */
    int go_ahead = 0;
    MPI_Status recv_status;
    MPI_Recv(&go_ahead, 1, MPI_INT, 0, 0, scr_comm_world, &recv_status);

    if (go_ahead != SCR_SUCCESS) {
      /* someone failed before we even started, so don't bother */
      my_result = SCR_FAILURE;
    } else if (scr_flush_files_list(file_list, data) != SCR_SUCCESS) {
      /* we tried to flush our files and fill in the meta data, but failed */
      my_result = SCR_FAILURE;
    }

    /* let rank 0 know we are finished and how it went */
    MPI_Send(&my_result, 1, MPI_INT, 0, 0, scr_comm_world);
  } else {
    /* rank 0 goes first: flush my files and fill in the meta data structure */
    if (scr_flush_files_list(file_list, data) != SCR_SUCCESS) {
      my_result = SCR_FAILURE;
    }

    /* size of the sliding window: the smaller of the configured width
     * and the number of other ranks */
    int window = scr_ranks_world - 1;
    if (scr_flush_width <= window) {
      window = scr_flush_width;
    }

    /* the first window entries of each array belong to the sends,
     * the second window entries belong to the matching receives */
    int*         flags = (int*)         SCR_MALLOC(2 * window * sizeof(int));
    MPI_Request* req   = (MPI_Request*) SCR_MALLOC(2 * window * sizeof(MPI_Request));
    MPI_Status wait_status;

    int next_rank = 1;
    int in_flight = 0;
    int slot      = 0;
    while (next_rank < scr_ranks_world || in_flight > 0) {
      /* start ranks until the window is full or no ranks are left */
      for (; next_rank < scr_ranks_world && in_flight < window;
           next_rank++, in_flight++, slot++)
      {
        /* receive for the reply this rank sends once it is done */
        MPI_Irecv(&flags[window + slot], 1, MPI_INT, next_rank, 0, scr_comm_world, &req[window + slot]);

        /* send telling this rank to start, carrying the result so far */
        flags[slot] = my_result;
        MPI_Isend(&flags[slot], 1, MPI_INT, next_rank, 0, scr_comm_world, &req[slot]);
      }

      /* block until any one rank replies; slot now names its entry */
      MPI_Waitany(window, &req[window], &slot, &wait_status);

      /* the send to that rank should be done as well, so complete it */
      MPI_Wait(&req[slot], &wait_status);

      /* fold the reply into the overall result */
      if (flags[window + slot] != SCR_SUCCESS) {
        my_result = SCR_FAILURE;
      }

      /* that rank no longer occupies the window */
      in_flight--;
    }

    /* release the request and flag arrays */
    scr_free(&req);
    scr_free(&flags);
  }

  /* succeed only if every rank wrote its files ok */
  return scr_alltrue((my_result == SCR_SUCCESS)) ? SCR_SUCCESS : SCR_FAILURE;
}
Beispiel #29
0
/* flush files specified in list, and record corresponding entries for summary file */
static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary)
{
  /* assume we will succeed in this flush */
  int rc = SCR_SUCCESS;

  /* flush each of my files and fill in summary data structure */
  scr_hash_elem* elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (elem = scr_hash_elem_first(files);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(elem);

    /* convert file to path and extract name of file */
    scr_path* path_name = scr_path_from_str(file);
    scr_path_basename(path_name);

    /* get the hash for this element */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get meta data for this file */
    scr_meta* meta = scr_hash_get(hash, SCR_KEY_META);

    /* if segments are defined, we flush the file to its containers,
     * otherwise we copy the file out as is */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* TODO: PRESERVE get original filename here */

      /* add this file to the summary file */
      char* name = scr_path_strdup(path_name);
      scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
      scr_free(&name);

// USERDEF fixme!
      /* flush the file to the containers listed in its segmenets */
      if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) {
        /* successfully flushed this file, record the filesize */
        unsigned long filesize = 0;
        if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
          scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
        }

        /* record the crc32 if one was computed */
        uLong crc = 0;
        if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
          scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
        }

        /* record segment information in summary file */
        scr_hash* segments_copy = scr_hash_new();
        scr_hash_merge(segments_copy, segments);
        scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy);
      } else {
        /* the flush failed */
        rc = SCR_FAILURE;

        /* explicitly mark file as incomplete */
        scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
      }
    } else {
      /* get directory to flush file to */
      char* dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) {
        /* create full path of destination file */
        scr_path* path_full = scr_path_from_str(dir);
        scr_path_append(path_full, path_name);

        /* get relative path to flushed file from SCR_PREFIX directory */
        scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full);
        if (! scr_path_is_null(path_relative)) {
          /* record the name of the file in the summary hash, and get reference to a hash for this file */
          char* name = scr_path_strdup(path_relative);
          scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
          scr_free(&name);

          /* flush the file and fill in the meta data for this file */
          if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) {
            /* successfully flushed this file, record the filesize */
            unsigned long filesize = 0;
            if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
              scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
            }

            /* record the crc32 if one was computed */
            uLong crc = 0;
            if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
              scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
            }
          } else {
            /* the flush failed */
            rc = SCR_FAILURE;

            /* explicitly mark incomplete files */
            scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
          }
        } else {
          scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d",
            dir, scr_prefix, __FILE__, __LINE__
          );
        }

        /* free relative and full paths */
        scr_path_delete(&path_relative);
        scr_path_delete(&path_full);
      } else {
        scr_abort(-1, "Failed to read directory to flush file to @ %s:%d",
          __FILE__, __LINE__
        );
      }
    }

    /* free the file name path */
    scr_path_delete(&path_name);
  }

  return rc;
}
Beispiel #30
0
/* Copies src_file into dst_dir and updates meta to reflect the outcome.
 *
 *   src_file - path of the file to be copied
 *   dst_dir  - directory that is prepended to the file's name to form the destination
 *   meta     - meta data object for the file; its complete flag is always set,
 *              and its crc32 is checked or filled in when a crc32 was computed
 *
 * Returns SCR_SUCCESS if the copy succeeded and no crc32 mismatch was found,
 * SCR_FAILURE otherwise. */
static int scr_flush_a_file(const char* src_file, const char* dst_dir, scr_meta* meta)
{
  /* derive the destination file name: take the source path, reduce it to its
   * last component, and place that under dst_dir */
  scr_path* target_path = scr_path_from_str(src_file);
  scr_path_basename(target_path);
  scr_path_prepend_str(target_path, dst_dir);
  scr_path_reduce(target_path);
  char* target = scr_path_strdup(target_path);

  /* only hand the copy routine a crc32 slot when crc-on-flush is enabled;
   * computed_crc is read below only when that slot was passed and the copy succeeded */
  uLong computed_crc;
  uLong* crc_out = scr_crc_on_flush ? &computed_crc : NULL;

  /* do the copy and translate its return code into our result */
  int copy_rc = scr_file_copy(src_file, target, scr_file_buf_size, crc_out);
  int result = (copy_rc == SCR_SUCCESS) ? SCR_SUCCESS : SCR_FAILURE;
  int have_crc = (crc_out != NULL) && (copy_rc == SCR_SUCCESS);

  scr_dbg(2, "scr_flush_a_file: Read and copied %s to %s with success code %d @ %s:%d",
    src_file, target, copy_rc, __FILE__, __LINE__
  );

  /* with a fresh crc32 in hand, either store it in meta (if meta has none yet)
   * or compare it against the value meta already holds */
  if (have_crc) {
    uLong recorded_crc;
    if (scr_meta_get_crc32(meta, &recorded_crc) != SCR_SUCCESS) {
      /* meta had no crc32, so record the one computed during the copy */
      scr_meta_set_crc32(meta, computed_crc);
    } else if (computed_crc != recorded_crc) {
      /* the copy produced a different crc32 than the one on record */

      /* TODO: unlink the copied file (scr_file_unlink on the destination) */

      /* flag the file as invalid and fail the flush */
      scr_meta_set_complete(meta, 0);
      result = SCR_FAILURE;

      scr_err("scr_flush_a_file: CRC32 mismatch detected when flushing file %s to %s @ %s:%d",
        src_file, target, __FILE__, __LINE__
      );

      /* TODO: would be good to log a "CRC32 MISMATCH" event here,
       * but right now only rank 0 can write log entries */
    }
  }

  /* TODO: check that written filesize matches expected filesize */

  /* the complete flag in meta mirrors whether the flush succeeded
   * (the meta file itself is not rewritten here, since the cached file
   * may be fine even though the flush failed) */
  scr_meta_set_complete(meta, result == SCR_SUCCESS);

  /* release the destination string and the path it was built from */
  scr_free(&target);
  scr_path_delete(&target_path);

  return result;
}