Example #1
0
File: scr_env.c Project: LLNL/scr
/* environment specific init/finalize */
int scr_env_init(void)
{

#ifdef SCR_RESOURCE_MANAGER_PMIX
  /* init pmix */
  int retval = PMIx_Init(&scr_pmix_proc, NULL, 0);
  if (retval != PMIX_SUCCESS) {
    scr_err("PMIx_Init failed: rc=%d @ %s:%d",
      retval, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }
  scr_dbg(1, "PMIx_Init succeeded @ %s:%d", __FILE__, __LINE__);
#endif /* SCR_MACHINE_TYPE == SCR_PMIX */

#ifdef HAVE_LIBCPPR
  /* attempt to init cppr */
  int cppr_ret = cppr_status();
  if (cppr_ret != CPPR_SUCCESS) {
    scr_abort(-1, "libcppr cppr_status() failed: %d '%s' @ %s:%d",
              cppr_ret, cppr_err_to_str(cppr_ret), __FILE__, __LINE__
    );
  }
  scr_dbg(1, "#bold CPPR is present @ %s:%d", __FILE__, __LINE__);
#endif /* HAVE_LIBCPPR */

    return SCR_SUCCESS;
}
Example #2
0
/* write current working directory to buf */
int scr_getcwd(char* buf, size_t size)
{
  int rc = SCR_SUCCESS;
  if (getcwd(buf, size) == NULL) {
    scr_abort(-1, "Problem reading current working directory (getcwd() errno=%d %s) @ %s:%d",
              errno, strerror(errno), __FILE__, __LINE__
    );
    rc = SCR_FAILURE;
  }
  return rc;
}
Example #3
0
/* allocates a new hash */
scr_hash* scr_hash_new()
{
  scr_hash* hash = (scr_hash*) malloc(sizeof(scr_hash));
  if (hash != NULL) {
    LIST_INIT(hash);
  } else {
    scr_abort(-1, "Failed to allocate memory for hash object @ %s:%d",
            __FILE__, __LINE__
    );
  }
  return hash;
}
Example #4
0
/* allocates a new hash element */
static scr_hash_elem* scr_hash_elem_new()
{
  scr_hash_elem* elem = (scr_hash_elem*) malloc(sizeof(scr_hash_elem));
  if (elem != NULL) {
    elem->key  = NULL;
    elem->hash = NULL;
  } else {
    scr_abort(-1, "Failed to allocate memory for hash element @ %s:%d",
            __FILE__, __LINE__
    );
  }
  return elem;
}
Example #5
0
/* searches for name and returns a character pointer to its value if set,
 * returns NULL if not found */
char* scr_param_get(char* name)
{
  char* value = NULL;

  /* see if this parameter is one which is restricted from user */
  scr_hash* no_user = scr_hash_get(scr_no_user_hash, name);

  /* if parameter is set in environment, return that value */
  if (no_user == NULL && getenv(name) != NULL) {
    /* we don't just return the getenv value directly because that causes
     * segfaults on some systems, so instead we add it to a hash and return
     * the pointer into the hash */

    /* try to lookup the value for this name in case we've already cached it */
    if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) {
      /* it's not in the hash yet, so add it */
      char* tmp_value = strdup(getenv(name));
      scr_hash_util_set_str(scr_env_hash, name, tmp_value);
      scr_free(&tmp_value);

      /* now issue our lookup again */
      if (scr_hash_util_get_str(scr_env_hash, name, &value) != SCR_SUCCESS) {
        /* it's an error if we don't find it this time */
        scr_abort(-1, "Failed to find value for %s in env hash @ %s:%d",
          name, __FILE__, __LINE__
        );
      }
    }
    
    return value;
  }

  /* otherwise, if parameter is set in user configuration file,
   * return that value */
  value = scr_hash_elem_get_first_val(scr_user_hash, name);
  if (no_user == NULL && value != NULL) {
    return value;
  }

  /* otherwise, if parameter is set in system configuration file,
   * return that value */
  value = scr_hash_elem_get_first_val(scr_system_hash, name);
  if (value != NULL) {
    return value;
  }

  /* parameter not found, return NULL */
  return NULL;
}
Example #6
0
/* allocates a new string (to be freed with scr_free)
 * that is path to user config file */
static char* user_config_path()
{
  char* file = NULL;

  /* first, use SCR_CONF_FILE if it's set */
  char* value = getenv("SCR_CONF_FILE");
  if (value != NULL) {
    file = strdup(value);
    return file;
  }

  /* otherwise, look in the prefix directory */
  char* prefix = NULL;
  value = getenv("SCR_PREFIX");
  if (value != NULL) {
    /* user set SCR_PREFIX, strdup that value */
    prefix = strdup(value);
  } else {
    /* if user didn't set with SCR_PREFIX,
     * pick up the current working directory as a default */
    char current_dir[SCR_MAX_FILENAME];
    if (scr_getcwd(current_dir, sizeof(current_dir)) != SCR_SUCCESS) {
      scr_abort(-1, "Problem reading current working directory @ %s:%d",
        __FILE__, __LINE__
      );
    }
    prefix = strdup(current_dir);
  }

  /* couldn't find a prefix directory, so bail */
  if (prefix == NULL) {
    return file;
  }

  /* tack file name on to directory */
  scr_path* prefix_path = scr_path_from_str(prefix);
  scr_path_append_str(prefix_path, SCR_CONFIG_FILE_USER);
  file = scr_path_strdup(prefix_path);
  scr_path_delete(&prefix_path);

  /* free the prefix dir which we strdup'd */
  scr_free(&prefix);

  return file;
}
Example #7
0
static int scr_compute_crc(scr_filemap* map, int id, int rank, const char* file)
{
  /* compute crc for the file */
  uLong crc_file;
  if (scr_crc32(file, &crc_file) != SCR_SUCCESS) {
    scr_err("Failed to compute crc for file %s @ %s:%d",
      file, __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* allocate a new meta data object */
  scr_meta* meta = scr_meta_new();
  if (meta == NULL) {
    scr_abort(-1, "Failed to allocate meta data object @ %s:%d",
      __FILE__, __LINE__
    );
  }

  /* read meta data from filemap */
  if (scr_filemap_get_meta(map, id, rank, file, meta) != SCR_SUCCESS) {
    return SCR_FAILURE;
  }

  int rc = SCR_SUCCESS;

  /* read crc value from meta data */
  uLong crc_meta;
  if (scr_meta_get_crc32(meta, &crc_meta) == SCR_SUCCESS) {
    /* check that the values are the same */
    if (crc_file != crc_meta) {
      rc = SCR_FAILURE;
    }
  } else {
    /* record crc in filemap */
    scr_meta_set_crc32(meta, crc_file);
    scr_filemap_set_meta(map, id, rank, file, meta);
  }

  /* free our meta data object */
  scr_meta_delete(&meta);

  return rc;
}
Example #8
0
static int scr_swap_files_copy(
  int have_outgoing, const char* file_send, scr_meta* meta_send, int rank_send, uLong* crc32_send,
  int have_incoming, const char* file_recv, scr_meta* meta_recv, int rank_recv, uLong* crc32_recv,
  MPI_Comm comm)
{
  int rc = SCR_SUCCESS;
  MPI_Request request[2];
  MPI_Status  status[2];

  /* allocate MPI send buffer */
  char *buf_send = NULL;
  if (have_outgoing) {
    buf_send = (char*) scr_align_malloc(scr_mpi_buf_size, scr_page_size);
    if (buf_send == NULL) {
      scr_abort(-1, "Allocating memory: malloc(%ld) errno=%d %s @ %s:%d",
              scr_mpi_buf_size, errno, strerror(errno), __FILE__, __LINE__
      );
      return SCR_FAILURE;
    }
  }

  /* allocate MPI recv buffer */
  char *buf_recv = NULL;
  if (have_incoming) {
    buf_recv = (char*) scr_align_malloc(scr_mpi_buf_size, scr_page_size);
    if (buf_recv == NULL) {
      scr_abort(-1, "Allocating memory: malloc(%ld) errno=%d %s @ %s:%d",
              scr_mpi_buf_size, errno, strerror(errno), __FILE__, __LINE__
      );
      return SCR_FAILURE;
    }
  }

  /* open the file to send: read-only mode */
  int fd_send = -1;
  if (have_outgoing) {
    fd_send = scr_open(file_send, O_RDONLY);
    if (fd_send < 0) {
      scr_abort(-1, "Opening file for send: scr_open(%s, O_RDONLY) errno=%d %s @ %s:%d",
              file_send, errno, strerror(errno), __FILE__, __LINE__
      );
    }
  }

  /* open the file to recv: truncate, write-only mode */
  int fd_recv = -1;
  if (have_incoming) {
    mode_t mode_file = scr_getmode(1, 1, 0);
    fd_recv = scr_open(file_recv, O_WRONLY | O_CREAT | O_TRUNC, mode_file);
    if (fd_recv < 0) {
      scr_abort(-1, "Opening file for recv: scr_open(%s, O_WRONLY | O_CREAT | O_TRUNC, ...) errno=%d %s @ %s:%d",
              file_recv, errno, strerror(errno), __FILE__, __LINE__
      );
    }
  }

  /* exchange file chunks */
  int nread, nwrite;
  int sending = 0;
  if (have_outgoing) {
    sending = 1;
  }
  int receiving = 0;
  if (have_incoming) {
    receiving = 1;
  }
  while (sending || receiving) {
    /* if we are still receiving a file, post a receive */
    if (receiving) {
      MPI_Irecv(buf_recv, scr_mpi_buf_size, MPI_BYTE, rank_recv, 0, comm, &request[0]);
    }

    /* if we are still sending a file, read a chunk, send it, and wait */
    if (sending) {
      nread = scr_read(file_send, fd_send, buf_send, scr_mpi_buf_size);
      if (scr_crc_on_copy && nread > 0) {
        *crc32_send = crc32(*crc32_send, (const Bytef*) buf_send, (uInt) nread);
      }
      if (nread < 0) {
        nread = 0;
      }
      MPI_Isend(buf_send, nread, MPI_BYTE, rank_send, 0, comm, &request[1]);
      MPI_Wait(&request[1], &status[1]);
      if (nread < scr_mpi_buf_size) {
        sending = 0;
      }
    }

    /* if we are still receiving a file,
     * wait on our receive to complete and write the data */
    if (receiving) {
      MPI_Wait(&request[0], &status[0]);
      MPI_Get_count(&status[0], MPI_BYTE, &nwrite);
      if (scr_crc_on_copy && nwrite > 0) {
        *crc32_recv = crc32(*crc32_recv, (const Bytef*) buf_recv, (uInt) nwrite);
      }
      scr_write(file_recv, fd_recv, buf_recv, nwrite);
      if (nwrite < scr_mpi_buf_size) {
        receiving = 0;
      }
    }
  }

  /* close the files */
  if (have_outgoing) {
    scr_close(file_send, fd_send);
  }
  if (have_incoming) {
    scr_close(file_recv, fd_recv);
  }

  /* set crc field on our file if it hasn't been set already */
  if (scr_crc_on_copy && have_outgoing) {
    uLong meta_send_crc;
    if (scr_meta_get_crc32(meta_send, &meta_send_crc) != SCR_SUCCESS) {
      scr_meta_set_crc32(meta_send, *crc32_send);
    } else {
      /* TODO: we could check that the crc on the sent file matches and take some action if not */
    }
  }

  /* free the MPI buffers */
  scr_align_free(&buf_recv);
  scr_align_free(&buf_send);

  return rc;
}
Example #9
0
static int scr_swap_files_move(
  int have_outgoing, const char* file_send, scr_meta* meta_send, int rank_send, uLong* crc32_send,
  int have_incoming, const char* file_recv, scr_meta* meta_recv, int rank_recv, uLong* crc32_recv,
  MPI_Comm comm)
{
  int rc = SCR_SUCCESS;
  MPI_Request request[2];
  MPI_Status  status[2];

  /* allocate MPI send buffer */
  char *buf_send = NULL;
  if (have_outgoing) {
    buf_send = (char*) scr_align_malloc(scr_mpi_buf_size, scr_page_size);
    if (buf_send == NULL) {
      scr_abort(-1, "Allocating memory: malloc(%ld) errno=%d %s @ %s:%d",
              scr_mpi_buf_size, errno, strerror(errno), __FILE__, __LINE__
      );
      return SCR_FAILURE;
    }
  }

  /* allocate MPI recv buffer */
  char *buf_recv = NULL;
  if (have_incoming) {
    buf_recv = (char*) scr_align_malloc(scr_mpi_buf_size, scr_page_size);
    if (buf_recv == NULL) {
      scr_abort(-1, "Allocating memory: malloc(%ld) errno=%d %s @ %s:%d",
              scr_mpi_buf_size, errno, strerror(errno), __FILE__, __LINE__
      );
      return SCR_FAILURE;
    }
  }

  /* since we'll overwrite our send file in place with the recv file,
   * which may be larger, we need to keep track of how many bytes we've
   * sent and whether we've sent them all */
  unsigned long filesize_send = 0;

  /* open our file */
  int fd = -1;
  if (have_outgoing) {
    /* we'll overwrite our send file (or just read it if there is no incoming) */
    filesize_send = scr_file_size(file_send);
    fd = scr_open(file_send, O_RDWR);
    if (fd < 0) {
      /* TODO: skip writes and return error? */
      scr_abort(-1, "Opening file for send/recv: scr_open(%s, O_RDWR) errno=%d %s @ %s:%d",
              file_send, errno, strerror(errno), __FILE__, __LINE__
      );
    }
  } else if (have_incoming) {
    /* if we're in this branch, then we only have an incoming file,
     * so we'll write our recv file from scratch */
    mode_t mode_file = scr_getmode(1, 1, 0);
    fd = scr_open(file_recv, O_WRONLY | O_CREAT | O_TRUNC, mode_file);
    if (fd < 0) {
      /* TODO: skip writes and return error? */
      scr_abort(-1, "Opening file for recv: scr_open(%s, O_WRONLY | O_CREAT | O_TRUNC, ...) errno=%d %s @ %s:%d",
              file_recv, errno, strerror(errno), __FILE__, __LINE__
      );
    }
  }

  /* exchange file chunks */
  int sending = 0;
  if (have_outgoing) {
    sending = 1;
  }
  int receiving = 0;
  if (have_incoming) {
    receiving = 1;
  }
  int nread, nwrite;
  off_t read_pos = 0, write_pos = 0;
  while (sending || receiving) {
    if (receiving) {
      /* prepare a buffer to receive up to scr_mpi_buf_size bytes */
      MPI_Irecv(buf_recv, scr_mpi_buf_size, MPI_BYTE, rank_recv, 0, comm, &request[0]);
    }

    if (sending) {
      /* compute number of bytes to read */
      unsigned long count = filesize_send - read_pos;
      if (count > scr_mpi_buf_size) {
        count = scr_mpi_buf_size;
      }

      /* read a chunk of up to scr_mpi_buf_size bytes into buf_send */
      lseek(fd, read_pos, SEEK_SET); /* seek to read position */
      nread = scr_read(file_send, fd, buf_send, count);
      if (scr_crc_on_copy && nread > 0) {
        *crc32_send = crc32(*crc32_send, (const Bytef*) buf_send, (uInt) nread);
      }
      if (nread < 0) {
        nread = 0;
      }
      read_pos += (off_t) nread; /* update read pointer */

      /* send chunk (if nread is smaller than scr_mpi_buf_size,
       * then we've read the whole file) */
      MPI_Isend(buf_send, nread, MPI_BYTE, rank_send, 0, comm, &request[1]);
      MPI_Wait(&request[1], &status[1]);

      /* check whether we've read the whole file */
      if (filesize_send == read_pos && count < scr_mpi_buf_size) {
        sending = 0;
      }
    }

    if (receiving) {
      /* count the number of bytes received */
      MPI_Wait(&request[0], &status[0]);
      MPI_Get_count(&status[0], MPI_BYTE, &nwrite);
      if (scr_crc_on_copy && nwrite > 0) {
        *crc32_recv = crc32(*crc32_recv, (const Bytef*) buf_recv, (uInt) nwrite);
      }

      /* write those bytes to file (if nwrite is smaller than scr_mpi_buf_size,
       * then we've received the whole file) */
      lseek(fd, write_pos, SEEK_SET); /* seek to write position */
      scr_write(file_recv, fd, buf_recv, nwrite);
      write_pos += (off_t) nwrite; /* update write pointer */

      /* if nwrite is smaller than scr_mpi_buf_size,
       * then assume we've received the whole file */
      if (nwrite < scr_mpi_buf_size) {
        receiving = 0;
      }
    }
  }

  /* close file and cleanup */
  if (have_outgoing && have_incoming) {
    /* sent and received a file; close it, truncate it to corect size, rename it */
    scr_close(file_send, fd);
    truncate(file_send, write_pos);
    rename(file_send, file_recv);
  } else if (have_outgoing) {
    /* only sent a file; close it, delete it, and remove its completion marker */
    scr_close(file_send, fd);
    scr_file_unlink(file_send);
  } else if (have_incoming) {
    /* only received a file; just need to close it */
    scr_close(file_recv, fd);
  }

  if (scr_crc_on_copy && have_outgoing) {
    uLong meta_send_crc;
    if (scr_meta_get_crc32(meta_send, &meta_send_crc) != SCR_SUCCESS) {
      /* we transfer this meta data across below,
       * so may as well update these fields so we can use them */
      scr_meta_set_crc32(meta_send, *crc32_send);
      /* do not complete file send, we just deleted it above */
    } else {
      /* TODO: we could check that the crc on the sent file matches and take some action if not */
    }
  }

  /* free the MPI buffers */
  scr_align_free(&buf_recv);
  scr_align_free(&buf_send);

  return rc;
}
Example #10
0
/* flush files specified in list, and record corresponding entries for summary file */
static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary)
{
  /* assume we will succeed in this flush */
  int rc = SCR_SUCCESS;

  /* flush each of my files and fill in summary data structure */
  scr_hash_elem* elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (elem = scr_hash_elem_first(files);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(elem);

    /* convert file to path and extract name of file */
    scr_path* path_name = scr_path_from_str(file);
    scr_path_basename(path_name);

    /* get the hash for this element */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get meta data for this file */
    scr_meta* meta = scr_hash_get(hash, SCR_KEY_META);

    /* if segments are defined, we flush the file to its containers,
     * otherwise we copy the file out as is */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* TODO: PRESERVE get original filename here */

      /* add this file to the summary file */
      char* name = scr_path_strdup(path_name);
      scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
      scr_free(&name);

// USERDEF fixme!
      /* flush the file to the containers listed in its segmenets */
      if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) {
        /* successfully flushed this file, record the filesize */
        unsigned long filesize = 0;
        if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
          scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
        }

        /* record the crc32 if one was computed */
        uLong crc = 0;
        if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
          scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
        }

        /* record segment information in summary file */
        scr_hash* segments_copy = scr_hash_new();
        scr_hash_merge(segments_copy, segments);
        scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy);
      } else {
        /* the flush failed */
        rc = SCR_FAILURE;

        /* explicitly mark file as incomplete */
        scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
      }
    } else {
      /* get directory to flush file to */
      char* dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) {
        /* create full path of destination file */
        scr_path* path_full = scr_path_from_str(dir);
        scr_path_append(path_full, path_name);

        /* get relative path to flushed file from SCR_PREFIX directory */
        scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full);
        if (! scr_path_is_null(path_relative)) {
          /* record the name of the file in the summary hash, and get reference to a hash for this file */
          char* name = scr_path_strdup(path_relative);
          scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
          scr_free(&name);

          /* flush the file and fill in the meta data for this file */
          if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) {
            /* successfully flushed this file, record the filesize */
            unsigned long filesize = 0;
            if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
              scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
            }

            /* record the crc32 if one was computed */
            uLong crc = 0;
            if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
              scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
            }
          } else {
            /* the flush failed */
            rc = SCR_FAILURE;

            /* explicitly mark incomplete files */
            scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
          }
        } else {
          scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d",
            dir, scr_prefix, __FILE__, __LINE__
          );
        }

        /* free relative and full paths */
        scr_path_delete(&path_relative);
        scr_path_delete(&path_full);
      } else {
        scr_abort(-1, "Failed to read directory to flush file to @ %s:%d",
          __FILE__, __LINE__
        );
      }
    }

    /* free the file name path */
    scr_path_delete(&path_name);
  }

  return rc;
}
Example #11
0
/* apply XOR redundancy scheme to dataset files */
static int scr_reddesc_apply_xor(scr_filemap* map, const scr_reddesc* c, int id)
{
  int rc = SCR_SUCCESS;
  int i;

  /* get pointer to XOR state structure */
  scr_reddesc_xor* state = (scr_reddesc_xor*) c->copy_state;

  /* allocate buffer to read a piece of my file */
  char* send_buf = (char*) scr_align_malloc(scr_mpi_buf_size, scr_page_size);
  if (send_buf == NULL) {
    scr_abort(-1, "Allocating memory for send buffer: malloc(%d) errno=%d %s @ %s:%d",
            scr_mpi_buf_size, errno, strerror(errno), __FILE__, __LINE__
    );
  }

  /* allocate buffer to read a piece of the recevied chunk file */
  char* recv_buf = (char*) scr_align_malloc(scr_mpi_buf_size, scr_page_size);
  if (recv_buf == NULL) {
    scr_abort(-1, "Allocating memory for recv buffer: malloc(%d) errno=%d %s @ %s:%d",
            scr_mpi_buf_size, errno, strerror(errno), __FILE__, __LINE__
    );
  }

  /* count the number of files I have and allocate space in structures for each of them */
  int num_files = scr_filemap_num_files(map, id, scr_my_rank_world);
  int* fds = (int*) SCR_MALLOC(num_files * sizeof(int));
  char** filenames = (char**) SCR_MALLOC(num_files * sizeof(char*));
  unsigned long* filesizes = (unsigned long*) SCR_MALLOC(num_files * sizeof(unsigned long));

  /* record partner's redundancy descriptor hash in our filemap */
  scr_hash* lhs_desc_hash = scr_hash_new();
  scr_hash* my_desc_hash  = scr_hash_new();
  scr_reddesc_store_to_hash(c, my_desc_hash);
  scr_hash_sendrecv(my_desc_hash, state->rhs_rank, lhs_desc_hash, state->lhs_rank, c->comm);
  scr_filemap_set_desc(map, id, state->lhs_rank_world, lhs_desc_hash);
  scr_hash_delete(&my_desc_hash);
  scr_hash_delete(&lhs_desc_hash);

  /* allocate a new xor file header hash */
  scr_hash* header = scr_hash_new();

  /* record the global ranks of the processes in our xor group */
  scr_hash_merge(header, state->group_map);

  /* record dataset in header */
  scr_hash* dataset = scr_hash_new();
  scr_filemap_get_dataset(map, id, scr_my_rank_world, dataset);
  scr_hash_set(header, SCR_KEY_COPY_XOR_DATASET, dataset);

  /* open each file, get the filesize of each, and read the meta data of each */
  scr_hash* current_files = scr_hash_new();
  int file_count = 0;
  unsigned long my_bytes = 0;
  scr_hash_elem* file_elem;
  for (file_elem = scr_filemap_first_file(map, id, scr_my_rank_world);
       file_elem != NULL;
       file_elem = scr_hash_elem_next(file_elem))
  {
    /* get the filename */
    filenames[file_count] = scr_hash_elem_key(file_elem);

    /* get the filesize of this file and add the byte count to the total */
    filesizes[file_count] = scr_file_size(filenames[file_count]);
    my_bytes += filesizes[file_count];

    /* read the meta data for this file and insert it into the current_files hash */
    scr_meta* file_hash = scr_meta_new();
    scr_filemap_get_meta(map, id, scr_my_rank_world, filenames[file_count], file_hash);
    scr_hash_setf(current_files, file_hash, "%d", file_count);

    /* open the file */
    fds[file_count]  = scr_open(filenames[file_count], O_RDONLY);
    if (fds[file_count] < 0) {
      /* TODO: try again? */
      scr_abort(-1, "Opening checkpoint file for copying: scr_open(%s, O_RDONLY) errno=%d %s @ %s:%d",
                filenames[file_count], errno, strerror(errno), __FILE__, __LINE__
      );
    }

    file_count++;
  }

  /* set total number of files we have, plus our rank */
  scr_hash* current_hash = scr_hash_new();
  scr_hash_set_kv_int(current_hash, SCR_KEY_COPY_XOR_RANK,  scr_my_rank_world);
  scr_hash_set_kv_int(current_hash, SCR_KEY_COPY_XOR_FILES, file_count);
  scr_hash_set(current_hash, SCR_KEY_COPY_XOR_FILE, current_files);

  /* exchange file info with partners and add data to our header */
  scr_hash* partner_hash = scr_hash_new();
  scr_hash_sendrecv(current_hash, state->rhs_rank, partner_hash, state->lhs_rank, c->comm);
  scr_hash_set(header, SCR_KEY_COPY_XOR_CURRENT, current_hash);
  scr_hash_set(header, SCR_KEY_COPY_XOR_PARTNER, partner_hash);

  /* allreduce to get maximum filesize */
  unsigned long max_bytes;
  MPI_Allreduce(&my_bytes, &max_bytes, 1, MPI_UNSIGNED_LONG, MPI_MAX, c->comm);

  /* TODO: use unsigned long integer arithmetic (with proper byte padding) instead of char to speed things up */

  /* compute chunk size according to maximum file length and number of ranks in xor set */
  /* if filesize doesn't divide evenly, then add one byte to chunk_size */
  /* TODO: check that ranks > 1 for this divide to be safe (or at partner selection time) */
  size_t chunk_size = max_bytes / (unsigned long) (c->ranks - 1);
  if ((c->ranks - 1) * chunk_size < max_bytes) {
    chunk_size++;
  }

  /* TODO: need something like this to handle 0-byte files? */
  if (chunk_size == 0) {
    chunk_size++;
  }

  /* record the dataset id and the chunk size in the xor chunk header */
  scr_hash_util_set_bytecount(header, SCR_KEY_COPY_XOR_CHUNK, chunk_size);

  /* set chunk filenames of form:  xor.<group_id>_<xor_rank+1>_of_<xor_ranks>.scr */
  char my_chunk_file[SCR_MAX_FILENAME];
  char* dir = scr_cache_dir_hidden_get(c, id);
  sprintf(my_chunk_file,  "%s/xor.%d_%d_of_%d.scr", dir, c->group_id, c->rank+1, c->ranks);
  scr_free(&dir);

  /* record chunk file in filemap before creating it */
  scr_filemap_add_file(map, id, scr_my_rank_world, my_chunk_file);
  scr_filemap_write(scr_map_file, map);

  /* open my chunk file */
  mode_t mode_file = scr_getmode(1, 1, 0);
  int fd_chunk = scr_open(my_chunk_file, O_WRONLY | O_CREAT | O_TRUNC, mode_file);
  if (fd_chunk < 0) {
    /* TODO: try again? */
    scr_abort(-1, "Opening XOR chunk file for writing: scr_open(%s) errno=%d %s @ %s:%d",
            my_chunk_file, errno, strerror(errno), __FILE__, __LINE__
    );
  }

  /* write out the xor chunk header */
  scr_hash_write_fd(my_chunk_file, fd_chunk, header);
  scr_hash_delete(&header);

  MPI_Request request[2];
  MPI_Status  status[2];

  /* XOR Reduce_scatter */
  size_t nread = 0;
  while (nread < chunk_size) {
    size_t count = chunk_size - nread;
    if (count > scr_mpi_buf_size) {
      count = scr_mpi_buf_size;
    }

    int chunk_id;
    for(chunk_id = c->ranks-1; chunk_id >= 0; chunk_id--) {
      /* read the next set of bytes for this chunk from my file into send_buf */
      if (chunk_id > 0) {
        int chunk_id_rel = (c->rank + c->ranks + chunk_id) % c->ranks;
        if (chunk_id_rel > c->rank) {
          chunk_id_rel--;
        }
        unsigned long offset = chunk_size * (unsigned long) chunk_id_rel + nread;
        if (scr_read_pad_n(num_files, filenames, fds,
                           send_buf, count, offset, filesizes) != SCR_SUCCESS)
        {
          rc = SCR_FAILURE;
        }
      } else {
        memset(send_buf, 0, count);
      }

      /* TODO: XORing with unsigned long would be faster here (if chunk size is multiple of this size) */
      /* merge the blocks via xor operation */
      if (chunk_id < c->ranks-1) {
        for (i = 0; i < count; i++) {
          send_buf[i] ^= recv_buf[i];
        }
      }

      if (chunk_id > 0) {
        /* not our chunk to write, forward it on and get the next */
        MPI_Irecv(recv_buf, count, MPI_BYTE, state->lhs_rank, 0, c->comm, &request[0]);
        MPI_Isend(send_buf, count, MPI_BYTE, state->rhs_rank, 0, c->comm, &request[1]);
        MPI_Waitall(2, request, status);
      } else {
        /* write send block to send chunk file */
        if (scr_write_attempt(my_chunk_file, fd_chunk, send_buf, count) != count) {
          rc = SCR_FAILURE;
        }
      }
    }

    nread += count;
  }

  /* close my chunkfile, with fsync */
  if (scr_close(my_chunk_file, fd_chunk) != SCR_SUCCESS) {
    rc = SCR_FAILURE;
  }

  /* close my dataset files */
  for (i=0; i < num_files; i++) {
    scr_close(filenames[i], fds[i]);
  }

  /* free the buffers */
  scr_free(&filesizes);
  /* in this case, we don't free each name, since we copied the pointer to the string in the filemap */
  scr_free(&filenames);
  scr_free(&fds);
  scr_align_free(&send_buf);
  scr_align_free(&recv_buf);

  /* TODO: need to check for errors */
  /* write meta file for xor chunk */
  unsigned long my_chunk_file_size = scr_file_size(my_chunk_file);
  scr_meta* meta = scr_meta_new();
  scr_meta_set_filename(meta, my_chunk_file);
  scr_meta_set_filetype(meta, SCR_META_FILE_XOR);
  scr_meta_set_filesize(meta, my_chunk_file_size);
  scr_meta_set_complete(meta, 1);
  /* TODODSET: move the ranks field elsewhere, for now it's needed by scr_index.c */
  scr_meta_set_ranks(meta, scr_ranks_world);
  scr_filemap_set_meta(map, id, scr_my_rank_world, my_chunk_file, meta);
  scr_filemap_write(scr_map_file, map);
  scr_meta_delete(&meta);

  /* if crc_on_copy is set, compute and store CRC32 value for chunk file */
  if (scr_crc_on_copy) {
    scr_compute_crc(map, id, scr_my_rank_world, my_chunk_file);
    /* TODO: would be nice to save this CRC in our partner's XOR file so we can check correctness on a rebuild */
  }

  return rc;
}