示例#1
0
int scr_swap_file_names(
  const char* file_send, int rank_send,
        char* file_recv, size_t size_recv, int rank_recv,
  const char* dir_recv, MPI_Comm comm)
{
  int rc = SCR_SUCCESS;

  /* determine whether we have a file to send */
  int have_outgoing = 0;
  if (rank_send != MPI_PROC_NULL &&
      file_send != NULL &&
      strcmp(file_send, "") != 0)
  {
    have_outgoing = 1;
  } else {
    /* nothing to send, make sure to use PROC_NULL in sendrecv call */
    rank_send = MPI_PROC_NULL;
  }

  /* determine whether we are expecting to receive a file */
  int have_incoming = 0;
  if (rank_recv != MPI_PROC_NULL &&
      dir_recv != NULL &&
      strcmp(dir_recv, "") != 0)
  {
    have_incoming = 1;
  } else {
    /* nothing to recv, make sure to use PROC_NULL in sendrecv call */
    rank_recv = MPI_PROC_NULL;
  }

  /* exchange file names with partners, note that we initialize
   * file_recv_orig to NULL in case we recv from MPI_PROC_NULL */
  char* file_recv_orig = NULL;
  scr_str_sendrecv(file_send, rank_send, &file_recv_orig, rank_recv, comm);

  /* define the path to store our partner's file */
  if (have_incoming) {
    /* set path to file name */
    scr_path* path_recv = scr_path_from_str(file_recv_orig);
    scr_path_basename(path_recv);
    scr_path_prepend_str(path_recv, dir_recv);
    scr_path_strcpy(file_recv, size_recv, path_recv);
    scr_path_delete(&path_recv);

    /* free the file name we received */
    scr_free(&file_recv_orig);
  }

  return rc;
}
示例#2
0
/* for file name listed in meta, fetch that file from src_dir and store
 * a copy in dst_dir, record full path to copy in newfile, and
 * return whether operation succeeded */
static int scr_fetch_file(
  const char* dst_file,
  const char* src_dir,
  const scr_meta* meta)
{
  int rc = SCR_SUCCESS;

  /* build full path to source file */
  scr_path* path_src_file = scr_path_from_str(dst_file);
  scr_path_basename(path_src_file);
  scr_path_prepend_str(path_src_file, src_dir);
  char* src_file = scr_path_strdup(path_src_file);

  /* fetch the file */
  uLong crc;
  uLong* crc_p = NULL;
  if (scr_crc_on_flush) {
    crc_p = &crc;
  }
  rc = scr_file_copy(src_file, dst_file, scr_file_buf_size, crc_p);

  /* check that crc matches crc stored in meta */
  uLong meta_crc;
  if (scr_meta_get_crc32(meta, &meta_crc) == SCR_SUCCESS) {
    if (rc == SCR_SUCCESS && scr_crc_on_flush && crc != meta_crc) {
      rc = SCR_FAILURE;
      scr_err("CRC32 mismatch detected when fetching file from %s to %s @ %s:%d",
        src_file, dst_file, __FILE__, __LINE__
      );

      /* TODO: would be good to log this, but right now only rank 0
       * can write log entries */
      /*
      if (scr_log_enable) {
        time_t now = scr_log_seconds();
        scr_log_event("CRC32 MISMATCH", filename, NULL, &now, NULL);
      }
      */
    }
  }

  /* free path and string for source file */
  scr_free(&src_file);
  scr_path_delete(&path_src_file);

  return rc;
}
示例#3
0
/* this moves all files of the specified dataset in the cache to
 * make them accessible to new rank mapping */
static int scr_distribute_files(scr_filemap* map, const scr_reddesc* red, int id)
{
  int i, round;
  int rc = SCR_SUCCESS;

  /* TODO: mark dataset as being distributed in filemap,
   * because if we fail in the middle of a distribute,
   * we can't trust the contents of the files anymore,
   * at which point it should be deleted */

  /* clean out any incomplete files before we start */
  scr_cache_clean(map);

  /* for this dataset, get list of ranks we have data for */
  int  nranks = 0;
  int* ranks = NULL;
  scr_filemap_list_ranks_by_dataset(map, id, &nranks, &ranks);

  /* walk backwards through the list of ranks, and set our start index
   * to the rank which is the first rank that is equal to or higher
   * than our own rank -- when we assign round ids below, this offsetting
   * helps distribute the load */
  int start_index = 0;
  int invalid_rank_found = 0;
  for (i = nranks-1; i >= 0; i--) {
    int rank = ranks[i];

    /* pick the first rank whose rank id is equal to or higher than our own */
    if (rank >= scr_my_rank_world) {
      start_index = i;
    }

    /* while we're at it, check that the rank is within range */
    if (rank < 0 || rank >= scr_ranks_world) {
      scr_err("Invalid rank id %d in world of %d @ %s:%d",
        rank, scr_ranks_world, __FILE__, __LINE__
      );
      invalid_rank_found = 1;
    }
  }

  /* check that we didn't find an invalid rank on any process */
  if (! scr_alltrue(invalid_rank_found == 0)) {
    scr_free(&ranks);
    return SCR_FAILURE;
  }

  /* allocate array to record the rank we can send to in each round */
  int* have_rank_by_round = (int*) SCR_MALLOC(sizeof(int) * nranks);
  int* send_flag_by_round = (int*) SCR_MALLOC(sizeof(int) * nranks);

  /* check that we have all of the files for each rank,
   * and determine the round we can send them */
  scr_hash* send_hash = scr_hash_new();
  scr_hash* recv_hash = scr_hash_new();
  for (round = 0; round < nranks; round++) {
    /* get the rank id */
    int index = (start_index + round) % nranks;
    int rank = ranks[index];

    /* record the rank indexed by the round number */
    have_rank_by_round[round] = rank;

    /* assume we won't be sending to this rank in this round */
    send_flag_by_round[round] = 0;

    /* if we have files for this rank, specify the round we can
     * send those files in */
    if (scr_bool_have_files(map, id, rank)) {
      scr_hash_setf(send_hash, NULL, "%d %d", rank, round);
    }
  }
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* search for the minimum round we can get our files */
  int retrieve_rank  = -1;
  int retrieve_round = -1;
  scr_hash_elem* elem = NULL;
  for (elem = scr_hash_elem_first(recv_hash);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the rank id */
    int rank = scr_hash_elem_key_int(elem);

    /* get the round id */
    scr_hash* round_hash = scr_hash_elem_hash(elem);
    scr_hash_elem* round_elem = scr_hash_elem_first(round_hash);
    char* round_str = scr_hash_elem_key(round_elem);
    int round = atoi(round_str);

    /* record this round and rank number if it's less than the current round */
    if (round < retrieve_round || retrieve_round == -1) {
      retrieve_round = round;
      retrieve_rank  = rank;
    }
  }

  /* done with the round hashes, free them off */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  /* free off our list of ranks */
  scr_free(&ranks);

  /* for some redundancy schemes, we know at this point whether we
   * can recover all files */
  int can_get_files = (retrieve_rank != -1);
  if (red->copy_type != SCR_COPY_XOR && !scr_alltrue(can_get_files)) {
    /* print a debug message indicating which rank is missing files */
    if (! can_get_files) {
      scr_dbg(2, "Cannot find process that has my checkpoint files @ %s:%d",
        __FILE__, __LINE__
      );
    }
    return SCR_FAILURE;
  }

  /* get the maximum retrieve round */
  int max_rounds = 0;
  MPI_Allreduce(
    &retrieve_round, &max_rounds, 1, MPI_INT, MPI_MAX, scr_comm_world
  );

  /* tell destination which round we'll take our files in */
  send_hash = scr_hash_new();
  recv_hash = scr_hash_new();
  if (retrieve_rank != -1) {
    scr_hash_setf(send_hash, NULL, "%d %d", retrieve_rank, retrieve_round);
  }
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* determine which ranks want to fetch their files from us */
  for(elem = scr_hash_elem_first(recv_hash);
      elem != NULL;
      elem = scr_hash_elem_next(elem))
  {
    /* get the round id */
    scr_hash* round_hash = scr_hash_elem_hash(elem);
    scr_hash_elem* round_elem = scr_hash_elem_first(round_hash);
    char* round_str = scr_hash_elem_key(round_elem);
    int round = atoi(round_str);

    /* record whether this rank wants its files from us */
    if (round >= 0 && round < nranks) {
      send_flag_by_round[round] = 1;
    }
  }

  /* done with the round hashes, free them off */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  int tmp_rc = 0;

  /* run through rounds and exchange files */
  for (round = 0; round <= max_rounds; round++) {
    /* assume we don't need to send or receive any files this round */
    int send_rank = MPI_PROC_NULL;
    int recv_rank = MPI_PROC_NULL;
    int send_num  = 0;
    int recv_num  = 0;

    /* check whether I can potentially send to anyone in this round */
    if (round < nranks) {
      /* have someone's files, check whether they are asking
       * for them this round */
      if (send_flag_by_round[round]) {
        /* need to send files this round, remember to whom and how many */
        int dst_rank = have_rank_by_round[round];
        send_rank = dst_rank;
        send_num  = scr_filemap_num_files(map, id, dst_rank);
      }
    }

    /* if I'm supposed to get my files this round, set the recv_rank */
    if (retrieve_round == round) {
      recv_rank = retrieve_rank;
    }

    /* TODO: another special case is to just move files if the
     * processes are on the same node */

    /* if i'm sending to myself, just move (rename) each file */
    if (send_rank == scr_my_rank_world) {
      /* get our file list */
      int numfiles = 0;
      char** files = NULL;
      scr_filemap_list_files(map, id, send_rank, &numfiles, &files);

      /* TODO: sort files in reverse order by size */

      /* iterate over and rename each file */
      for (i=0; i < numfiles; i++) {
        /* get the current file name */
        char* file = files[i];

        /* lookup meta data for this file */
        scr_meta* meta = scr_meta_new();
        scr_filemap_get_meta(map, id, send_rank, file, meta);

        /* get the path for this file based on its type
         * and dataset id */
        char* dir = NULL;
        if (scr_meta_check_filetype(meta, SCR_META_FILE_USER) == SCR_SUCCESS) {
          dir = scr_cache_dir_get(red, id);
        } else {
          dir = scr_cache_dir_hidden_get(red, id);
        }

        /* build the new file name */
        scr_path* path_newfile = scr_path_from_str(file);
        scr_path_basename(path_newfile);
        scr_path_prepend_str(path_newfile, dir);
        char* newfile = scr_path_strdup(path_newfile);

        /* if the new file name is different from the old name, rename it */
        if (strcmp(file, newfile) != 0) {
          /* record the new filename to our map and write it to disk */
          scr_filemap_add_file(map, id, send_rank, newfile);
          scr_filemap_set_meta(map, id, send_rank, newfile, meta);
          scr_filemap_write(scr_map_file, map);

          /* rename the file */
          scr_dbg(2, "Round %d: rename(%s, %s)", round, file, newfile);
          tmp_rc = rename(file, newfile);
          if (tmp_rc != 0) {
            /* TODO: to cross mount points, if tmp_rc == EXDEV,
             * open new file, copy, and delete orig */
            scr_err("Moving checkpoint file: rename(%s, %s) %s errno=%d @ %s:%d",
              file, newfile, strerror(errno), errno, __FILE__, __LINE__
            );
            rc = SCR_FAILURE;
          }

          /* remove the old name from the filemap and write it to disk */
          scr_filemap_remove_file(map, id, send_rank, file);
          scr_filemap_write(scr_map_file, map);
        }

        /* free the path and string */
        scr_free(&newfile);
        scr_path_delete(&path_newfile);

        /* free directory string */
        scr_free(&dir);

        /* free meta data */
        scr_meta_delete(&meta);
      }

      /* free the list of filename pointers */
      scr_free(&files);
    } else {
      /* if we have files for this round, but the correspdonding
       * rank doesn't need them, delete the files */
      if (round < nranks && send_rank == MPI_PROC_NULL) {
        int dst_rank = have_rank_by_round[round];
        scr_unlink_rank(map, id, dst_rank);
      }

      /* sending to and/or recieving from another node */
      if (send_rank != MPI_PROC_NULL || recv_rank != MPI_PROC_NULL) {
        /* have someone to send to or receive from */
        int have_outgoing = 0;
        int have_incoming = 0;
        if (send_rank != MPI_PROC_NULL) {
          have_outgoing = 1;
        }
        if (recv_rank != MPI_PROC_NULL) {
          have_incoming = 1;
        }

        /* first, determine how many files I will be receiving and
         * tell how many I will be sending */
        MPI_Request request[2];
        MPI_Status  status[2];
        int num_req = 0;
        if (have_incoming) {
          MPI_Irecv(
            &recv_num, 1, MPI_INT, recv_rank, 0,
            scr_comm_world, &request[num_req]
          );
          num_req++;
        }
        if (have_outgoing) {
          MPI_Isend(
            &send_num, 1, MPI_INT, send_rank, 0,
            scr_comm_world, &request[num_req]
          );
          num_req++;
        }
        if (num_req > 0) {
          MPI_Waitall(num_req, request, status);
        }

        /* record how many files I will receive (need to distinguish
         * between 0 files and not knowing) */
        if (have_incoming) {
          scr_filemap_set_expected_files(map, id, scr_my_rank_world, recv_num);
        }

        /* turn off send or receive flags if the file count is 0,
         * nothing else to do */
        if (send_num == 0) {
          have_outgoing = 0;
          send_rank = MPI_PROC_NULL;
        }
        if (recv_num == 0) {
          have_incoming = 0;
          recv_rank = MPI_PROC_NULL;
        }

        /* TODO: since we overwrite files in place in order to avoid
         * running out of storage space, we should sort files in order
         * of descending size for the next step */

        /* get our file list for the destination */
        int numfiles = 0;
        char** files = NULL;
        if (have_outgoing) {
          scr_filemap_list_files(map, id, send_rank, &numfiles, &files);
        }

        /* while we have a file to send or receive ... */
        while (have_incoming || have_outgoing) {
          /* get the filename */
          char* file = NULL;
          scr_meta* send_meta = NULL;
          if (have_outgoing) {
            file = files[numfiles - send_num];
            send_meta = scr_meta_new();
            scr_filemap_get_meta(map, id, send_rank, file, send_meta);
          }

          /* exchange meta data so we can determine type of incoming file */
          scr_meta* recv_meta = scr_meta_new();
          scr_hash_sendrecv(send_meta, send_rank, recv_meta, recv_rank, scr_comm_world);

          /* get the path for this file based on its type and dataset id */
          char* dir = NULL;
          if (have_incoming) {
            if (scr_meta_check_filetype(recv_meta, SCR_META_FILE_USER) == SCR_SUCCESS) {
              dir = scr_cache_dir_get(red, id);
            } else {
              dir = scr_cache_dir_hidden_get(red, id);
            }
          }

          /* exhange file names with partners,
           * building full path of incoming file */
          char file_partner[SCR_MAX_FILENAME];
          scr_swap_file_names(
            file, send_rank, file_partner, sizeof(file_partner), recv_rank,
            dir, scr_comm_world
          );

          /* free directory string */
          scr_free(&dir);

          /* free incoming meta data (we'll get this again later) */
          scr_meta_delete(&recv_meta);

          /* if we'll receive a file, record the name of our file
           * in the filemap and write it to disk */
          recv_meta = NULL;
          if (recv_rank != MPI_PROC_NULL) {
            recv_meta = scr_meta_new();
            scr_filemap_add_file(map, id, scr_my_rank_world, file_partner);
            scr_filemap_write(scr_map_file, map);
          }

          /* either sending or receiving a file this round, since we move files,
           * it will be deleted or overwritten */
          if (scr_swap_files(MOVE_FILES, file, send_meta, send_rank,
              file_partner, recv_meta, recv_rank, scr_comm_world) != SCR_SUCCESS)
          {
            scr_err("Swapping files: %s to %d, %s from %d @ %s:%d",
                    file, send_rank, file_partner, recv_rank, __FILE__, __LINE__
            );
            rc = SCR_FAILURE;
          }

          /* if we received a file, record its meta data and decrement
           * our receive count */
          if (have_incoming) {
            /* record meta data for the file we received */
            scr_filemap_set_meta(map, id, scr_my_rank_world, file_partner, recv_meta);
            scr_meta_delete(&recv_meta);

            /* decrement receive count */
            recv_num--;
            if (recv_num == 0) {
              have_incoming = 0;
              recv_rank = MPI_PROC_NULL;
            }
          }

          /* if we sent a file, remove it from the filemap and decrement
           * our send count */
          if (have_outgoing) {
            /* remove file from the filemap */
            scr_filemap_remove_file(map, id, send_rank, file);
            scr_meta_delete(&send_meta);

            /* decrement our send count */
            send_num--;
            if (send_num == 0) {
              have_outgoing = 0;
              send_rank = MPI_PROC_NULL;
            }
          }

          /* update filemap on disk */
          scr_filemap_write(scr_map_file, map);
        }

        /* free our file list */
        scr_free(&files);
      }
    }
  }

  /* if we have more rounds than max rounds, delete the remainder of our files */
  for (round = max_rounds+1; round < nranks; round++) {
    /* have someone's files for this round, so delete them */
    int dst_rank = have_rank_by_round[round];
    scr_unlink_rank(map, id, dst_rank);
  }

  scr_free(&send_flag_by_round);
  scr_free(&have_rank_by_round);

  /* write out new filemap and free the memory resources */
  scr_filemap_write(scr_map_file, map);

  /* clean out any incomplete files */
  scr_cache_clean(map);

  /* TODO: if the exchange or redundancy rebuild failed,
   * we should also delete any *good* files we received */

  /* return whether distribute succeeded, it does not ensure we have
   * all of our files, only that the transfer completed without failure */
  return rc;
}
示例#4
0
文件: scr_flush_sync.c 项目: LLNL/scr
/* flushes file named in src_file to dst_dir and fills in meta based on flush,
 * returns success of flush */
static int scr_flush_a_file(const char* src_file, const char* dst_dir, scr_meta* meta)
{
  int flushed = SCR_SUCCESS;
  int tmp_rc;

  /* build full name to destination file */
  scr_path* dst_path = scr_path_from_str(src_file);
  scr_path_basename(dst_path);
  scr_path_prepend_str(dst_path, dst_dir);
  scr_path_reduce(dst_path);
  char* dst_file = scr_path_strdup(dst_path);

  /* copy file */
  int crc_valid = 0;
  uLong crc;
  uLong* crc_p = NULL;
  if (scr_crc_on_flush) {
    crc_valid = 1;
    crc_p = &crc;
  }
  tmp_rc = scr_file_copy(src_file, dst_file, scr_file_buf_size, crc_p);
  if (tmp_rc != SCR_SUCCESS) {
    crc_valid = 0;
    flushed = SCR_FAILURE;
  }
  scr_dbg(2, "scr_flush_a_file: Read and copied %s to %s with success code %d @ %s:%d",
    src_file, dst_file, tmp_rc, __FILE__, __LINE__
  );

  /* if file has crc32, check it against the one computed during the copy,
   * otherwise if scr_crc_on_flush is set, record crc32 */
  if (crc_valid) {
    uLong crc_meta;
    if (scr_meta_get_crc32(meta, &crc_meta) == SCR_SUCCESS) {
      if (crc != crc_meta) {
        /* detected a crc mismatch during the copy */

        /* TODO: unlink the copied file */
        /* scr_file_unlink(dst_file); */

        /* mark the file as invalid */
        scr_meta_set_complete(meta, 0);

        flushed = SCR_FAILURE;
        scr_err("scr_flush_a_file: CRC32 mismatch detected when flushing file %s to %s @ %s:%d",
          src_file, dst_file, __FILE__, __LINE__
        );

        /* TODO: would be good to log this, but right now only rank 0 can write log entries */
        /*
        if (scr_log_enable) {
          time_t now = scr_log_seconds();
          scr_log_event("CRC32 MISMATCH", dst_file, NULL, &now, NULL);
        }
        */
      }
    } else {
      /* the crc was not already in the metafile, but we just computed it, so set it */
      scr_meta_set_crc32(meta, crc);
    }
  }

  /* TODO: check that written filesize matches expected filesize */

  /* fill out meta data, set complete field based on flush success */
  /* (we don't update the meta file here, since perhaps the file in cache is ok and only the flush failed) */
  int complete = (flushed == SCR_SUCCESS);
  scr_meta_set_complete(meta, complete);

  /* free destination file string and path */
  scr_free(&dst_file);
  scr_path_delete(&dst_path);

  return flushed;
}
示例#5
0
int main(int argc, char* argv[])
{
  int i, j;
  int index = 1;

  /* print usage if not enough arguments were given */
  if (argc < 2) {
    printf("Usage: scr_rebuild_xor <size> <root> <missing_xor_filename> <ordered_remaining_xor_filenames>\n");
    return 1;
  }

  /* TODO: want to pass this on command line? */
  /* get current working directory */
  char dsetdir[SCR_MAX_FILENAME];
  scr_getcwd(dsetdir, sizeof(dsetdir));

  /* create and reduce path for dataset */
  scr_path* path_dset = scr_path_from_str(dsetdir);
  scr_path_reduce(path_dset);

  /* allocate buffers */
  char* buffer_A = malloc(buffer_size * sizeof(char));
  char* buffer_B = malloc(buffer_size * sizeof(char));
  if (buffer_A == NULL || buffer_B == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the size of the XOR set */
  int xor_set_size = (int) strtol(argv[index++], (char **)NULL, 10);
  if (xor_set_size <= 0) {
    scr_err("Invalid XOR set size argument %s @ %s:%d",
      argv[index-1], __FILE__, __LINE__
    );
    return 1;
  }

  /* allocate memory for data structures based on the XOR set size */
  int*   num_files  = malloc(xor_set_size * sizeof(int));
  int*   offsets    = malloc(xor_set_size * sizeof(int));
  char** xor_files  = malloc(xor_set_size * sizeof(char*));
  int*   xor_fds    = malloc(xor_set_size * sizeof(int));
  scr_hash** xor_headers = malloc(xor_set_size * sizeof(scr_hash*));
  if (num_files == NULL || offsets == NULL || xor_files == NULL || xor_fds == NULL || xor_headers == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the rank of the missing process (the root) */
  int root = (int) strtol(argv[index++], (char **)NULL, 10);
  if (root < 0 || root >= xor_set_size) {
    scr_err("Invalid root argument %s @ %s:%d",
      argv[index-1], __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the missing xor filename */
  xor_files[0] = strdup(argv[index++]);
  if (xor_files[0] == NULL) {
    scr_err("Failed to dup XOR filename @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* read in the xor filenames (expected to be in order of XOR segment number) */
  /* we order ranks so that root is index 0, the rank to the right of root is index 1, and so on */
  for (i=0; i < xor_set_size; i++) {
    xor_headers[i] = scr_hash_new();

    /* we'll get the XOR file name for root from the header stored in the XOR file of the partner */
    if (i == root) {
      continue;
    }

    /* adjust the index relative to root */
    j = i - root;
    if (j < 0) {
      j += xor_set_size;
    }

    /* copy the XOR file name */
    xor_files[j] = strdup(argv[index++]);
    if (xor_files[j] == NULL) {
      scr_err("Failed to dup XOR filename @ %s:%d",
        __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* open each of the xor files and read in the headers */
  for (i=1; i < xor_set_size; i++) {
    /* open each xor file for reading */
    xor_fds[i] = scr_open(xor_files[i], O_RDONLY);
    if (xor_fds[i] < 0) {
      scr_err("Opening xor segment file: scr_open(%s) errno=%d %s @ %s:%d",
        xor_files[i], errno, strerror(errno), __FILE__, __LINE__
      );
      return 1;
    }

    /* read the header from this xor file */
    if (scr_hash_read_fd(xor_files[i], xor_fds[i], xor_headers[i]) < 0) {
      scr_err("Failed to read XOR header from %s @ %s:%d",
        xor_files[i], __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* build header for missing XOR file */
  int partner_rank = -1;
  if (xor_set_size >= 2) {
    scr_hash_merge(xor_headers[0], xor_headers[1]);

    /* fetch our own file list from rank to our right */
    scr_hash* rhs_hash = scr_hash_get(xor_headers[1], SCR_KEY_COPY_XOR_PARTNER);
    scr_hash* current_hash = scr_hash_new();
    scr_hash_merge(current_hash, rhs_hash);
    scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT, current_hash);

    /* we are the partner to the rank to our left */
    scr_hash* lhs_hash = scr_hash_get(xor_headers[xor_set_size-1], SCR_KEY_COPY_XOR_CURRENT);
    scr_hash* partner_hash = scr_hash_new();
    scr_hash_merge(partner_hash, lhs_hash);
    scr_hash_set(xor_headers[0], SCR_KEY_COPY_XOR_PARTNER, partner_hash);

    /* get global rank of partner */
    if (scr_hash_util_get_int(lhs_hash, SCR_KEY_COPY_XOR_RANK, &partner_rank) != SCR_SUCCESS) {
      scr_err("Failed to read partner rank from XOR file header in %s @ %s:%d",
        xor_files[xor_set_size-1], __FILE__, __LINE__
      );
      return 1;
    }
  }

  /* get a pointer to the current hash for the missing rank */
  scr_hash* missing_current_hash = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_CURRENT);

  /* read the rank */
  int my_rank = -1;
  if (scr_hash_util_get_int(missing_current_hash, SCR_KEY_COPY_XOR_RANK, &my_rank) != SCR_SUCCESS) {
    scr_err("Failed to read rank from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* get the dataset */
  scr_dataset* dataset = scr_hash_get(xor_headers[0], SCR_KEY_COPY_XOR_DATASET);

  /* read the dataset id */
  int dset_id = -1;
  if (scr_dataset_get_id(dataset, &dset_id) != SCR_SUCCESS) {
    scr_err("Failed to read dataset id from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* read the ranks */
  int num_ranks = -1;
  if (scr_hash_util_get_int(xor_headers[0], SCR_KEY_COPY_XOR_RANKS, &num_ranks) != SCR_SUCCESS) {
    scr_err("Failed to read ranks from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* get name of partner's fmap */
  scr_path* path_partner_map = scr_path_from_str(".scr");
  scr_path_append_strf(path_partner_map, "fmap.%d.scr", partner_rank);

  /* extract partner's flush descriptor */
  scr_hash* flushdesc = scr_hash_new();
  scr_filemap* partner_map = scr_filemap_new();
  scr_filemap_read(path_partner_map, partner_map);
  scr_filemap_get_flushdesc(partner_map, dset_id, partner_rank, flushdesc);
  scr_filemap_delete(&partner_map);

  /* delete partner map path */
  scr_path_delete(&path_partner_map);

  /* determine whether we should preserve user directories */
  int preserve_dirs = 0;
  scr_hash_util_get_int(flushdesc, SCR_SCAVENGE_KEY_PRESERVE, &preserve_dirs);

  /* read the chunk size */
  unsigned long chunk_size = 0;
  if (scr_hash_util_get_unsigned_long(xor_headers[0], SCR_KEY_COPY_XOR_CHUNK, &chunk_size) != SCR_SUCCESS) {
    scr_err("Failed to read chunk size from XOR file header in %s @ %s:%d",
      xor_files[0], __FILE__, __LINE__
    );
    return 1;
  }

  /* determine number of files each member wrote in XOR set */
  for (i=0; i < xor_set_size; i++) {
    /* record the number of files for this rank */
    scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT);
    if (scr_hash_util_get_int(current_hash, SCR_KEY_COPY_XOR_FILES, &num_files[i]) != SCR_SUCCESS) {
      scr_err("Failed to read number of files from %s @ %s:%d",
        xor_files[i], __FILE__, __LINE__
      );
      return 1;
    }
  }
  
  /* count the total number of files and set the offsets array */
  int total_num_files = 0;
  for (i=0; i < xor_set_size; i++) {
    offsets[i] = total_num_files;
    total_num_files += num_files[i];
  }

  /* allocate space for a file descriptor, file name pointer, and filesize for each user file */
  int* user_fds                 = (int*)           malloc(total_num_files * sizeof(int));
  char** user_files             = (char**)         malloc(total_num_files * sizeof(char*));
  char** user_rel_files         = (char**)         malloc(total_num_files * sizeof(char*));
  unsigned long* user_filesizes = (unsigned long*) malloc(total_num_files * sizeof(unsigned long));
  if (user_fds == NULL || user_files == NULL || user_rel_files == NULL || user_filesizes == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* get file name, file size, and open each of the user files that we have */
  for (i=0; i < xor_set_size; i++) {
    scr_hash* current_hash = scr_hash_get(xor_headers[i], SCR_KEY_COPY_XOR_CURRENT);

    /* for each file belonging to this rank, get filename, filesize, and open file */
    for (j=0; j < num_files[i]; j++) {
      int offset = offsets[i] + j;

      /* get the meta data for this file */
      scr_meta* meta = scr_hash_get_kv_int(current_hash, SCR_KEY_COPY_XOR_FILE, j);
      if (meta == NULL) {
        scr_err("Failed to read meta data for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* record the filesize of this file */
      if (scr_meta_get_filesize(meta, &user_filesizes[offset]) != SCR_SUCCESS) {
        scr_err("Failed to read filesize field for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* get filename */
      char* origname;
      if (scr_meta_get_origname(meta, &origname) != SCR_SUCCESS) {
        scr_err("Failed to read original name for file %d in %s @ %s:%d",
          j, xor_files[i], __FILE__, __LINE__
        );
        return 1;
      }

      /* construct full path to user file */
      scr_path* path_user_full = scr_path_from_str(origname);
      if (preserve_dirs) {
        /* get original path of file */
        char* origpath;
        if (scr_meta_get_origpath(meta, &origpath) != SCR_SUCCESS) {
          scr_err("Failed to read original path for file %d in %s @ %s:%d",
            j, xor_files[i], __FILE__, __LINE__
          );
          return 1;
        }

        /* construct full path to file */
        scr_path_prepend_str(path_user_full, origpath);
      } else {
        /* construct full path to file */
        scr_path_prepend(path_user_full, path_dset);
      }

      /* reduce path to user file */
      scr_path_reduce(path_user_full);

      /* make a copy of the full path */
      user_files[offset] = scr_path_strdup(path_user_full);

      /* make a copy of relative path */
      scr_path* path_user_rel = scr_path_relative(path_dset, path_user_full);
      user_rel_files[offset] = scr_path_strdup(path_user_rel);
      scr_path_delete(&path_user_rel);

      /* free the full path */
      scr_path_delete(&path_user_full);

      /* open the file */
      if (i == 0) {
        /* create directory for file */
        scr_path* user_dir_path = scr_path_from_str(user_files[offset]);
        scr_path_reduce(user_dir_path);
        scr_path_dirname(user_dir_path);
        if (! scr_path_is_null(user_dir_path)) {
          char* user_dir = scr_path_strdup(user_dir_path);
          mode_t mode_dir = scr_getmode(1, 1, 1);
          if (scr_mkdir(user_dir, mode_dir) != SCR_SUCCESS) {
            scr_err("Failed to create directory for user file %s @ %s:%d",
              user_dir, __FILE__, __LINE__
            );
            return 1;
          }
          scr_free(&user_dir);
        }
        scr_path_delete(&user_dir_path);

        /* open missing file for writing */
        mode_t mode_file = scr_getmode(1, 1, 0);
        user_fds[offset] = scr_open(user_files[offset], O_WRONLY | O_CREAT | O_TRUNC, mode_file);
        if (user_fds[offset] < 0) {
          scr_err("Opening user file for writing: scr_open(%s) errno=%d %s @ %s:%d",
            user_files[offset], errno, strerror(errno), __FILE__, __LINE__
          );
          return 1;
        }
      } else {
        /* open existing file for reading */
        user_fds[offset] = scr_open(user_files[offset], O_RDONLY);
        if (user_fds[offset] < 0) {
          scr_err("Opening user file for reading: scr_open(%s) errno=%d %s @ %s:%d",
            user_files[offset], errno, strerror(errno), __FILE__, __LINE__
          );
          return 1;
        }
      }
    }
  }

  /* finally, open the xor file for the missing rank */
  mode_t mode_file = scr_getmode(1, 1, 0);
  xor_fds[0] = scr_open(xor_files[0], O_WRONLY | O_CREAT | O_TRUNC, mode_file);
  if (xor_fds[0] < 0) {
    scr_err("Opening xor file to be reconstructed: scr_open(%s) errno=%d %s @ %s:%d",
      xor_files[0], errno, strerror(errno), __FILE__, __LINE__
    );
    return 1;
  }

  int rc = 0;

  /* write the header to the XOR file of the missing rank */
  if (scr_hash_write_fd(xor_files[0], xor_fds[0], xor_headers[0]) < 0) {
    rc = 1;
  }

  /* this offset array records the current position we are in the logical file for each rank */
  unsigned long* offset = malloc(xor_set_size * sizeof(unsigned long));
  if (offset == NULL) {
    scr_err("Failed to allocate buffer memory @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }
  for (i=0; i < xor_set_size; i++) {
    offset[i] = 0;
  }

  unsigned long write_pos = 0;
  int chunk_id;
  for (chunk_id = 0; chunk_id < xor_set_size && rc == 0; chunk_id++) {
    size_t nread = 0;
    while (nread < chunk_size && rc == 0) {
      /* read upto buffer_size bytes at a time */
      size_t count = chunk_size - nread;
      if (count > buffer_size) {
        count = buffer_size;
      }

      /* clear our buffer */
      memset(buffer_A, 0, count);

      /* read a segment from each rank and XOR it into our buffer */
      for (i=1; i < xor_set_size; i++) {
        /* read the next set of bytes for this chunk from my file into send_buf */
        if (chunk_id != ((i + root) % xor_set_size)) {
          /* read chunk from the logical file for this rank */
          if (scr_read_pad_n(num_files[i], &user_files[offsets[i]], &user_fds[offsets[i]],
                             buffer_B, count, offset[i], &user_filesizes[offsets[i]]) != SCR_SUCCESS)
          {
            /* our read failed, set the return code to an error */
            rc = 1;
            count = 0;
          }
          offset[i] += count;
        } else {
          /* read chunk from the XOR file for this rank */
          if (scr_read_attempt(xor_files[i], xor_fds[i], buffer_B, count) != count) {
            /* our read failed, set the return code to an error */
            rc = 1;
            count = 0;
          }
        }

        /* TODO: XORing with unsigned long would be faster here (if chunk size is multiple of this size) */
        /* merge the blocks via xor operation */
        for (j = 0; j < count; j++) {
          buffer_A[j] ^= buffer_B[j];
        }
      }

      /* at this point, we have the data from the missing rank, write it out */
      if (chunk_id != root) {
        /* write chunk to logical file for the missing rank */
        if (scr_write_pad_n(num_files[0], &user_files[0], &user_fds[0],
                            buffer_A, count, write_pos, &user_filesizes[0]) != SCR_SUCCESS)
        {
          /* our write failed, set the return code to an error */
          rc = 1;
        }
        write_pos += count;
      } else {
        /* write chunk to xor file for the missing rank */
        if (scr_write_attempt(xor_files[0], xor_fds[0], buffer_A, count) != count) {
          /* our write failed, set the return code to an error */
          rc = 1;
        }
      }

      nread += count;
    }
  }

  /* close each of the user files */
  for (i=0; i < total_num_files; i++) {
    if (scr_close(user_files[i], user_fds[i]) != SCR_SUCCESS) {
      rc = 1;
    }
  }

  /* close each of the XOR files */
  for (i=0; i < xor_set_size; i++) {
    if (scr_close(xor_files[i], xor_fds[i]) != SCR_SUCCESS) {
      rc = 1;
    }
  }

  /* if the write failed, delete the files we just wrote, and return an error */
  if (rc != 0) {
    for (j=0; j < num_files[0]; j++) {
      scr_file_unlink(user_files[j]);
    }
    scr_file_unlink(xor_files[0]);
    return 1;
  }

  /* check that filesizes are correct */
  unsigned long filesize;
  for (j=0; j < num_files[0]; j++) {
    filesize = scr_file_size(user_files[j]);
    if (filesize != user_filesizes[j]) {
      /* the filesize check failed, so delete the file */
      scr_file_unlink(user_files[j]);

      /* mark the file as incomplete */
      scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j);
      scr_meta_set_complete(meta, 0);

      rc = 1;
    }
  }
  /* TODO: we didn't record the filesize of the XOR file for the missing rank anywhere */

  /* create a filemap for this rank */
  scr_filemap* map = scr_filemap_new();
  if (map == NULL) {
    scr_err("Failed to allocate filemap @ %s:%d",
      __FILE__, __LINE__
    );
    return 1;
  }

  /* record the dataset information in the filemap */
  scr_filemap_set_dataset(map, dset_id, my_rank, dataset);

  /* write meta data for each of the user files and add each one to the filemap */
  for (j=0; j < num_files[0]; j++) {
    /* add user file to filemap and record meta data */
    char* user_file_relative = user_rel_files[j];
    scr_filemap_add_file(map, dset_id, my_rank, user_file_relative);
    scr_meta* meta = scr_hash_get_kv_int(missing_current_hash, SCR_KEY_COPY_XOR_FILE, j);
    scr_filemap_set_meta(map, dset_id, my_rank, user_file_relative, meta);
  }

  /* write meta data for xor file and add it to the filemap */
  scr_filemap_add_file(map, dset_id, my_rank, xor_files[0]);
  unsigned long full_chunk_filesize = scr_file_size(xor_files[0]);
  int missing_complete = 1;
  scr_meta* meta_chunk = scr_meta_new();
  scr_meta_set_filename(meta_chunk, xor_files[0]);
  scr_meta_set_filetype(meta_chunk, SCR_META_FILE_XOR);
  scr_meta_set_filesize(meta_chunk, full_chunk_filesize);
  /* TODO: remove this from meta file, for now it's needed in scr_index.c */
  scr_meta_set_ranks(meta_chunk, num_ranks);
  scr_meta_set_complete(meta_chunk, missing_complete);
  scr_filemap_set_meta(map, dset_id, my_rank, xor_files[0], meta_chunk);

  /* set expected number of files for the missing rank */
  int expected_num_files = scr_filemap_num_files(map, dset_id, my_rank);
  scr_filemap_set_expected_files(map, dset_id, my_rank, expected_num_files);

  /* compute, check, and store crc values with files */
  for (j=0; j < num_files[0]; j++) {
    /* compute crc on user file */
    char* user_file_relative = user_rel_files[j];
    if (scr_compute_crc(map, dset_id, my_rank, user_file_relative) != SCR_SUCCESS) {
      /* the crc check failed, so delete the file */
      scr_file_unlink(user_files[j]);
      rc = 1;
    }
  }
  if (scr_compute_crc(map, dset_id, my_rank, xor_files[0]) != SCR_SUCCESS) {
    /* the crc check failed, so delete the file */
    scr_file_unlink(xor_files[0]);
    rc = 1;
  }

  /* store flush descriptor */
  scr_filemap_set_flushdesc(map, dset_id, my_rank, flushdesc);

  /* write filemap for this rank */
  scr_path* path_map = scr_path_from_str(".scr");
  scr_path_append_strf(path_map, "fmap.%d.scr", my_rank);
  if (scr_filemap_write(path_map, map) != SCR_SUCCESS) {
    rc = 1;
  }
  scr_path_delete(&path_map);

  /* delete the map */
  scr_filemap_delete(&map);

  scr_meta_delete(&meta_chunk);

  /* delete the flush/scavenge descriptor */
  scr_hash_delete(&flushdesc);

  scr_free(&offset);

  for (i=0; i < total_num_files; i++) {
    scr_free(&user_rel_files[i]);
    scr_free(&user_files[i]);
  }

  scr_free(&user_filesizes);
  scr_free(&user_rel_files);
  scr_free(&user_files);
  scr_free(&user_fds);

  for (i=0; i < xor_set_size; i++) {
    scr_hash_delete(&xor_headers[i]);
  }

  for (i=0; i < xor_set_size; i++) {
    scr_free(&xor_files[i]);
  }

  scr_free(&xor_headers);
  scr_free(&xor_fds);
  scr_free(&xor_files);
  scr_free(&offsets);
  scr_free(&num_files);

  scr_free(&buffer_B);
  scr_free(&buffer_A);

  scr_path_delete(&path_dset);

  return rc;
}
示例#6
0
/* fetch files listed in hash into specified cache directory,
 * update filemap and fill in total number of bytes fetched,
 * returns SCR_SUCCESS if successful */
static int scr_fetch_files_list(
  const scr_hash* file_list,
  const char* dir,
  scr_filemap* map)
{
  /* assume we'll succeed in fetching our files */
  int rc = SCR_SUCCESS;

  /* assume we don't have any files to fetch */
  int my_num_files = 0;

  /* get dataset id */
  int id;
  scr_dataset* dataset = scr_hash_get(file_list, SCR_KEY_DATASET);
  scr_dataset_get_id(dataset, &id);

  /* now iterate through the file list and fetch each file */
  scr_hash_elem* file_elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (file_elem = scr_hash_elem_first(files);
       file_elem != NULL;
       file_elem = scr_hash_elem_next(file_elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(file_elem);

    /* get a pointer to the hash for this file */
    scr_hash* hash = scr_hash_elem_hash(file_elem);

    /* check whether we are supposed to fetch this file */
    /* TODO: this is a hacky way to avoid reading a redundancy file
     * back in under the assumption that it's an original file, which
     * breaks our redundancy computation due to a name conflict on
     * the file names */
    scr_hash_elem* no_fetch_hash = scr_hash_elem_get(hash, SCR_SUMMARY_6_KEY_NOFETCH);
    if (no_fetch_hash != NULL) {
      continue;
    }

    /* increment our file count */
    my_num_files++;

    /* build the destination file name */
    scr_path* path_newfile = scr_path_from_str(file);
    scr_path_basename(path_newfile);
    scr_path_prepend_str(path_newfile, dir);
    char* newfile = scr_path_strdup(path_newfile);
      
    /* add the file to our filemap and write it to disk before creating
     * the file, this way we have a record that it may exist before we
     * actually start to fetch it */
    scr_filemap_add_file(map, id, scr_my_rank_world, newfile);
    scr_filemap_write(scr_map_file, map);

    /* get the file size */
    unsigned long filesize = 0;
    if (scr_hash_util_get_unsigned_long(hash, SCR_KEY_SIZE, &filesize) != SCR_SUCCESS) {
      scr_err("Failed to read file size from summary data @ %s:%d",
        __FILE__, __LINE__
      );
      rc = SCR_FAILURE;

      /* free path and string */
      scr_free(&newfile);
      scr_path_delete(&path_newfile);

      break;
    }

    /* check for a complete flag */
    int complete = 1;
    if (scr_hash_util_get_int(hash, SCR_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
      /* in summary file, the absence of a complete flag on a file
       * implies the file is complete */
      complete = 1;
    }

    /* create a new meta data object for this file */
    scr_meta* meta = scr_meta_new();

    /* set the meta data */
    scr_meta_set_filename(meta, newfile);
    scr_meta_set_filetype(meta, SCR_META_FILE_USER);
    scr_meta_set_filesize(meta, filesize);
    scr_meta_set_complete(meta, 1);
    /* TODODSET: move the ranks field elsewhere, for now it's needed
     * by scr_index.c */
    scr_meta_set_ranks(meta, scr_ranks_world);

    /* get the crc, if set, and add it to the meta data */
    uLong crc;
    if (scr_hash_util_get_crc32(hash, SCR_KEY_CRC, &crc) == SCR_SUCCESS) {
      scr_meta_set_crc32(meta, crc);
    }

    /* fetch file from containers if they are defined, otherwise fetch
     * the native file */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* get source path */
      char* from_dir;
      if (scr_hash_util_get_str(file_list, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        /* fetch file from containers */
        if (scr_fetch_file_from_containers(newfile, meta, segments, from_dir) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to find base dataset directory in file list */
        rc = SCR_FAILURE;
      }
    } else {
      /* fetch native file, lookup directory for this file */
      char* from_dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        if (scr_fetch_file(newfile, from_dir, meta) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to read source directory, mark file as incomplete */
        scr_meta_set_complete(meta, 0);
        rc = SCR_FAILURE;
      }
    }

    /* TODODSET: want to write out filemap before we start to fetch
     * each file? */

    /* mark the file as complete */
    scr_filemap_set_meta(map, id, scr_my_rank_world, newfile, meta);

    /* free the meta data object */
    scr_meta_delete(&meta);

    /* free path and string */
    scr_free(&newfile);
    scr_path_delete(&path_newfile);
  }

  /* set the expected number of files for this dataset */
  scr_filemap_set_expected_files(map, id, scr_my_rank_world, my_num_files);
  scr_filemap_write(scr_map_file, map);

  return rc;
}