Esempio n. 1
0
/* this transfers redundancy descriptors for the given dataset id */
static int scr_distribute_reddescs(scr_filemap* map, int id, scr_reddesc* red)
{
  int i;

  /* create a new hash to record redundancy descriptors that we have */
  scr_hash* send_hash = scr_hash_new();

  /* for this dataset, get list of ranks we have data for */
  int  nranks = 0;
  int* ranks = NULL;
  scr_filemap_list_ranks_by_dataset(map, id, &nranks, &ranks);

  /* for each rank we have files for, check whether we also have
   * its redundancy descriptor */
  int invalid_rank_found = 0;
  for (i=0; i < nranks; i++) {
    /* get the rank id */
    int rank = ranks[i];

    /* check that the rank is within range */
    if (rank < 0 || rank >= scr_ranks_world) {
      scr_err("Invalid rank id %d in world of %d @ %s:%d",
        rank, scr_ranks_world, __FILE__, __LINE__
      );
      invalid_rank_found = 1;
    }

    /* lookup the redundancy descriptor hash for this rank */
    scr_hash* desc = scr_hash_new();
    scr_filemap_get_desc(map, id, rank, desc);

    /* if this descriptor has entries, add it to our send hash,
     * delete the hash otherwise */
    if (scr_hash_size(desc) > 0) {
      scr_hash_setf(send_hash, desc, "%d", rank);
    } else {
      scr_hash_delete(&desc);
    }
  }

  /* free off our list of ranks */
  scr_free(&ranks);

  /* check that we didn't find an invalid rank on any process */
  if (! scr_alltrue(invalid_rank_found == 0)) {
    scr_hash_delete(&send_hash);
    return SCR_FAILURE;
  }

  /* create an empty hash to receive any incoming descriptors */
  /* exchange descriptors with other ranks */
  scr_hash* recv_hash = scr_hash_new();
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* check that everyone can get their descriptor */
  int num_desc = scr_hash_size(recv_hash);
  if (! scr_alltrue(num_desc > 0)) {
    scr_hash_delete(&recv_hash);
    scr_hash_delete(&send_hash);
    scr_dbg(2, "Cannot find process that has my redundancy descriptor @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* just go with the first redundancy descriptor in our list,
   * they should all be the same */
  scr_hash_elem* desc_elem = scr_hash_elem_first(recv_hash);
  scr_hash* desc_hash = scr_hash_elem_hash(desc_elem);

  /* record the descriptor in our filemap */
  scr_filemap_set_desc(map, id, scr_my_rank_world, desc_hash);
  scr_filemap_write(scr_map_file, map);

  /* TODO: at this point, we could delete descriptors for other
   * ranks for this checkpoint */

  /* create our redundancy descriptor struct from the map */
  scr_reddesc_create_from_filemap(map, id, scr_my_rank_world, red);

  /* free off our send and receive hashes */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  return SCR_SUCCESS;
}
Esempio n. 2
0
/* this moves all files of the specified dataset in the cache to
 * make them accessible to new rank mapping */
static int scr_distribute_files(scr_filemap* map, const scr_reddesc* red, int id)
{
  int i, round;
  int rc = SCR_SUCCESS;

  /* TODO: mark dataset as being distributed in filemap,
   * because if we fail in the middle of a distribute,
   * we can't trust the contents of the files anymore,
   * at which point it should be deleted */

  /* clean out any incomplete files before we start */
  scr_cache_clean(map);

  /* for this dataset, get list of ranks we have data for */
  int  nranks = 0;
  int* ranks = NULL;
  scr_filemap_list_ranks_by_dataset(map, id, &nranks, &ranks);

  /* walk backwards through the list of ranks, and set our start index
   * to the rank which is the first rank that is equal to or higher
   * than our own rank -- when we assign round ids below, this offsetting
   * helps distribute the load */
  int start_index = 0;
  int invalid_rank_found = 0;
  for (i = nranks-1; i >= 0; i--) {
    int rank = ranks[i];

    /* pick the first rank whose rank id is equal to or higher than our own */
    if (rank >= scr_my_rank_world) {
      start_index = i;
    }

    /* while we're at it, check that the rank is within range */
    if (rank < 0 || rank >= scr_ranks_world) {
      scr_err("Invalid rank id %d in world of %d @ %s:%d",
        rank, scr_ranks_world, __FILE__, __LINE__
      );
      invalid_rank_found = 1;
    }
  }

  /* check that we didn't find an invalid rank on any process */
  if (! scr_alltrue(invalid_rank_found == 0)) {
    scr_free(&ranks);
    return SCR_FAILURE;
  }

  /* allocate array to record the rank we can send to in each round */
  int* have_rank_by_round = (int*) SCR_MALLOC(sizeof(int) * nranks);
  int* send_flag_by_round = (int*) SCR_MALLOC(sizeof(int) * nranks);

  /* check that we have all of the files for each rank,
   * and determine the round we can send them */
  scr_hash* send_hash = scr_hash_new();
  scr_hash* recv_hash = scr_hash_new();
  for (round = 0; round < nranks; round++) {
    /* get the rank id */
    int index = (start_index + round) % nranks;
    int rank = ranks[index];

    /* record the rank indexed by the round number */
    have_rank_by_round[round] = rank;

    /* assume we won't be sending to this rank in this round */
    send_flag_by_round[round] = 0;

    /* if we have files for this rank, specify the round we can
     * send those files in */
    if (scr_bool_have_files(map, id, rank)) {
      scr_hash_setf(send_hash, NULL, "%d %d", rank, round);
    }
  }
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* search for the minimum round we can get our files */
  int retrieve_rank  = -1;
  int retrieve_round = -1;
  scr_hash_elem* elem = NULL;
  for (elem = scr_hash_elem_first(recv_hash);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the rank id */
    int rank = scr_hash_elem_key_int(elem);

    /* get the round id */
    scr_hash* round_hash = scr_hash_elem_hash(elem);
    scr_hash_elem* round_elem = scr_hash_elem_first(round_hash);
    char* round_str = scr_hash_elem_key(round_elem);
    int round = atoi(round_str);

    /* record this round and rank number if it's less than the current round */
    if (round < retrieve_round || retrieve_round == -1) {
      retrieve_round = round;
      retrieve_rank  = rank;
    }
  }

  /* done with the round hashes, free them off */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  /* free off our list of ranks */
  scr_free(&ranks);

  /* for some redundancy schemes, we know at this point whether we
   * can recover all files */
  int can_get_files = (retrieve_rank != -1);
  if (red->copy_type != SCR_COPY_XOR && !scr_alltrue(can_get_files)) {
    /* print a debug message indicating which rank is missing files */
    if (! can_get_files) {
      scr_dbg(2, "Cannot find process that has my checkpoint files @ %s:%d",
        __FILE__, __LINE__
      );
    }
    return SCR_FAILURE;
  }

  /* get the maximum retrieve round */
  int max_rounds = 0;
  MPI_Allreduce(
    &retrieve_round, &max_rounds, 1, MPI_INT, MPI_MAX, scr_comm_world
  );

  /* tell destination which round we'll take our files in */
  send_hash = scr_hash_new();
  recv_hash = scr_hash_new();
  if (retrieve_rank != -1) {
    scr_hash_setf(send_hash, NULL, "%d %d", retrieve_rank, retrieve_round);
  }
  scr_hash_exchange(send_hash, recv_hash, scr_comm_world);

  /* determine which ranks want to fetch their files from us */
  for(elem = scr_hash_elem_first(recv_hash);
      elem != NULL;
      elem = scr_hash_elem_next(elem))
  {
    /* get the round id */
    scr_hash* round_hash = scr_hash_elem_hash(elem);
    scr_hash_elem* round_elem = scr_hash_elem_first(round_hash);
    char* round_str = scr_hash_elem_key(round_elem);
    int round = atoi(round_str);

    /* record whether this rank wants its files from us */
    if (round >= 0 && round < nranks) {
      send_flag_by_round[round] = 1;
    }
  }

  /* done with the round hashes, free them off */
  scr_hash_delete(&recv_hash);
  scr_hash_delete(&send_hash);

  int tmp_rc = 0;

  /* run through rounds and exchange files */
  for (round = 0; round <= max_rounds; round++) {
    /* assume we don't need to send or receive any files this round */
    int send_rank = MPI_PROC_NULL;
    int recv_rank = MPI_PROC_NULL;
    int send_num  = 0;
    int recv_num  = 0;

    /* check whether I can potentially send to anyone in this round */
    if (round < nranks) {
      /* have someone's files, check whether they are asking
       * for them this round */
      if (send_flag_by_round[round]) {
        /* need to send files this round, remember to whom and how many */
        int dst_rank = have_rank_by_round[round];
        send_rank = dst_rank;
        send_num  = scr_filemap_num_files(map, id, dst_rank);
      }
    }

    /* if I'm supposed to get my files this round, set the recv_rank */
    if (retrieve_round == round) {
      recv_rank = retrieve_rank;
    }

    /* TODO: another special case is to just move files if the
     * processes are on the same node */

    /* if i'm sending to myself, just move (rename) each file */
    if (send_rank == scr_my_rank_world) {
      /* get our file list */
      int numfiles = 0;
      char** files = NULL;
      scr_filemap_list_files(map, id, send_rank, &numfiles, &files);

      /* TODO: sort files in reverse order by size */

      /* iterate over and rename each file */
      for (i=0; i < numfiles; i++) {
        /* get the current file name */
        char* file = files[i];

        /* lookup meta data for this file */
        scr_meta* meta = scr_meta_new();
        scr_filemap_get_meta(map, id, send_rank, file, meta);

        /* get the path for this file based on its type
         * and dataset id */
        char* dir = NULL;
        if (scr_meta_check_filetype(meta, SCR_META_FILE_USER) == SCR_SUCCESS) {
          dir = scr_cache_dir_get(red, id);
        } else {
          dir = scr_cache_dir_hidden_get(red, id);
        }

        /* build the new file name */
        scr_path* path_newfile = scr_path_from_str(file);
        scr_path_basename(path_newfile);
        scr_path_prepend_str(path_newfile, dir);
        char* newfile = scr_path_strdup(path_newfile);

        /* if the new file name is different from the old name, rename it */
        if (strcmp(file, newfile) != 0) {
          /* record the new filename to our map and write it to disk */
          scr_filemap_add_file(map, id, send_rank, newfile);
          scr_filemap_set_meta(map, id, send_rank, newfile, meta);
          scr_filemap_write(scr_map_file, map);

          /* rename the file */
          scr_dbg(2, "Round %d: rename(%s, %s)", round, file, newfile);
          tmp_rc = rename(file, newfile);
          if (tmp_rc != 0) {
            /* TODO: to cross mount points, if tmp_rc == EXDEV,
             * open new file, copy, and delete orig */
            scr_err("Moving checkpoint file: rename(%s, %s) %s errno=%d @ %s:%d",
              file, newfile, strerror(errno), errno, __FILE__, __LINE__
            );
            rc = SCR_FAILURE;
          }

          /* remove the old name from the filemap and write it to disk */
          scr_filemap_remove_file(map, id, send_rank, file);
          scr_filemap_write(scr_map_file, map);
        }

        /* free the path and string */
        scr_free(&newfile);
        scr_path_delete(&path_newfile);

        /* free directory string */
        scr_free(&dir);

        /* free meta data */
        scr_meta_delete(&meta);
      }

      /* free the list of filename pointers */
      scr_free(&files);
    } else {
      /* if we have files for this round, but the correspdonding
       * rank doesn't need them, delete the files */
      if (round < nranks && send_rank == MPI_PROC_NULL) {
        int dst_rank = have_rank_by_round[round];
        scr_unlink_rank(map, id, dst_rank);
      }

      /* sending to and/or recieving from another node */
      if (send_rank != MPI_PROC_NULL || recv_rank != MPI_PROC_NULL) {
        /* have someone to send to or receive from */
        int have_outgoing = 0;
        int have_incoming = 0;
        if (send_rank != MPI_PROC_NULL) {
          have_outgoing = 1;
        }
        if (recv_rank != MPI_PROC_NULL) {
          have_incoming = 1;
        }

        /* first, determine how many files I will be receiving and
         * tell how many I will be sending */
        MPI_Request request[2];
        MPI_Status  status[2];
        int num_req = 0;
        if (have_incoming) {
          MPI_Irecv(
            &recv_num, 1, MPI_INT, recv_rank, 0,
            scr_comm_world, &request[num_req]
          );
          num_req++;
        }
        if (have_outgoing) {
          MPI_Isend(
            &send_num, 1, MPI_INT, send_rank, 0,
            scr_comm_world, &request[num_req]
          );
          num_req++;
        }
        if (num_req > 0) {
          MPI_Waitall(num_req, request, status);
        }

        /* record how many files I will receive (need to distinguish
         * between 0 files and not knowing) */
        if (have_incoming) {
          scr_filemap_set_expected_files(map, id, scr_my_rank_world, recv_num);
        }

        /* turn off send or receive flags if the file count is 0,
         * nothing else to do */
        if (send_num == 0) {
          have_outgoing = 0;
          send_rank = MPI_PROC_NULL;
        }
        if (recv_num == 0) {
          have_incoming = 0;
          recv_rank = MPI_PROC_NULL;
        }

        /* TODO: since we overwrite files in place in order to avoid
         * running out of storage space, we should sort files in order
         * of descending size for the next step */

        /* get our file list for the destination */
        int numfiles = 0;
        char** files = NULL;
        if (have_outgoing) {
          scr_filemap_list_files(map, id, send_rank, &numfiles, &files);
        }

        /* while we have a file to send or receive ... */
        while (have_incoming || have_outgoing) {
          /* get the filename */
          char* file = NULL;
          scr_meta* send_meta = NULL;
          if (have_outgoing) {
            file = files[numfiles - send_num];
            send_meta = scr_meta_new();
            scr_filemap_get_meta(map, id, send_rank, file, send_meta);
          }

          /* exchange meta data so we can determine type of incoming file */
          scr_meta* recv_meta = scr_meta_new();
          scr_hash_sendrecv(send_meta, send_rank, recv_meta, recv_rank, scr_comm_world);

          /* get the path for this file based on its type and dataset id */
          char* dir = NULL;
          if (have_incoming) {
            if (scr_meta_check_filetype(recv_meta, SCR_META_FILE_USER) == SCR_SUCCESS) {
              dir = scr_cache_dir_get(red, id);
            } else {
              dir = scr_cache_dir_hidden_get(red, id);
            }
          }

          /* exhange file names with partners,
           * building full path of incoming file */
          char file_partner[SCR_MAX_FILENAME];
          scr_swap_file_names(
            file, send_rank, file_partner, sizeof(file_partner), recv_rank,
            dir, scr_comm_world
          );

          /* free directory string */
          scr_free(&dir);

          /* free incoming meta data (we'll get this again later) */
          scr_meta_delete(&recv_meta);

          /* if we'll receive a file, record the name of our file
           * in the filemap and write it to disk */
          recv_meta = NULL;
          if (recv_rank != MPI_PROC_NULL) {
            recv_meta = scr_meta_new();
            scr_filemap_add_file(map, id, scr_my_rank_world, file_partner);
            scr_filemap_write(scr_map_file, map);
          }

          /* either sending or receiving a file this round, since we move files,
           * it will be deleted or overwritten */
          if (scr_swap_files(MOVE_FILES, file, send_meta, send_rank,
              file_partner, recv_meta, recv_rank, scr_comm_world) != SCR_SUCCESS)
          {
            scr_err("Swapping files: %s to %d, %s from %d @ %s:%d",
                    file, send_rank, file_partner, recv_rank, __FILE__, __LINE__
            );
            rc = SCR_FAILURE;
          }

          /* if we received a file, record its meta data and decrement
           * our receive count */
          if (have_incoming) {
            /* record meta data for the file we received */
            scr_filemap_set_meta(map, id, scr_my_rank_world, file_partner, recv_meta);
            scr_meta_delete(&recv_meta);

            /* decrement receive count */
            recv_num--;
            if (recv_num == 0) {
              have_incoming = 0;
              recv_rank = MPI_PROC_NULL;
            }
          }

          /* if we sent a file, remove it from the filemap and decrement
           * our send count */
          if (have_outgoing) {
            /* remove file from the filemap */
            scr_filemap_remove_file(map, id, send_rank, file);
            scr_meta_delete(&send_meta);

            /* decrement our send count */
            send_num--;
            if (send_num == 0) {
              have_outgoing = 0;
              send_rank = MPI_PROC_NULL;
            }
          }

          /* update filemap on disk */
          scr_filemap_write(scr_map_file, map);
        }

        /* free our file list */
        scr_free(&files);
      }
    }
  }

  /* if we have more rounds than max rounds, delete the remainder of our files */
  for (round = max_rounds+1; round < nranks; round++) {
    /* have someone's files for this round, so delete them */
    int dst_rank = have_rank_by_round[round];
    scr_unlink_rank(map, id, dst_rank);
  }

  scr_free(&send_flag_by_round);
  scr_free(&have_rank_by_round);

  /* write out new filemap and free the memory resources */
  scr_filemap_write(scr_map_file, map);

  /* clean out any incomplete files */
  scr_cache_clean(map);

  /* TODO: if the exchange or redundancy rebuild failed,
   * we should also delete any *good* files we received */

  /* return whether distribute succeeded, it does not ensure we have
   * all of our files, only that the transfer completed without failure */
  return rc;
}
Esempio n. 3
0
/* given a filename, its meta data, its list of segments, and list of destination containers,
 * copy file to container files */
static int scr_flush_file_to_containers(
  const char* file,
  scr_meta* meta,
  scr_hash* segments,
  const char* dst_dir)
{
  /* check that we got something for a source file */
  if (file == NULL || strcmp(file, "") == 0) {
    scr_err("Invalid source file @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* check that our other arguments are valid */
  if (meta == NULL || segments == NULL) {
    scr_err("Invalid metadata or segments @ %s:%d",
      __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

  /* open the file for reading */
  int fd_src = scr_open(file, O_RDONLY);
  if (fd_src < 0) {
    scr_err("Opening file to copy: scr_open(%s) errno=%d %s @ %s:%d",
      file, errno, strerror(errno), __FILE__, __LINE__
    );
    return SCR_FAILURE;
  }

#if !defined(__APPLE__)
  /* TODO:
  posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED | POSIX_FADV_SEQUENTIAL)
  that tells the kernel that you don't ever need the pages
  from the file again, and it won't bother keeping them in the page cache.
  */
  posix_fadvise(fd_src, 0, 0, POSIX_FADV_DONTNEED | POSIX_FADV_SEQUENTIAL);
#endif

  /* get the buffer size we'll use to write to the file */
  unsigned long buf_size = scr_file_buf_size;

  /* allocate buffer to read in file chunks */
  char* buf = (char*) SCR_MALLOC(buf_size);

  /* initialize crc value */
  uLong crc;
  if (scr_crc_on_flush) {
    crc = crc32(0L, Z_NULL, 0);
  }

  int rc = SCR_SUCCESS;

  /* write out each segment */
  scr_hash_sort_int(segments, SCR_HASH_SORT_ASCENDING);
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(segments);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the container info for this segment */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get the offset into the container and the length of the segment (both in bytes) */
    char* container_name;
    unsigned long container_offset, segment_length;
    if (scr_container_get_name_offset_length(hash,
      &container_name, &container_offset, &segment_length) != SCR_SUCCESS)
    {
      scr_err("Failed to get segment offset and length @ %s:%d",
              __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
      break;
    }

    /* build full name to destination file */
    scr_path* dst_path = scr_path_from_str(dst_dir);
    scr_path_append_str(dst_path, container_name);
    scr_path_reduce(dst_path);
    char* dst_file = scr_path_strdup(dst_path);

    /* open container file for writing -- we don't truncate here because more than one
     * process may be writing to the same file */
    int fd_container = scr_open(dst_file, O_WRONLY);
    if (fd_container < 0) {
      scr_err("Opening file for writing: scr_open(%s) errno=%d %s @ %s:%d",
        dst_file, errno, strerror(errno), __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
      break;
    }

#if !defined(__APPLE__)
    /* TODO:
    posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED | POSIX_FADV_SEQUENTIAL)
    that tells the kernel that you don't ever need the pages
    from the file again, and it won't bother keeping them in the page cache.
    */
    posix_fadvise(fd_container, 0, 0, POSIX_FADV_DONTNEED | POSIX_FADV_SEQUENTIAL);
#endif

    /* seek to offset within container */
    off_t pos = (off_t) container_offset;
    if (lseek(fd_container, pos, SEEK_SET) == (off_t)-1) {
      /* our seek failed, return an error */
      scr_err("Failed to seek to byte %lu in %s @ %s:%d",
        pos, dst_file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
      break;
    }

    /* copy data from file into container in chunks */
    unsigned long remaining = segment_length;
    while (remaining > 0) {
      /* read / write up to buf_size bytes at a time from file */
      unsigned long count = remaining;
      if (count > buf_size) {
        count = buf_size;
      }

      /* attempt to read buf_size bytes from file */
      int nread = scr_read_attempt(file, fd_src, buf, count);

      /* if we read some bytes, write them out */
      if (nread > 0) {
        /* optionally compute crc value as we go */
        if (scr_crc_on_flush) {
          crc = crc32(crc, (const Bytef*) buf, (uInt) nread);
        }

        /* write our nread bytes out */
        int nwrite = scr_write_attempt(dst_file, fd_container, buf, nread);

        /* check for a write error or a short write */
        if (nwrite != nread) {
          /* write had a problem, stop copying and return an error */
          rc = SCR_FAILURE;
          break;
        }

        /* subtract the bytes we've processed from the number remaining */
        remaining -= (unsigned long) nread;
      }

      /* assume a short read is an error */
      if (nread < count) {
        /* read had a problem, stop copying and return an error */
        rc = SCR_FAILURE;
        break;
      }

      /* check for a read error, stop copying and return an error */
      if (nread < 0) {
        /* read had a problem, stop copying and return an error */
        rc = SCR_FAILURE;
        break;
      }
    }

    /* close container */
    if (scr_close(dst_file, fd_container) != SCR_SUCCESS) {
      rc = SCR_FAILURE;
    }

    /* free the container file name and path */
    scr_free(&dst_file);
    scr_path_delete(&dst_path);
  }

  /* close the source file */
  if (scr_close(file, fd_src) != SCR_SUCCESS) {
    rc = SCR_FAILURE;
  }

  /* free buffer */
  scr_free(&buf);

  /* verify / set crc value */
  if (rc == SCR_SUCCESS) {
    uLong crc2;
    if (scr_crc_on_flush) {
      if (scr_meta_get_crc32(meta, &crc2) == SCR_SUCCESS) {
        /* if a crc is already set in the meta data, check that we computed the same value */
        if (crc != crc2) {
          scr_err("CRC32 mismatch detected when flushing file %s @ %s:%d",
            file, __FILE__, __LINE__
          );
          rc = SCR_FAILURE;
        }
      } else {
        /* if there is no crc set, let's set it now */
        scr_meta_set_crc32(meta, crc);
      }
    }
  }

  return rc;
}
Esempio n. 4
0
/* flush files specified in list, and record corresponding entries for summary file */
static int scr_flush_files_list(scr_hash* file_list, scr_hash* summary)
{
  /* assume we will succeed in this flush */
  int rc = SCR_SUCCESS;

  /* flush each of my files and fill in summary data structure */
  scr_hash_elem* elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (elem = scr_hash_elem_first(files);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(elem);

    /* convert file to path and extract name of file */
    scr_path* path_name = scr_path_from_str(file);
    scr_path_basename(path_name);

    /* get the hash for this element */
    scr_hash* hash = scr_hash_elem_hash(elem);

    /* get meta data for this file */
    scr_meta* meta = scr_hash_get(hash, SCR_KEY_META);

    /* if segments are defined, we flush the file to its containers,
     * otherwise we copy the file out as is */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* TODO: PRESERVE get original filename here */

      /* add this file to the summary file */
      char* name = scr_path_strdup(path_name);
      scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
      scr_free(&name);

// USERDEF fixme!
      /* flush the file to the containers listed in its segmenets */
      if (scr_flush_file_to_containers(file, meta, segments, scr_prefix) == SCR_SUCCESS) {
        /* successfully flushed this file, record the filesize */
        unsigned long filesize = 0;
        if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
          scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
        }

        /* record the crc32 if one was computed */
        uLong crc = 0;
        if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
          scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
        }

        /* record segment information in summary file */
        scr_hash* segments_copy = scr_hash_new();
        scr_hash_merge(segments_copy, segments);
        scr_hash_set(file_hash, SCR_SUMMARY_6_KEY_SEGMENT, segments_copy);
      } else {
        /* the flush failed */
        rc = SCR_FAILURE;

        /* explicitly mark file as incomplete */
        scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
      }
    } else {
      /* get directory to flush file to */
      char* dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &dir) == SCR_SUCCESS) {
        /* create full path of destination file */
        scr_path* path_full = scr_path_from_str(dir);
        scr_path_append(path_full, path_name);

        /* get relative path to flushed file from SCR_PREFIX directory */
        scr_path* path_relative = scr_path_relative(scr_prefix_path, path_full);
        if (! scr_path_is_null(path_relative)) {
          /* record the name of the file in the summary hash, and get reference to a hash for this file */
          char* name = scr_path_strdup(path_relative);
          scr_hash* file_hash = scr_hash_set_kv(summary, SCR_SUMMARY_6_KEY_FILE, name);
          scr_free(&name);

          /* flush the file and fill in the meta data for this file */
          if (scr_flush_a_file(file, dir, meta) == SCR_SUCCESS) {
            /* successfully flushed this file, record the filesize */
            unsigned long filesize = 0;
            if (scr_meta_get_filesize(meta, &filesize) == SCR_SUCCESS) {
              scr_hash_util_set_bytecount(file_hash, SCR_SUMMARY_6_KEY_SIZE, filesize);
            }

            /* record the crc32 if one was computed */
            uLong crc = 0;
            if (scr_meta_get_crc32(meta, &crc) == SCR_SUCCESS) {
              scr_hash_util_set_crc32(file_hash, SCR_SUMMARY_6_KEY_CRC, crc);
            }
          } else {
            /* the flush failed */
            rc = SCR_FAILURE;

            /* explicitly mark incomplete files */
            scr_hash_set_kv_int(file_hash, SCR_SUMMARY_6_KEY_COMPLETE, 0);
          }
        } else {
          scr_abort(-1, "Failed to get relative path to directory %s from %s @ %s:%d",
            dir, scr_prefix, __FILE__, __LINE__
          );
        }

        /* free relative and full paths */
        scr_path_delete(&path_relative);
        scr_path_delete(&path_full);
      } else {
        scr_abort(-1, "Failed to read directory to flush file to @ %s:%d",
          __FILE__, __LINE__
        );
      }
    }

    /* free the file name path */
    scr_path_delete(&path_name);
  }

  return rc;
}
Esempio n. 5
0
/* read contents of summary file */
static int scr_fetch_summary(
  const char* summary_dir,
  scr_hash* file_list)
{
  /* assume that we won't succeed in our fetch attempt */
  int rc = SCR_SUCCESS;

  /* check whether summary file exists and is readable */
  if (scr_my_rank_world == 0) {
    /* check that we can access the directory */
    if (scr_file_is_readable(summary_dir) != SCR_SUCCESS) {
      scr_err("Failed to access summary directory %s @ %s:%d",
        summary_dir, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }
  }

  /* broadcast success code from rank 0 */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    return rc;
  }

  /* add path to file list */
  scr_hash_util_set_str(file_list, SCR_KEY_PATH, summary_dir);

  /* build path to summary file */
  scr_path* dataset_path = scr_path_from_str(summary_dir);
  scr_path* meta_path = scr_path_dup(dataset_path);
  scr_path_append_str(meta_path, ".scr");
  scr_path_reduce(meta_path);

  /* rank 0 reads the summary file */
  scr_hash* header = scr_hash_new();
  if (scr_my_rank_world == 0) {
    /* build path to summary file */
    scr_path* summary_path = scr_path_dup(meta_path);
    scr_path_append_str(summary_path, "summary.scr");
    const char* summary_file = scr_path_strdup(summary_path);

    /* open file for reading */
    int fd = scr_open(summary_file, O_RDONLY);
    if (fd >= 0) {
      /* read summary hash */
      ssize_t header_size = scr_hash_read_fd(summary_file, fd, header);
      if (header_size < 0) {
        rc = SCR_FAILURE;
      }

      /* TODO: check that the version is correct */

      /* close the file */
      scr_close(summary_file, fd);
    } else {
      scr_err("Failed to open summary file %s @ %s:%d",
        summary_file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }

    /* free summary path and string */
    scr_free(&summary_file);
    scr_path_delete(&summary_path);
  }

  /* broadcast success code from rank 0 */
  MPI_Bcast(&rc, 1, MPI_INT, 0, scr_comm_world);
  if (rc != SCR_SUCCESS) {
    goto cleanup;
  }

  /* broadcast the summary hash */
  scr_hash_bcast(header, 0, scr_comm_world);

  /* extract and record the datast in file list */
  scr_hash* dataset_hash = scr_hash_new();
  scr_dataset* dataset = scr_hash_get(header, SCR_SUMMARY_6_KEY_DATASET);
  scr_hash_merge(dataset_hash, dataset);
  scr_hash_set(file_list, SCR_SUMMARY_6_KEY_DATASET, dataset_hash);

  /* build path to rank2file map */
  scr_path* rank2file_path = scr_path_dup(meta_path);
  scr_path_append_str(rank2file_path, "rank2file.scr");

  /* fetch file names and offsets containing file hash data */
  int valid = 0;
  char* file = NULL;
  unsigned long offset = 0;
  if (scr_my_rank_world == 0) {
    /* rank 0 is only valid reader to start with */
    valid  = 1;
    file   = scr_path_strdup(rank2file_path);
    offset = 0;
  }
  if (scr_fetch_rank2file_map(dataset_path, 1, &valid, &file, &offset)
      != SCR_SUCCESS)
  {
    rc = SCR_FAILURE;
  }

  /* create hashes to exchange data */
  scr_hash* send = scr_hash_new();
  scr_hash* recv = scr_hash_new();

  /* read data from file */
  if (valid) {
    /* open file if necessary */
    int fd = scr_open(file, O_RDONLY);
    if (fd >= 0) {
      /* create hash to hold file contents */
      scr_hash* save = scr_hash_new();

      /* read hash from file */
      scr_lseek(file, fd, offset, SEEK_SET);
      ssize_t readsize = scr_hash_read_fd(file, fd, save);
      if (readsize < 0) {
        scr_err("Failed to read rank2file map file %s @ %s:%d",
          file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* check that the number of ranks match */
      int ranks = 0;
      scr_hash_util_get_int(save, SCR_SUMMARY_6_KEY_RANKS, &ranks);
      if (ranks != scr_ranks_world) {
        scr_err("Invalid number of ranks in %s, got %d expected %d @ %s:%d",
          file, ranks, scr_ranks_world, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* delete current send hash, set it to values from file,
       * delete file hash */
      scr_hash_delete(&send);
      send = scr_hash_extract(save, SCR_SUMMARY_6_KEY_RANK);
      scr_hash_delete(&save);

      /* close the file */
      scr_close(file, fd);
    } else {
      scr_err("Failed to open rank2file map %s @ %s:%d",
        file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }

    /* delete file name string */
    scr_free(&file);
  }

  /* check that everyone read the data ok */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

  /* scatter to groups */
  scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT);

  /* iterate over the ranks that sent data to us, and set up our
   * list of files */
  scr_hash_elem* elem;
  for (elem = scr_hash_elem_first(recv);
       elem != NULL;
       elem = scr_hash_elem_next(elem))
  {
    /* the key is the source rank, which we don't care about,
     * the info we need is in the element hash */
    scr_hash* elem_hash = scr_hash_elem_hash(elem);

    /* get pointer to file hash */
    scr_hash* file_hash = scr_hash_get(elem_hash, SCR_SUMMARY_6_KEY_FILE);
    if (file_hash != NULL) {
      /* TODO: parse summary file format */
      scr_hash_merge(file_list, elem_hash);
    } else {
      rc = SCR_FAILURE;
    }
  }

  /* fill in file list parameters */
  if (rc == SCR_SUCCESS) {
    /* if we're not using containers, add PATH entry for each of our
     * files */
    scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
    for (elem = scr_hash_elem_first(files);
         elem != NULL;
         elem = scr_hash_elem_next(elem))
    {
      /* get the file name */
      char* file = scr_hash_elem_key(elem);

      /* combine the file name with the summary directory to build a
       * full path to the file */
      scr_path* path_full = scr_path_dup(dataset_path);
      scr_path_append_str(path_full, file);

      /* subtract off last component to get just the path */
      scr_path_dirname(path_full);
      char* path = scr_path_strdup(path_full);

      /* record path in file list */
      scr_hash* hash = scr_hash_elem_hash(elem);
      scr_hash_util_set_str(hash, SCR_KEY_PATH, path);

      /* free the path and string */
      scr_free(&path);
      scr_path_delete(&path_full);
    }
  }

  /* check that everyone read the data ok */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup_hashes;
  }

cleanup_hashes:
  /* delete send and receive hashes */
  scr_hash_delete(&recv);
  scr_hash_delete(&send);

  /* free string and path for rank2file map */
  scr_path_delete(&rank2file_path);

cleanup:
  /* free the header hash */
  scr_hash_delete(&header);

  /* free path for dataset directory */
  scr_path_delete(&meta_path);
  scr_path_delete(&dataset_path);

  return rc;
}
Esempio n. 6
0
static int scr_fetch_rank2file_map(
  const scr_path* dataset_path,
  int             depth,
  int*            ptr_valid,
  char**          ptr_file,
  unsigned long*  ptr_offset)
{
  int rc = SCR_SUCCESS;

  /* get local variables so we don't have to deference everything */
  int valid            = *ptr_valid;
  char* file           = *ptr_file;
  unsigned long offset = *ptr_offset;

  /* create a hash to hold section of file */
  scr_hash* hash = scr_hash_new();

  /* if we can read from file do it */
  if (valid) {
    /* open file if we haven't already */
    int fd = scr_open(file, O_RDONLY);
    if (fd >= 0) {
      /* read our segment from the file */
      scr_lseek(file, fd, offset, SEEK_SET);
      ssize_t read_rc = scr_hash_read_fd(file, fd, hash);
      if (read_rc < 0) {
        scr_err("Failed to read from %s @ %s:%d",
          file, __FILE__, __LINE__
        );
        rc = SCR_FAILURE;
      }

      /* close the file */
      scr_close(file, fd);
    } else {
      scr_err("Failed to open rank2file map %s @ %s:%d",
        file, __FILE__, __LINE__
      );
      rc = SCR_FAILURE;
    }
  }

  /* check for read errors */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup;
  }

  /* create hashes to exchange data */
  scr_hash* send = scr_hash_new();
  scr_hash* recv = scr_hash_new();

  /* copy rank data into send hash */
  if (valid) {
    scr_hash* rank_hash = scr_hash_get(hash, SCR_SUMMARY_6_KEY_RANK);
    scr_hash_merge(send, rank_hash);
  }

  /* exchange hashes */
  scr_hash_exchange_direction(send, recv, scr_comm_world, SCR_HASH_EXCHANGE_RIGHT);

  /* see if anyone sent us anything */
  int newvalid = 0;
  char* newfile = NULL;
  unsigned long newoffset = 0;
  scr_hash_elem* elem = scr_hash_elem_first(recv);
  if (elem != NULL) {
    /* got something, so now we'll read in the next step */
    newvalid = 1;

    /* get file name we should read */
    scr_hash* elem_hash = scr_hash_elem_hash(elem);
    char* value;
    if (scr_hash_util_get_str(elem_hash, SCR_SUMMARY_6_KEY_FILE, &value)
        == SCR_SUCCESS)
    {
      /* return string of full path to file to caller */
      scr_path* newpath = scr_path_dup(dataset_path);
      scr_path_append_str(newpath, value);
      newfile = scr_path_strdup(newpath);
      scr_path_delete(&newpath);
    } else {
      rc = SCR_FAILURE;
    }

    /* get offset we should start reading from */
    if (scr_hash_util_get_bytecount(elem_hash, SCR_SUMMARY_6_KEY_OFFSET, &newoffset)
        != SCR_SUCCESS)
    {
      rc = SCR_FAILURE;
    }
  }

  /* free the send and receive hashes */
  scr_hash_delete(&recv);
  scr_hash_delete(&send);

  /* get level id, and broadcast it from rank 0,
   * which we assume to be a reader in all steps */
  int level_id = -1;
  if (valid) {
    if (scr_hash_util_get_int(hash, SCR_SUMMARY_6_KEY_LEVEL, &level_id)
        != SCR_SUCCESS)
    {
      rc = SCR_FAILURE;
    }
  }
  MPI_Bcast(&level_id, 1, MPI_INT, 0, scr_comm_world);

  /* check for read errors */
  if (! scr_alltrue(rc == SCR_SUCCESS)) {
    rc = SCR_FAILURE;
    goto cleanup;
  }

  /* set parameters for output or next iteration,
   * we already took care of updating ptr_fd earlier */
  if (valid) {
    scr_free(ptr_file);
  }
  *ptr_valid  = newvalid;
  *ptr_file   = newfile;
  *ptr_offset = newoffset;

  /* recurse if we still have levels to read */
  if (level_id > 1) {
    rc = scr_fetch_rank2file_map(dataset_path, depth+1, ptr_valid, ptr_file, ptr_offset);
  }

cleanup:
  /* free the hash */
  scr_hash_delete(&hash);

  return rc;
}
Esempio n. 7
0
/* fetch files listed in hash into specified cache directory,
 * update filemap and fill in total number of bytes fetched,
 * returns SCR_SUCCESS if successful */
static int scr_fetch_files_list(
  const scr_hash* file_list,
  const char* dir,
  scr_filemap* map)
{
  /* assume we'll succeed in fetching our files */
  int rc = SCR_SUCCESS;

  /* assume we don't have any files to fetch */
  int my_num_files = 0;

  /* get dataset id */
  int id;
  scr_dataset* dataset = scr_hash_get(file_list, SCR_KEY_DATASET);
  scr_dataset_get_id(dataset, &id);

  /* now iterate through the file list and fetch each file */
  scr_hash_elem* file_elem = NULL;
  scr_hash* files = scr_hash_get(file_list, SCR_KEY_FILE);
  for (file_elem = scr_hash_elem_first(files);
       file_elem != NULL;
       file_elem = scr_hash_elem_next(file_elem))
  {
    /* get the filename */
    char* file = scr_hash_elem_key(file_elem);

    /* get a pointer to the hash for this file */
    scr_hash* hash = scr_hash_elem_hash(file_elem);

    /* check whether we are supposed to fetch this file */
    /* TODO: this is a hacky way to avoid reading a redundancy file
     * back in under the assumption that it's an original file, which
     * breaks our redundancy computation due to a name conflict on
     * the file names */
    scr_hash_elem* no_fetch_hash = scr_hash_elem_get(hash, SCR_SUMMARY_6_KEY_NOFETCH);
    if (no_fetch_hash != NULL) {
      continue;
    }

    /* increment our file count */
    my_num_files++;

    /* build the destination file name */
    scr_path* path_newfile = scr_path_from_str(file);
    scr_path_basename(path_newfile);
    scr_path_prepend_str(path_newfile, dir);
    char* newfile = scr_path_strdup(path_newfile);
      
    /* add the file to our filemap and write it to disk before creating
     * the file, this way we have a record that it may exist before we
     * actually start to fetch it */
    scr_filemap_add_file(map, id, scr_my_rank_world, newfile);
    scr_filemap_write(scr_map_file, map);

    /* get the file size */
    unsigned long filesize = 0;
    if (scr_hash_util_get_unsigned_long(hash, SCR_KEY_SIZE, &filesize) != SCR_SUCCESS) {
      scr_err("Failed to read file size from summary data @ %s:%d",
        __FILE__, __LINE__
      );
      rc = SCR_FAILURE;

      /* free path and string */
      scr_free(&newfile);
      scr_path_delete(&path_newfile);

      break;
    }

    /* check for a complete flag */
    int complete = 1;
    if (scr_hash_util_get_int(hash, SCR_KEY_COMPLETE, &complete) != SCR_SUCCESS) {
      /* in summary file, the absence of a complete flag on a file
       * implies the file is complete */
      complete = 1;
    }

    /* create a new meta data object for this file */
    scr_meta* meta = scr_meta_new();

    /* set the meta data */
    scr_meta_set_filename(meta, newfile);
    scr_meta_set_filetype(meta, SCR_META_FILE_USER);
    scr_meta_set_filesize(meta, filesize);
    scr_meta_set_complete(meta, 1);
    /* TODODSET: move the ranks field elsewhere, for now it's needed
     * by scr_index.c */
    scr_meta_set_ranks(meta, scr_ranks_world);

    /* get the crc, if set, and add it to the meta data */
    uLong crc;
    if (scr_hash_util_get_crc32(hash, SCR_KEY_CRC, &crc) == SCR_SUCCESS) {
      scr_meta_set_crc32(meta, crc);
    }

    /* fetch file from containers if they are defined, otherwise fetch
     * the native file */
    scr_hash* segments = scr_hash_get(hash, SCR_SUMMARY_6_KEY_SEGMENT);
    if (segments != NULL) {
      /* get source path */
      char* from_dir;
      if (scr_hash_util_get_str(file_list, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        /* fetch file from containers */
        if (scr_fetch_file_from_containers(newfile, meta, segments, from_dir) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to find base dataset directory in file list */
        rc = SCR_FAILURE;
      }
    } else {
      /* fetch native file, lookup directory for this file */
      char* from_dir;
      if (scr_hash_util_get_str(hash, SCR_KEY_PATH, &from_dir) == SCR_SUCCESS) {
        if (scr_fetch_file(newfile, from_dir, meta) != SCR_SUCCESS) {
          /* failed to fetch file, mark it as incomplete */
          scr_meta_set_complete(meta, 0);
          rc = SCR_FAILURE;
        }
      } else {
        /* failed to read source directory, mark file as incomplete */
        scr_meta_set_complete(meta, 0);
        rc = SCR_FAILURE;
      }
    }

    /* TODODSET: want to write out filemap before we start to fetch
     * each file? */

    /* mark the file as complete */
    scr_filemap_set_meta(map, id, scr_my_rank_world, newfile, meta);

    /* free the meta data object */
    scr_meta_delete(&meta);

    /* free path and string */
    scr_free(&newfile);
    scr_path_delete(&path_newfile);
  }

  /* set the expected number of files for this dataset */
  scr_filemap_set_expected_files(map, id, scr_my_rank_world, my_num_files);
  scr_filemap_write(scr_map_file, map);

  return rc;
}