Exemplo n.º 1
0
/*
 * Remove the entry named name_to_unlink from the search tree of index.
 *
 * Returns index, or NULL (after reporting through fferror_print) when the
 * index has no tree. The result of tdelete() is not inspected, so a name
 * that is not present in the tree is not reported to the caller.
 */
ffindex_index_t* ffindex_tree_unlink(ffindex_index_t* index, char* name_to_unlink)
{
  ffindex_entry_t key;

  if(index->tree_root != NULL)
  {
    /* Only the name field of the probe entry is filled in before lookup. */
    strncpy(key.name, name_to_unlink, FFINDEX_MAX_ENTRY_NAME_LENTH);
    tdelete((const void *)&key, &index->tree_root, ffindex_compare_entries_by_name);
    return index;
  }

  fferror_print(__FILE__, __LINE__, __func__, "tree is NULL");
  return NULL;
}
Exemplo n.º 2
0
/*
 * Insert all regular files from a directory into ffindex.
 *
 * data_file, index_file: passed through to ffindex_insert_file().
 * start_offset:          in/out; used as the starting offset and overwritten
 *                        with the offset after the last inserted file.
 * input_dir_name:        directory to scan; a trailing '/' is optional.
 *
 * Entries whose name starts with '.' and entries that stat() does not report
 * as regular files are skipped. Entries that cannot be stat()ed or whose
 * full path does not fit in PATH_MAX are reported and skipped.
 *
 * Returns 0 once the directory has been scanned, -1 if the directory cannot
 * be opened or its name does not fit in PATH_MAX.
 */
int ffindex_insert_dir(FILE *data_file, FILE *index_file, size_t *start_offset, char *input_dir_name)
{
  DIR *dir = opendir(input_dir_name);
  if(dir == NULL)
  {
    fferror_print(__FILE__, __LINE__, __func__, input_dir_name);
    return -1;
  }

  /* Build "<dir>/" once; each entry name is appended behind this prefix. */
  size_t input_dir_name_len = strlen(input_dir_name);
  int needs_slash = (input_dir_name_len == 0 || input_dir_name[input_dir_name_len - 1] != '/');
  char path[PATH_MAX];
  int prefix_len = snprintf(path, sizeof path, "%s%s", input_dir_name, needs_slash ? "/" : "");
  if(prefix_len < 0 || (size_t)prefix_len >= sizeof path)
  {
    fferror_print(__FILE__, __LINE__, __func__, "directory name too long");
    closedir(dir);
    return -1;
  }
  size_t room = sizeof path - (size_t)prefix_len;

  size_t offset = *start_offset;
  struct dirent *entry;
  while((entry = readdir(dir)) != NULL)
  {
    if(entry->d_name[0] == '.')
      continue;

    int name_len = snprintf(path + prefix_len, room, "%s", entry->d_name);
    if(name_len < 0 || (size_t)name_len >= room)
    {
      fferror_print(__FILE__, __LINE__, __func__, "path too long");
      continue;
    }

    struct stat sb;
    if(stat(path, &sb) == -1)
    {
      /* sb is not valid after a failed stat(); do not inspect it. */
      fferror_print(__FILE__, __LINE__, __func__, path);
      continue;
    }
    if(!S_ISREG(sb.st_mode))
      continue;
    ffindex_insert_file(data_file, index_file, &offset, path, entry->d_name);
  }
  closedir(dir);

  /* update return value */
  *start_offset = offset;

  return 0;
}
Exemplo n.º 3
0
/*
 * Insert one file by handle into ffindex.
 *
 * Copies everything readable from file to data_file, appends a '\0'
 * separator, then writes the line "name\toffset\tlength\n" to index_file,
 * where offset is the value of *offset on entry and length includes the
 * separator byte. *offset is advanced by every byte written to data_file.
 *
 * Returns 0 on success and -1 when reading file, writing data_file or
 * writing the index line fails. No index line is written when the data copy
 * failed; *offset still accounts for the bytes that did reach data_file.
 */
int ffindex_insert_filestream(FILE *data_file, FILE *index_file, size_t *offset, FILE* file, char *name)
{
    /* copy and paste file to data file */
    char buffer[FFINDEX_BUFFER_SIZE];
    size_t offset_before = *offset;
    size_t read_size;
    while((read_size = fread(buffer, sizeof(char), sizeof(buffer), file)) > 0)
    {
      size_t write_size = fwrite(buffer, sizeof(char), read_size, data_file);
      *offset += write_size;
      if(read_size != write_size)
        goto EXCEPTION_ffindex_insert_file;
    }

    /* fread() returns 0 for both EOF and error; tell them apart here. */
    if(ferror(file) != 0)
      goto EXCEPTION_ffindex_insert_file;

    /* Separate by '\0' and thus also make sure at least one byte is written */
    buffer[0] = '\0';
    if(fwrite(buffer, sizeof(char), 1, data_file) != 1)
      goto EXCEPTION_ffindex_insert_file;
    *offset += 1;

    /* write index entry */
    if(fprintf(index_file, "%s\t%zu\t%zu\n", name, offset_before, *offset - offset_before) < 0)
      goto EXCEPTION_ffindex_insert_file;

    return 0;

EXCEPTION_ffindex_insert_file:
    fferror_print(__FILE__, __LINE__, __func__, name);
    return -1;
}
Exemplo n.º 4
0
/*
 * Parse an index file into a newly allocated ffindex_index_t.
 *
 * index_file:      open index file; its contents are obtained through
 *                  ffindex_mmap_data().
 * num_max_entries: capacity of the entries array; 0 selects
 *                  FFINDEX_MAX_INDEX_ENTRIES_DEFAULT.
 *
 * Every line is expected to look like "name\toffset\tlength\n".
 *
 * Returns the parsed index, or NULL when allocation fails, the mapping is
 * NULL, a line is malformed or its name does not fit the entry's name field,
 * the file holds more than num_max_entries lines, or it holds no entries.
 * The index structure is freed on every NULL return.
 *
 * NOTE(review): the mapped data is not unmapped on the failure paths; the
 * matching release call is not visible here.
 * NOTE(review): strtol() expects a terminated string; a file that ends in
 * digits without a trailing newline relies on the bytes behind the mapping.
 */
ffindex_index_t* ffindex_index_parse(FILE *index_file, size_t num_max_entries)
{
  if(num_max_entries == 0)
    num_max_entries = FFINDEX_MAX_INDEX_ENTRIES_DEFAULT;
  size_t nbytes = sizeof(ffindex_index_t) + (sizeof(ffindex_entry_t) * num_max_entries);
  ffindex_index_t *index = malloc(nbytes);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, __func__, "malloc failed");
    return NULL;
  }
  index->num_max_entries = num_max_entries;

  index->file = index_file;
  index->index_data = ffindex_mmap_data(index_file, &(index->index_data_size));
  if(index->index_data == NULL)
  {
    /* NOTE(review): only a NULL result is detected; confirm how
     * ffindex_mmap_data() reports a failed mapping. */
    fferror_print(__FILE__, __LINE__, __func__, "mapping index file failed");
    free(index);
    return NULL;
  }
  index->type = SORTED_ARRAY; /* Assume a sorted file for now */

  const size_t name_capacity = sizeof(index->entries[0].name);
  char *d = index->index_data;
  char *data_end = index->index_data + index->index_data_size;
  char *problem = NULL;
  size_t i = 0;

  /* Faster than scanf per line */
  while(d < data_end)
  {
    if(i >= num_max_entries)
    {
      problem = "too many entries in index";
      break;
    }

    /* Copy the name up to the tab, keeping one byte for the terminator. */
    size_t p = 0;
    while(d < data_end && *d != '\t' && p < name_capacity - 1)
      index->entries[i].name[p++] = *d++;
    if(d >= data_end || *d != '\t')
    {
      problem = "malformed index line or entry name too long";
      break;
    }
    index->entries[i].name[p] = '\0';

    /* strtol() skips the leading tab; end == d means no digits were found. */
    char *end;
    index->entries[i].offset = strtol(d, &end, 10);
    if(end == d)
    {
      problem = "malformed index line: missing offset";
      break;
    }
    d = end;
    index->entries[i].length = strtol(d, &end, 10);
    if(end == d)
    {
      problem = "malformed index line: missing length";
      break;
    }
    d = end + 1; /* +1 for newline */
    i++;
  }

  if(problem != NULL)
  {
    fferror_print(__FILE__, __LINE__, __func__, problem);
    free(index);
    return NULL;
  }

  index->n_entries = i;

  if(index->n_entries == 0)
  {
    free(index);
    return NULL;
  }

  return index;
}
Exemplo n.º 5
0
/*
 * MPI driver: run PROGRAM on the entries of an FFindex, split across ranks.
 *
 * Usage: -d DATA_FILENAME_OUT -i INDEX_FILENAME_OUT DATA_FILENAME
 *        INDEX_FILENAME -- PROGRAM [PROGRAM_ARGS]*
 *
 * Each rank processes a contiguous batch of n_entries / mpi_num_procs
 * entries; the entries left over are handled one each by the lowest ranks.
 * When both -d and -i are given, every rank writes to "<name>.<rank>" files
 * and, after a barrier, rank 0 merges them by running ffindex_build.
 *
 * Returns EXIT_SUCCESS after processing (per-entry failures are reported but
 * do not change the exit status), -1 on a usage error; exits with
 * EXIT_FAILURE when a file cannot be opened or the index cannot be parsed.
 */
int main(int argn, char **argv)
{
  int mpi_rank,
      mpi_num_procs;

  MPI_Init(&argn, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &mpi_num_procs);

  int opt;
  char *data_filename_out  = NULL,
       *index_filename_out = NULL;

  while ((opt = getopt(argn, argv, "d:i:")) != -1)
  {
    switch (opt)
    {
      case 'd':
        data_filename_out = optarg;
        break;
      case 'i':
        index_filename_out = optarg;
        break;
    }
  }

  if(argn - optind < 3)
  {
    fprintf(stderr, "Not enough arguments %d.\n", argn - optind);
    fprintf(stderr, "USAGE: %s -d DATA_FILENAME_OUT -i INDEX_FILENAME_OUT DATA_FILENAME INDEX_FILENAME -- PROGRAM [PROGRAM_ARGS]*\n"
                    "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n",
                    basename(argv[0]));
    MPI_Finalize();
    return -1;
  }
  read_buffer = malloc(400 * 1024 * 1024);
  if(read_buffer == NULL) { fferror_print(__FILE__, __LINE__, argv[0], "malloc failed");  exit(EXIT_FAILURE); }

  char *data_filename  = argv[optind++];
  char *index_filename = argv[optind++];
  char *program_name   = argv[optind];
  char **program_argv = argv + optind;

  FILE *data_file  = fopen(data_filename,  "r");
  FILE *index_file = fopen(index_filename, "r");

  if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename);  exit(EXIT_FAILURE); }
  if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename);  exit(EXIT_FAILURE); }

  FILE *data_file_out = NULL, *index_file_out = NULL;
  // Setup one output FFindex for each MPI process
  if(data_filename_out != NULL && index_filename_out != NULL)
  {
    char data_filename_out_rank[FILENAME_MAX];
    char index_filename_out_rank[FILENAME_MAX];
    snprintf( data_filename_out_rank, sizeof data_filename_out_rank,  "%s.%d", data_filename_out,  mpi_rank);
    snprintf(index_filename_out_rank, sizeof index_filename_out_rank, "%s.%d", index_filename_out, mpi_rank);
    data_file_out  = fopen(data_filename_out_rank,  "w+");
    index_file_out = fopen(index_filename_out_rank, "w+");

    if( data_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename_out);  exit(EXIT_FAILURE); }
    if(index_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename_out);  exit(EXIT_FAILURE); }
  }

  int capture_stdout = (data_file_out != NULL);

  size_t data_size;
  char *data = ffindex_mmap_data(data_file, &data_size);

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename);
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }

  // Ignore SIGPIPE
  struct sigaction handler;
  handler.sa_handler = SIG_IGN;
  sigemptyset(&handler.sa_mask);
  handler.sa_flags = 0;
  sigaction(SIGPIPE, &handler, NULL);

  /* Integer division yields a batch size of 0 when there are fewer entries
   * than ranks; those entries are then all handled as left-overs below. */
  size_t n_entries   = index->n_entries;
  size_t num_procs   = (size_t)mpi_num_procs;
  size_t batch_size  = n_entries / num_procs;
  size_t range_start = (size_t)mpi_rank * batch_size;
  size_t range_end   = range_start + batch_size;

  size_t offset = 0;
  // Foreach entry
  for(size_t entry_index = range_start; entry_index < range_end; entry_index++)
  {
    ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index);
    if(entry == NULL)
    {
      /* There is no entry whose name could be printed; stop this batch but
       * still reach the barrier so that the other ranks are not blocked. */
      fprintf(stderr, "%s: no entry at index %zu\n", argv[0], entry_index);
      break;
    }
    int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset);
    if(error != 0)
      { perror(entry->name); break; }
  }

  size_t left_over = n_entries - (batch_size * num_procs);
  if((size_t)mpi_rank < left_over)
  {
    size_t left_over_entry_index = (batch_size * num_procs) + (size_t)mpi_rank;
    ffindex_entry_t* entry = ffindex_get_entry_by_index(index, left_over_entry_index);
    if(entry == NULL)
    {
      fprintf(stderr, "%s: no entry at index %zu\n", argv[0], left_over_entry_index);
    }
    else
    {
      int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset);
      if(error != 0)
        perror(entry->name);
    }
  }

  if(capture_stdout)
    fclose(data_file_out);
  if(index_file_out != NULL)
    fclose(index_file_out);

  MPI_Barrier(MPI_COMM_WORLD);

  // merge FFindexes in master
  if(data_filename_out != NULL && index_filename_out != NULL && mpi_rank == 0)
  {
    /* NOTE(review): the file names are placed in the shell command unquoted. */
    char merge_command[FILENAME_MAX * 5];
    for(int i = 0; i < mpi_num_procs; i++)
    {
      int len = snprintf(merge_command, sizeof merge_command, "ffindex_build -as %s %s -d %s.%d -i %s.%d",
                         data_filename_out, index_filename_out, data_filename_out, i, index_filename_out, i);
      if(len < 0 || (size_t)len >= sizeof merge_command)
      {
        fprintf(stderr, "%s: merge command too long for rank %d\n", argv[0], i);
        continue;
      }
      if(system(merge_command) != 0)
        fprintf(stderr, "%s: merge command failed: %s\n", argv[0], merge_command);
    }
  }

  MPI_Finalize();

  return EXIT_SUCCESS;
}
Exemplo n.º 6
0
/*
 * Modify an FFindex index file: unlink entries and write the index back.
 *
 * Options: -f/--file LIST (file with one entry name per line, repeatable),
 *          -s/--sort, -t/--tree, -u/--unlink, -v/--version.
 * Arguments: INDEX_FILENAME [ENTRY_NAME]*
 *
 * With -u, the names read from every LIST file and the names given as
 * arguments are removed from the index. The index is written back to
 * INDEX_FILENAME in every case.
 *
 * NOTE(review): use_tree starts at 1 and no option clears it, so the
 * non-tree branch (the only place -s is honoured) cannot currently be
 * reached.
 *
 * Returns EXIT_SUCCESS, EXIT_FAILURE on usage or I/O errors, or the
 * accumulated result of ffindex_write().
 */
int main(int argn, char **argv)
{
  int sort = 0, unlink = 0, version = 0, use_tree = 1;
  int err = EXIT_SUCCESS;
  char* list_filenames[MAX_FILENAME_LIST_FILES];
  size_t list_filenames_index = 0;

  static struct option long_options[] =
  {
    { "file",    required_argument, NULL, 'f' },
    { "sort",    no_argument, NULL, 's' },
    { "tree",    no_argument, NULL, 't' },
    { "unlink",  no_argument, NULL, 'u' },
    { "version", no_argument, NULL, 'v' },
    { NULL,      0,           NULL,  0  }
  };

  int opt;
  while (1)
  {
    int option_index = 0;
    opt = getopt_long(argn, argv, "stuvf:", long_options, &option_index);
    if (opt == -1)
      break;

    switch (opt)
    {
      case 'f':
        /* Refuse more list files than the fixed-size array can hold. */
        if(list_filenames_index >= MAX_FILENAME_LIST_FILES)
        {
          fprintf(stderr, "%s: too many list files given\n", argv[0]);
          return EXIT_FAILURE;
        }
        list_filenames[list_filenames_index++] = optarg;
        break;
      case 's':
        sort = 1;
        break;
      case 't':
        use_tree = 1;
        break;
      case 'u':
        unlink = 1;
        break;
      case 'v':
        version = 1;
        break;
      default:
        usage(argv[0]);
        return EXIT_FAILURE;
    }
  }

  if(version == 1)
  {
    /* Don't you dare running it on a platform where byte != 8 bits */
    printf("%s version %.2f, off_t = %zu bits\n", argv[0], FFINDEX_VERSION, sizeof(off_t) * 8);
    return EXIT_SUCCESS;
  }

  if(optind >= argn)
  {
    usage(argv[0]);
    return EXIT_FAILURE;
  }

  char *index_filename = argv[optind++];
  FILE *index_file;

  index_file = fopen(index_filename, "r+");
  if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; }

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL) { perror("ffindex_index_parse failed"); return (EXIT_FAILURE); }

  fclose(index_file);

  /* Unlink entries */
  if(unlink)
  {
    if(use_tree)
    {
      /* Build tree */
      index = ffindex_index_as_tree(index);

      /* For each list_file unlink all entries */
      for(size_t f = 0; f < list_filenames_index; f++)
      {
        printf("Unlinking entries from '%s'\n", list_filenames[f]);
        FILE *list_file = fopen(list_filenames[f], "r");
        if( list_file == NULL) { perror(list_filenames[f]); return EXIT_FAILURE; }

        /* unlink entries in file, one per line */
        char path[PATH_MAX];
        while(fgets(path, PATH_MAX, list_file) != NULL)
          index = ffindex_unlink(index, ffnchomp(path, strlen(path)));
        fclose(list_file);
      }

      /* unlink entries specified by args */
      for(int i = optind; i < argn; i++)
        index = ffindex_unlink(index, argv[i]);
    }
    else
    {
      char** sorted_names_to_unlink = malloc(FFINDEX_MAX_INDEX_ENTRIES_DEFAULT * sizeof(char *));
      if(sorted_names_to_unlink == NULL)
      {
        fferror_print(__FILE__, __LINE__, __func__, "malloc failed");
        return EXIT_FAILURE;
      }

      /* For each list_file unlink all entries */
      for(size_t f = 0; f < list_filenames_index; f++)
      {
        printf("Unlinking entries from '%s'\n", list_filenames[f]);
        FILE *list_file = fopen(list_filenames[f], "r");
        if( list_file == NULL) { perror(list_filenames[f]); return EXIT_FAILURE; }

        /* Collect the names of this file, one per line. The name counter is
         * separate from the file counter and restarts for every file. */
        char path[PATH_MAX];
        int n_names = 0;
        while((size_t)n_names < (size_t)FFINDEX_MAX_INDEX_ENTRIES_DEFAULT
              && fgets(path, PATH_MAX, list_file) != NULL)
        {
          /* The copies stay allocated until the process exits. */
          char *copy = strdup(path);
          if(copy == NULL)
          {
            fferror_print(__FILE__, __LINE__, __func__, "malloc failed");
            return EXIT_FAILURE;
          }
          sorted_names_to_unlink[n_names++] = ffnchomp(copy, strlen(copy));
        }
        if((size_t)n_names >= (size_t)FFINDEX_MAX_INDEX_ENTRIES_DEFAULT)
          fprintf(stderr, "%s: too many names in '%s', the rest is ignored\n", argv[0], list_filenames[f]);
        fclose(list_file);
        ffindex_unlink_entries(index, sorted_names_to_unlink, n_names);
      }

      /* unlink entries specified by args */
      int y = 0;
      for(int i = optind; i < argn && (size_t)y < (size_t)FFINDEX_MAX_INDEX_ENTRIES_DEFAULT; i++, y++)
        sorted_names_to_unlink[y] = argv[i];
      ffindex_unlink_entries(index, sorted_names_to_unlink, y);
      free(sorted_names_to_unlink);

      /* Sort the index entries; the single write below stores the result. */
      if(sort)
        ffindex_sort_index_file(index);
    }
  }

  /* Write index back */
  index_file = fopen(index_filename, "w");
  if(index_file == NULL) { perror(index_filename); return EXIT_FAILURE; }
  err += ffindex_write(index, index_file);
  if(fclose(index_file) != 0) { perror(index_filename); err += EXIT_FAILURE; }
  return err;
}
Exemplo n.º 7
0
/*
 * Write the data of one index entry to stdout, without its final byte
 * (entry->length - 1 bytes are written). When no data is available for the
 * entry, errno is set to ENOENT and the problem is reported with the given
 * context and key.
 */
static void ffindex_get_write_entry(char *data, ffindex_entry_t *entry, char *context, char *key)
{
  char *filedata = ffindex_get_data_by_entry(data, entry);
  if(filedata == NULL)
  {
    errno = ENOENT;
    fferror_print(__FILE__, __LINE__, context, key);
    return;
  }
  /* A zero length would make length - 1 wrap around. */
  if(entry->length > 0)
    fwrite(filedata, entry->length - 1, 1, stdout);
}

/*
 * Print entries of an FFindex to stdout.
 *
 * Usage: [-n|--byindex] DATA_FILENAME INDEX_FILENAME KEY...
 *
 * Without -n every KEY is an entry name; with -n every KEY is an entry
 * number counted from 1. Keys that cannot be resolved are reported and
 * skipped.
 *
 * Returns 0 after processing all keys and EXIT_FAILURE on a usage error;
 * exits with EXIT_FAILURE when a file cannot be opened or the index cannot
 * be parsed.
 */
int main(int argn, char **argv)
{
  int by_index = 0;
  static struct option long_options[] =
  {
    { "byindex", no_argument, NULL, 'n' },
    { NULL,      0,           NULL,  0  }
  };

  int opt;
  while (1)
  {
    int option_index = 0;
    opt = getopt_long(argn, argv, "n", long_options, &option_index);
    if (opt == -1)
      break;

    switch (opt)
    {
      case 'n':
        by_index = 1;
        break;
      default:
        usage(argv[0]);
        return EXIT_FAILURE;
    }
  }

  /* Count only what is left after option parsing: both file names are
   * required even when options were given. */
  if(argn - optind < 2)
  {
    usage(argv[0]);
    return EXIT_FAILURE;
  }
  char *data_filename  = argv[optind++];
  char *index_filename = argv[optind++];

  FILE *data_file  = fopen(data_filename,  "r");
  FILE *index_file = fopen(index_filename, "r");

  if( data_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", data_filename);  exit(EXIT_FAILURE); }
  if(index_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", index_filename);  exit(EXIT_FAILURE); }

  size_t data_size;
  char *data = ffindex_mmap_data(data_file, &data_size);

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename);
    exit(EXIT_FAILURE);
  }

  if(by_index)
  {
    for(int i = optind; i < argn; i++)
    {
      /* Entries are specified from 1 but stored from 0; anything below 1
       * (including non-numeric input) would wrap around after the -1. */
      char *end;
      long requested = strtol(argv[i], &end, 10);
      ffindex_entry_t* entry = NULL;
      if(end != argv[i] && requested >= 1)
        entry = ffindex_get_entry_by_index(index, (size_t)requested - 1);

      if(entry == NULL)
      {
        errno = ENOENT;
        fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]);
      }
      else
        ffindex_get_write_entry(data, entry, "ffindex_get entry index out of range", argv[i]);
    }
  }
  else // by name
  {
    for(int i = optind; i < argn; i++)
    {
      char *filename = argv[i];

      ffindex_entry_t* entry = ffindex_get_entry_by_name(index, filename);
      if(entry == NULL)
      {
        errno = ENOENT;
        fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename);
      }
      else
        ffindex_get_write_entry(data, entry, "ffindex_get key not found in index", filename);
    }

      /* Alternative code using (slower) ffindex_fopen */
      /*
         FILE *file = ffindex_fopen(data, index, filename);
         if(file == NULL)
         {
         errno = ENOENT; 
         fferror_print(__FILE__, __LINE__, "ffindex_fopen file not found in index", filename);
         }
         else
         {
         char line[LINE_MAX];
         while(fgets(line, LINE_MAX, file) != NULL)
         printf("%s", line);
         }
         */
  }

  return 0;
}
Exemplo n.º 8
0
/*
 * Run PROGRAM once for every entry of an FFindex, feeding the entry's data
 * to the program's standard input.
 *
 * Usage: DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*
 *
 * For each entry a pipe is created and a child is forked; the child makes
 * the read end its stdin and execs PROGRAM, the parent writes entry->length
 * bytes to the write end, closes it and waits for the child. SIGPIPE is
 * ignored, so a program that stops reading early only ends that entry's
 * write.
 *
 * Returns 0 after all entries were processed and -1 on a usage error; exits
 * with EXIT_FAILURE when a file cannot be opened or the index cannot be
 * parsed, and with the fork errno when fork() fails.
 */
int main(int argn, char **argv)
{
  if(argn < 4)
  {
    fprintf(stderr, "USAGE: %s DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*\n"
                    "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n",
                    argv[0]);
    return -1;
  }
  char *data_filename  = argv[1];
  char *index_filename = argv[2];
  char *program_name   = argv[3];
  char **program_argv = argv + 3;

  FILE *data_file  = fopen(data_filename,  "r");
  FILE *index_file = fopen(index_filename, "r");

  if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename);  exit(EXIT_FAILURE); }
  if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename);  exit(EXIT_FAILURE); }

  size_t data_size;
  char *data = ffindex_mmap_data(data_file, &data_size);

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename);
    exit(EXIT_FAILURE);
  }

  // Ignore SIGPIPE
  struct sigaction handler;
  handler.sa_handler = SIG_IGN;
  sigemptyset(&handler.sa_mask);
  handler.sa_flags = 0;
  sigaction(SIGPIPE, &handler, NULL);

  size_t range_start = 0;
  size_t range_end = index->n_entries;

  // Foreach entry
  for(size_t entry_index = range_start; entry_index < range_end; entry_index++)
  {
    ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index);
    if(entry == NULL)
    {
      /* There is no entry whose name could be printed. */
      fprintf(stderr, "%s: no entry at index %zu\n", argv[0], entry_index);
      continue;
    }

    /* Resolve the data before forking so nothing is started for an entry
     * that cannot be served. */
    char *filedata = ffindex_get_data_by_entry(data, entry);
    if(filedata == NULL) { errno = ENOENT; perror(entry->name); continue; }

    int pipefd[2];
    if(pipe(pipefd) != 0) { perror(entry->name); continue; }

    pid_t child_pid = fork();
    if(child_pid == 0)
    {
      fclose(data_file);
      fclose(index_file);
      close(pipefd[1]);

      // Make pipe from parent our new stdin
      if(dup2(pipefd[0], fileno(stdin)) < 0) { perror(entry->name); _exit(EXIT_FAILURE); }
      close(pipefd[0]);

      // exec program with the pipe as stdin
      execvp(program_name, program_argv);
      /* Only reached when exec failed. The child must end here, otherwise it
       * would carry on with the parent's loop and fork children itself. */
      perror(program_name);
      _exit(127);
    }
    else if(child_pid > 0)
    {
      // Read end is for child only
      close(pipefd[0]);

      // Write file data to child's stdin.
      size_t written = 0;
      while(written < entry->length)
      {
        ssize_t w = write(pipefd[1], filedata + written, entry->length - written);
        if(w < 0)
        {
          if(errno == EINTR)
            continue;
          /* EPIPE only means the child stopped reading; that is not
           * reported, but the write still ends here. */
          if(errno != EPIPE)
            perror(entry->name);
          break;
        }
        if(w == 0)
          break; /* no progress; avoid spinning */
        written += (size_t)w;
      }

      close(pipefd[1]); // child gets EOF
      waitpid(child_pid, NULL, 0);
    }
    else
    {
      int fork_errno = errno;
      perror(entry->name);
      close(pipefd[0]);
      close(pipefd[1]);
      exit(fork_errno);
    }
  }

  return 0;
}