示例#1
0
void writeIndexes(std::string A_indexFile, std::string B_indexFile, std::string oldDBIndex, std::string newDBIndex){

    FILE* A_index_file = fopen(A_indexFile.c_str(), "w");
    FILE* B_index_file = fopen(B_indexFile.c_str(), "w");

    ffindex_index_t* index_old = openIndex(oldDBIndex.c_str());
    ffindex_index_t* index_new = openIndex(newDBIndex.c_str());

    // positions in the databases
    unsigned int i = 0;
    unsigned int j = 0;

    int deleted_cnt = 0;
    int new_cnt = 0;
    int shared_cnt = 0;
    while (i < index_old->n_entries && j < index_new->n_entries){
        ffindex_entry_t* e_i = ffindex_get_entry_by_index(index_old, i);
        ffindex_entry_t* e_j = ffindex_get_entry_by_index(index_new, j);
        int cmp = strcmp(&(e_i->name[0]), &(e_j->name[0]));
        if (cmp == 0){
            // this sequence is in both databases
            fprintf(A_index_file, "%s\t%zd\t%zd\n", e_j->name, e_j->offset, e_j->length);
            shared_cnt++;
            i++;
            j++;
        }
        else if (cmp < 0){
            // sequence was deleted from the old database
            deleted_cnt++;
            i++;
        }
        else{
            // this sequence is new
            fprintf(B_index_file, "%s\t%zd\t%zd\n", e_j->name, e_j->offset, e_j->length);
            new_cnt++;
            j++;
        }
    }
    while (i < index_old->n_entries){
        deleted_cnt++;
        i++;
    }
    // add the rest of the new database to the new sequences
    while (j < index_new->n_entries){
        ffindex_entry_t* e_j = ffindex_get_entry_by_index(index_new, j);
        fprintf(B_index_file, "%s\t%zd\t%zd\n", e_j->name, e_j->offset, e_j->length);
        new_cnt++;
        j++;
    }

    // set the global count variables
    oldDBSize = index_old->n_entries;
    newDBSize = index_new->n_entries;
    deletedSeqs = deleted_cnt;
    sharedSeqs = shared_cnt;
    newSeqs = new_cnt;

    fclose(A_index_file);
    fclose(B_index_file);
}
示例#2
0
  ///////////////////////////////////////////////////////////////////////////////////////////////////
  // Appends a (length, name) pair for every entry of the cs219 database index to
  // prefiltered_entries, i.e. selects all entries when no prefiltering is done.
  //
  // cs219_database       database whose db_index is enumerated
  // prefiltered_entries  output list; existing elements are kept, new ones are appended
  ///////////////////////////////////////////////////////////////////////////////////////////////////
  void Prefilter::init_no_prefiltering(FFindexDatabase* cs219_database,
      std::vector<std::pair<int, std::string> >& prefiltered_entries) {
    ffindex_index_t* db_index = cs219_database->db_index;

    // one allocation instead of repeated growth while appending
    prefiltered_entries.reserve(prefiltered_entries.size() + db_index->n_entries);

    for (size_t n = 0; n < db_index->n_entries; n++) {
      ffindex_entry_t* entry = ffindex_get_entry_by_index(db_index, n);

      // Let make_pair deduce its types: spelling them out turns the parameters
      // into rvalue references under C++11 and breaks as soon as an argument
      // is an lvalue. The length is narrowed to int explicitly, as before.
      prefiltered_entries.push_back(
          std::make_pair(static_cast<int>(entry->length),
              std::string(entry->name)));
    }

    HH_LOG(INFO) << "Searching " << prefiltered_entries.size()
        << " database HHMs without prefiltering" << std::endl;
  }
示例#3
0
文件: FFindex.c 项目: ahcm/ffindex
/*
 * Ruby method: looks up the entry at position `key` in the index of the
 * wrapped ffindex_db_t and returns its data as a new Ruby string built with
 * rb_str_new2(). Returns nil when ffindex_get_entry_by_index() finds no entry.
 */
VALUE method_ffindex_get_data_by_index(VALUE self, VALUE key)
{
  ffindex_db_t * db;
  Data_Get_Struct(self, ffindex_db_t, db);

  size_t position = FIX2INT(key);
  ffindex_entry_t * found = ffindex_get_entry_by_index(db->ffindex, position);

  /* no entry at that position */
  if(!found)
    return Qnil;

  return rb_str_new2(ffindex_get_data_by_entry(db->ffdata, found));
}
示例#4
0
    /**
     * Runs hhblits on every index entry in [start, end) and passes the result
     * to saveOutput() of each output database.
     *
     * Entries that cannot be looked up are skipped silently; entries whose
     * data cannot be opened as a stream are skipped with a warning on stderr.
     *
     * @param start  first entry index to process
     * @param end    one past the last entry index to process
     */
    void Payload(const size_t start, const size_t end) {
        // Foreach entry in the input file
        for (size_t entry_index = start; entry_index < end; entry_index++) {
            ffindex_entry_t *entry = ffindex_get_entry_by_index(index, entry_index);
            if (entry == NULL) {
                continue;
            }

            hhblits->Reset();

            FILE *inf = ffindex_fopen_by_entry(data, entry);
            if (inf == NULL) {
                // without this check both run() and fclose() would receive NULL
                fprintf(stderr, "WARNING: Could not open entry %s, skipping it\n", entry->name);
                continue;
            }

            hhblits->run(inf, entry->name);
            fclose(inf);

            for (size_t i = 0; i < outputDatabases->size(); i++) {
                (*outputDatabases)[i].saveOutput(*hhblits, entry->name);
            }
        }
    }
示例#5
0
//////////////////////////////////////////////////////////////
// Collect data pointer, length and name of every column state
// sequence in the cs219 database for prefiltering
//////////////////////////////////////////////////////////////
  void Prefilter::init_prefilter(FFindexDatabase* cs219_database) {
    ffindex_index_t* cs219_index = cs219_database->db_index;

    // three parallel arrays with one slot per database entry
    num_dbs = cs219_index->n_entries;
    first = (unsigned char**) mem_align(ALIGN_FLOAT, num_dbs * sizeof(unsigned char*));
    length = (int*) mem_align(ALIGN_FLOAT, num_dbs * sizeof(int));
    dbnames = (char**) mem_align(ALIGN_FLOAT, num_dbs * sizeof(char*));

    for (size_t db = 0; db < num_dbs; ++db) {
      ffindex_entry_t* seq_entry = ffindex_get_entry_by_index(cs219_index, db);

      // pointer into the database data, not a copy
      char* seq_data = ffindex_get_data_by_entry(cs219_database->db_data, seq_entry);
      first[db] = (unsigned char*) seq_data;

      // stored length is one less than the entry length
      length[db] = seq_entry->length - 1;

      // the name is duplicated into its own heap buffer
      const size_t name_size = strlen(seq_entry->name) + 1;
      dbnames[db] = new char[name_size];
      strcpy(dbnames[db], seq_entry->name);
    }

    //check if cs219 format is new binary format
    checkCSFormat(5);

    HH_LOG(INFO) << "Searching " << num_dbs
        << " column state sequences." << std::endl;
  }
示例#6
0
/*
 * MPI driver that calls ffindex_apply_by_entry() with PROGRAM for every entry
 * of an FFindex database.
 *
 * Command line:
 *   [-d DATA_FILENAME_OUT -i INDEX_FILENAME_OUT] DATA_FILENAME INDEX_FILENAME -- PROGRAM [PROGRAM_ARGS]*
 *
 * Work distribution: the entries are cut into equally sized contiguous
 * batches, one per rank. The entries left over (fewer than the number of
 * ranks) are handled one each by the lowest ranks.
 *
 * Output: when both -d and -i are given, every rank opens its own
 * "<name>.<rank>" data and index file and passes them to
 * ffindex_apply_by_entry(). After the barrier rank 0 merges them by running
 * ffindex_build once per rank.
 *
 * Returns EXIT_SUCCESS, -1 on a usage error, or terminates with EXIT_FAILURE
 * when a file cannot be opened, memory cannot be allocated or an index entry
 * cannot be looked up.
 */
int main(int argn, char **argv)
{
  int mpi_rank,
      mpi_num_procs;

  MPI_Init(&argn, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
  MPI_Comm_size(MPI_COMM_WORLD, &mpi_num_procs);

  int opt;
  char *data_filename_out  = NULL,
       *index_filename_out = NULL;

  while ((opt = getopt(argn, argv, "d:i:")) != -1)
  {
    switch (opt)
    {
      case 'd':
        data_filename_out = optarg;
        break;
      case 'i':
        index_filename_out = optarg;
        break;
    }
  }

  if(argn - optind < 3)
  {
    /* report how many positional arguments were actually given */
    fprintf(stderr, "Not enough arguments %d.\n", argn - optind);
    fprintf(stderr, "USAGE: %s -d DATA_FILENAME_OUT -i INDEX_FILENAME_OUT DATA_FILENAME INDEX_FILENAME -- PROGRAM [PROGRAM_ARGS]*\n"
                    "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n",
                    basename(argv[0]));
    return -1;
  }

  read_buffer = malloc(400 * 1024 * 1024);
  if(read_buffer == NULL) { perror("malloc"); exit(EXIT_FAILURE); }

  char *data_filename  = argv[optind++];
  char *index_filename = argv[optind++];
  char *program_name   = argv[optind];
  char **program_argv = argv + optind;

  FILE *data_file  = fopen(data_filename,  "r");
  FILE *index_file = fopen(index_filename, "r");

  if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename);  exit(EXIT_FAILURE); }
  if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename);  exit(EXIT_FAILURE); }

  FILE *data_file_out = NULL, *index_file_out = NULL;
  // Setup one output FFindex for each MPI process
  if(data_filename_out != NULL && index_filename_out != NULL)
  {
    char* data_filename_out_rank  = malloc(FILENAME_MAX);
    char* index_filename_out_rank = malloc(FILENAME_MAX);
    if(data_filename_out_rank == NULL || index_filename_out_rank == NULL) { perror("malloc"); exit(EXIT_FAILURE); }

    snprintf( data_filename_out_rank, FILENAME_MAX, "%s.%d", data_filename_out,  mpi_rank);
    snprintf(index_filename_out_rank, FILENAME_MAX, "%s.%d", index_filename_out, mpi_rank);
    data_file_out  = fopen(data_filename_out_rank,  "w+");
    index_file_out = fopen(index_filename_out_rank, "w+");

    // report the per-rank name, that is the file that failed to open
    if( data_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename_out_rank);  exit(EXIT_FAILURE); }
    if(index_file_out == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename_out_rank);  exit(EXIT_FAILURE); }

    free(data_filename_out_rank);
    free(index_filename_out_rank);
  }

  int capture_stdout = (data_file_out != NULL);

  size_t data_size;
  char *data = ffindex_mmap_data(data_file, &data_size);

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename);
    MPI_Finalize();
    exit(EXIT_FAILURE);
  }

  // Ignore SIGPIPE
  struct sigaction handler;
  handler.sa_handler = SIG_IGN;
  sigemptyset(&handler.sa_mask);
  handler.sa_flags = 0;
  sigaction(SIGPIPE, &handler, NULL);

  size_t batch_size, range_start, range_end;

  // With fewer entries than ranks there are no batches, only left overs.
  if(index->n_entries >= (size_t) mpi_num_procs)
    batch_size = index->n_entries / mpi_num_procs;
  else
    batch_size = 0;
  range_start = mpi_rank * batch_size;
  range_end = range_start + batch_size;


  size_t offset = 0;
  // Foreach entry of this rank's batch
  if(batch_size > 0)
    for(size_t entry_index = range_start; entry_index < range_end; entry_index++)
    {
      ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index);
      if(entry == NULL)
      {
        // entry is NULL here, so its name must not be touched
        fprintf(stderr, "%s: no index entry at position %zu\n", argv[0], entry_index);
        return EXIT_FAILURE;
      }
      int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset);
      if(error != 0)
        { perror(entry->name); break; }
    }

  // Entries that did not fit into the batches: one each for the lowest ranks.
  ssize_t left_over = index->n_entries - (batch_size * mpi_num_procs);
  if(mpi_rank < left_over)
  {
    size_t left_over_entry_index = (batch_size * mpi_num_procs) + mpi_rank;
    ffindex_entry_t* entry = ffindex_get_entry_by_index(index, left_over_entry_index);
    if(entry == NULL)
    {
      fprintf(stderr, "%s: no index entry at position %zu\n", argv[0], left_over_entry_index);
      return EXIT_FAILURE;
    }
    //fprintf(stderr, "handling left over: %ld\n", left_over_entry_index);
    int error = ffindex_apply_by_entry(data, index, entry, program_name, program_argv, data_file_out, index_file_out, &offset);
    if(error != 0)
      perror(entry->name);
  }

  if(capture_stdout)
    fclose(data_file_out);
  if(index_file_out != NULL)
    fclose(index_file_out);

  // every rank must have closed its output before the merge starts
  MPI_Barrier(MPI_COMM_WORLD);


  // merge FFindexes in master
  if(data_filename_out != NULL && mpi_rank == 0)
  {
    const size_t merge_command_size = (size_t) FILENAME_MAX * 5;
    char* merge_command = malloc(merge_command_size);
    if(merge_command == NULL) { perror("malloc"); exit(EXIT_FAILURE); }

    for(int i = 0; i < mpi_num_procs; i++)
    {
      // NOTE(review): the file names are pasted into a shell command unquoted;
      // names containing shell metacharacters will break or abuse this call.
      int length = snprintf(merge_command, merge_command_size, "ffindex_build -as %s %s -d %s.%d -i %s.%d",
                            data_filename_out, index_filename_out, data_filename_out, i, index_filename_out, i);
      if(length < 0 || (size_t) length >= merge_command_size)
      {
        fprintf(stderr, "%s: merge command for rank %d is too long, skipping\n", argv[0], i);
        continue;
      }
      //puts(merge_command);
      if(system(merge_command) != 0)
        fprintf(stderr, "%s: merging the output of rank %d failed\n", argv[0], i);
    }
    free(merge_command);
  }

  MPI_Finalize();

  return EXIT_SUCCESS;
}
示例#7
0
/**
 * Expands a compressed a3m (ca3m) FFindex database into a plain a3m database.
 *
 * Options (all required):
 *   -i PREFIX  input ca3m database   (PREFIX.ffdata / PREFIX.ffindex)
 *   -d PREFIX  sequence database
 *   -q PREFIX  header database
 *   -o PREFIX  output a3m database
 *   -h         print usage and exit
 *
 * Every ca3m entry is expanded with compressed_a3m::extract_a3m() in an
 * OpenMP parallel loop and inserted into the output database under the same
 * name; the output index is sorted with ffsort_index() at the end.
 *
 * Exits with status 1 on missing arguments, unopenable files or unparsable
 * indexes.
 */
int main(int argc, char **argv) {
  bool iflag = false, dflag = false, oflag = false, qflag = false;

  std::string ffindex_header_db_prefix;
  std::string ffindex_sequence_db_prefix;
  std::string ffindex_ca3m_db_prefix;
  std::string ffindex_a3m_db_prefix;

  int c;
  while ((c = getopt(argc, argv, "i:d:o:q:h")) != -1) {
    switch (c) {
      case 'i':
        iflag = true;
        ffindex_ca3m_db_prefix = optarg;
        break;
      case 'd':
        dflag = true;
        ffindex_sequence_db_prefix = optarg;
        break;
      case 'o':
        oflag = true;
        ffindex_a3m_db_prefix = optarg;
        break;
      case 'q':
        qflag = true;
        ffindex_header_db_prefix = optarg;
        break;
      case 'h':
        usage();
        exit(0);
      case '?':
        // the options that take an argument are i, d, o and q
        if (optopt == 'i' || optopt == 'd' || optopt == 'o' || optopt == 'q')
          fprintf(stderr, "Option -%c requires an argument.\n", optopt);
        else if (isprint(optopt))
          fprintf(stderr, "Unknown option `-%c'.\n", optopt);
        else
          fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
        return 1;
      default:
        abort();
    }
  }

  if(!iflag || !dflag || !oflag || !qflag) {
    std::cerr << "Missing arguments!" << std::endl;
    usage();
    // a usage error is a failure, not a success
    exit(1);
  }

  //prepare ffindex a3m database
  std::string a3mDataFile = ffindex_a3m_db_prefix+".ffdata";
  std::string a3mIndexFile = ffindex_a3m_db_prefix+".ffindex";

  FILE *a3m_data_fh  = fopen(a3mDataFile.c_str(), "w");
  FILE *a3m_index_fh = fopen(a3mIndexFile.c_str(), "w");

  if (a3m_data_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex a3m data file! (" << a3mDataFile << ")!" << std::endl;
    exit(1);
  }

  if(a3m_index_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex a3m index file! (" << a3mIndexFile << ")!" << std::endl;
    exit(1);
  }

  size_t a3m_offset = 0;

  //prepare ffindex ca3m database
  std::string ca3mDataFile = ffindex_ca3m_db_prefix+".ffdata";
  std::string ca3mIndexFile = ffindex_ca3m_db_prefix+".ffindex";

  FILE *ca3m_data_fh  = fopen(ca3mDataFile.c_str(), "r");
  FILE *ca3m_index_fh = fopen(ca3mIndexFile.c_str(), "r");

  if (ca3m_data_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex ca3m data file! (" << ca3mDataFile << ")!" << std::endl;
    exit(1);
  }

  if(ca3m_index_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex ca3m index file! (" << ca3mIndexFile << ")!" << std::endl;
    exit(1);
  }

  size_t ca3m_data_size;
  char* ca3m_data = ffindex_mmap_data(ca3m_data_fh, &ca3m_data_size);
  ffindex_index_t* ca3m_index = ffindex_index_parse(ca3m_index_fh, 0);

  if(ca3m_index == NULL) {
    std::cerr << "ERROR: CA3M index (" << ca3mIndexFile << ") could not be loaded!" << std::endl;
    exit(1);
  }

  //prepare ffindex sequence database
  std::string sequenceDataFile = ffindex_sequence_db_prefix+".ffdata";
  std::string sequenceIndexFile = ffindex_sequence_db_prefix+".ffindex";

  FILE *sequence_data_fh  = fopen(sequenceDataFile.c_str(), "r");
  FILE *sequence_index_fh = fopen(sequenceIndexFile.c_str(), "r");

  if (sequence_data_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex sequence data file! (" << sequenceDataFile << ")!" << std::endl;
    exit(1);
  }

  if(sequence_index_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex sequence index file! (" << sequenceIndexFile << ")!" << std::endl;
    exit(1);
  }

  size_t sequence_data_size;
  char* sequence_data = ffindex_mmap_data(sequence_data_fh, &sequence_data_size);
  ffindex_index_t* sequence_index = ffindex_index_parse(sequence_index_fh, 80000000);

  if(sequence_index == NULL) {
    std::cerr << "ERROR: Sequence index could not be loaded!" << std::endl;
    exit(1);
  }

  //prepare ffindex header database
  std::string headerDataFile = ffindex_header_db_prefix + ".ffdata";
  std::string headerIndexFile = ffindex_header_db_prefix + ".ffindex";

  FILE *header_data_fh = fopen(headerDataFile.c_str(), "r");
  FILE *header_index_fh = fopen(headerIndexFile.c_str(), "r");

  if (header_data_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex header data file! ("
        << headerDataFile << ")!" << std::endl;
    exit(1);
  }

  if (header_index_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex header index file! ("
        << headerIndexFile << ")!" << std::endl;
    exit(1);
  }

  size_t header_data_size;
  char* header_data = ffindex_mmap_data(header_data_fh,
      &header_data_size);
  ffindex_index_t* header_index = ffindex_index_parse(header_index_fh, 100000000);

  if (header_index == NULL) {
    std::cerr << "ERROR: Header index could not be loaded!" << std::endl;
    exit(1);
  }

  //prepare input stream
  size_t ca3m_range_start = 0;
  size_t ca3m_range_end = ca3m_index->n_entries;

  // Foreach entry
  #pragma omp parallel for shared(ca3m_index, ca3m_data, a3m_data_fh, a3m_index_fh, a3m_offset)
  for(size_t entry_index = ca3m_range_start; entry_index < ca3m_range_end; entry_index++)
  {
    ffindex_entry_t* entry = ffindex_get_entry_by_index(ca3m_index, entry_index);
    if(entry == NULL) {
      // entry is NULL, so there is no name to report
      std::cerr << "WARNING: No ca3m index entry at position " << entry_index << std::endl;
      continue;
    }

    char* data = ffindex_get_data_by_entry(ca3m_data, entry);

    // per-iteration buffer, released automatically at the end of the iteration
    std::stringstream out_buffer;
    compressed_a3m::extract_a3m(data, entry->length, sequence_index, sequence_data, header_index, header_data, &out_buffer);

    std::string out_string = out_buffer.str();

    // the output files and the running offset are shared between threads
    #pragma omp critical
    {
      ffindex_insert_memory(a3m_data_fh, a3m_index_fh, &a3m_offset, const_cast<char*>(out_string.c_str()), out_string.size(), entry->name);
    }
  }

  fclose(a3m_data_fh);
  fclose(a3m_index_fh);

  // entries were inserted in thread order, so the index is sorted afterwards
  ffsort_index(a3mIndexFile.c_str());

  return 0;
}
/**
 * Filters the sequences of every alignment in an a3m FFindex database.
 *
 * Options (all required):
 *   -i PREFIX  input a3m database   (PREFIX.ffdata / PREFIX.ffindex)
 *   -s FILE    text file with one short id per line; sequences with one of
 *              these ids are kept
 *   -o PREFIX  output a3m database
 *   -h         print usage and exit
 *
 * For every input entry the '#' annotation lines are copied. A '>' record is
 * copied when its short id is in the filter set, when isConsensus() accepts
 * its name, or when its name is ss_dssp, sa_dssp, ss_pred or ss_conf. An entry
 * is written to the output only if at least one of its records matched the
 * filter set; otherwise a warning is printed.
 *
 * Exits with status 1 on missing arguments, unopenable files or an
 * unparsable index.
 */
int main(int argc, char **argv) {
  // every flag starts out false; reading an unset flag below must be defined
  bool iflag = false, sflag = false, oflag = false;

  std::string set_file;
  std::string ffindex_oa3m_db_prefix;
  std::string ffindex_a3m_db_prefix;

  int c;
  while ((c = getopt(argc, argv, "i:s:o:h")) != -1) {
    switch (c) {
      case 'i':
        iflag = true;
        ffindex_a3m_db_prefix = optarg;
        break;
      case 's':
        sflag = true;
        set_file = optarg;
        break;
      case 'o':
        oflag = true;
        ffindex_oa3m_db_prefix = optarg;
        break;
      case 'h':
        usage();
        exit(0);
      case '?':
        // the options that take an argument are i, s and o
        if (optopt == 'i' || optopt == 's' || optopt == 'o')
          fprintf(stderr, "Option -%c requires an argument.\n", optopt);
        else if (isprint(optopt))
          fprintf(stderr, "Unknown option `-%c'.\n", optopt);
        else
          fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt);
        return 1;
      default:
        abort();
    }
  }

  if(!iflag || !sflag || !oflag) {
    std::cerr << "Missing input!" << std::endl;
    usage();
    exit(1);
  }

  //prepare ffindex a3m output database
  std::string oa3mDataFile = ffindex_oa3m_db_prefix+".ffdata";
  std::string oa3mIndexFile = ffindex_oa3m_db_prefix+".ffindex";

  FILE *oa3m_data_fh  = fopen(oa3mDataFile.c_str(), "w");
  FILE *oa3m_index_fh = fopen(oa3mIndexFile.c_str(), "w");

  if (oa3m_data_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex ca3m data file! (" << oa3mDataFile << ")!" << std::endl;
    exit(1);
  }

  if(oa3m_index_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex ca3m index file! (" << oa3mIndexFile << ")!" << std::endl;
    exit(1);
  }

  size_t oa3m_offset = 0;

  //prepare ffindex a3m database
  std::string a3mDataFile = ffindex_a3m_db_prefix+".ffdata";
  std::string a3mIndexFile = ffindex_a3m_db_prefix+".ffindex";

  FILE *a3m_data_fh  = fopen(a3mDataFile.c_str(), "r");
  FILE *a3m_index_fh = fopen(a3mIndexFile.c_str(), "r");

  if (a3m_data_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex a3m data file! (" << a3mDataFile << ")!" << std::endl;
    exit(1);
  }

  if(a3m_index_fh == NULL) {
    std::cerr << "ERROR: Could not open ffindex a3m index file! (" << a3mIndexFile << ")!" << std::endl;
    exit(1);
  }

  size_t a3m_data_size;
  char* a3m_data = ffindex_mmap_data(a3m_data_fh, &a3m_data_size);
  ffindex_index_t* a3m_index = ffindex_index_parse(a3m_index_fh, 0);

  if(a3m_index == NULL) {
    std::cerr << "ERROR: A3M index could not be loaded!" << std::endl;
    exit(1);
  }

  //prepare filter
  std::set<std::string> filter;
  std::ifstream infile(set_file.c_str());
  if(!infile) {
    // an unreadable filter file would otherwise silently drop every cluster
    std::cerr << "ERROR: Could not open set file! (" << set_file << ")!" << std::endl;
    exit(1);
  }

  std::string line;
  while (std::getline(infile, line)) {
    filter.insert(line);
  }

  infile.close();

  //prepare input stream
  size_t a3m_range_start = 0;
  size_t a3m_range_end = a3m_index->n_entries;

  // Foreach entry
  #pragma omp parallel for shared(a3m_index, a3m_data, oa3m_data_fh, oa3m_index_fh, oa3m_offset)
  for(size_t entry_index = a3m_range_start; entry_index < a3m_range_end; entry_index++)
  {
    //fprintf(stderr, "index %ld\n", entry_index);
    ffindex_entry_t* entry = ffindex_get_entry_by_index(a3m_index, entry_index);
    if(entry == NULL) {
      // entry is NULL, so there is no name to report
      std::cerr << "WARNING: No a3m index entry at position " << entry_index << std::endl;
      continue;
    }

    char* data = ffindex_get_data_by_entry(a3m_data, entry);

    // per-iteration buffer, released automatically at the end of the iteration
    std::stringstream out_buffer;

    // number of records of this entry that matched the filter set
    size_t nr_sequences = 0;

    for(size_t index = 0; index < entry->length; index++) {
      //write annotation line
      if(data[index] == '#') {
        // test the bound first so data[entry->length] is never read
        while(index < entry->length && data[index] != '\n') {
          out_buffer.put(data[index++]);
        }
        out_buffer.put('\n');
      }
      else if(data[index] == '>') {
        size_t start_index = index;
        while(index < entry->length && data[index] != '\n') {
          index++;
        }

        //copy line without new line
        std::string header = std::string(&data[start_index], index - start_index);
        std::string id = getNameFromHeader(header);
        bool consensus_flag = isConsensus(id);

        std::string short_id = getShortIdFromHeader(header);

        // advance to the start of the next record or to the last byte
        while(index < entry->length - 1 && data[index] != '>') {
          index++;
        }
        // step back so the loop increment lands on the next '>' again
        if(data[index] == '>' || data[index] == '\0') {
          index--;
        }

        bool passedFilter = false;
        if(filter.find(short_id) != filter.end()) {
          nr_sequences++;
          passedFilter = true;
        }

        if(passedFilter ||
            consensus_flag ||
            id.compare("ss_dssp") == 0 ||
            id.compare("sa_dssp") == 0 ||
            id.compare("ss_pred") == 0 ||
            id.compare("ss_conf") == 0) {
          std::string seq = std::string(&data[start_index], index - start_index);
          out_buffer.write(seq.c_str(), seq.size());
          out_buffer.put('\n');
        }
      }
    }

    if(nr_sequences > 0) {
      std::string out_string = out_buffer.str();
      // the output files and the running offset are shared between threads
      #pragma omp critical
      {
        ffindex_insert_memory(oa3m_data_fh, oa3m_index_fh, &oa3m_offset, const_cast<char*>(out_string.c_str()), out_string.size(), entry->name);
      }
    }
    else {
      std::cerr << "WARNING: No sequences left for cluster " << entry->name << std::endl;
    }
  }

  // NOTE(review): entries are inserted in whatever order the threads reach the
  // critical section and the index is not sorted here -- confirm that consumers
  // of the output do not need a sorted index.
  fclose(oa3m_data_fh);
  fclose(oa3m_index_fh);

  return 0;
}
示例#9
0
/*
 * Prints the data of the requested entries of an FFindex database to stdout.
 *
 * Command line: [-n | --byindex] DATA_FILENAME INDEX_FILENAME ENTRY...
 *
 * Without -n every ENTRY is looked up by name; with -n every ENTRY is a
 * position in the index, counted from 1. The last byte of each entry is not
 * printed. Entries that cannot be found are reported through fferror_print()
 * and skipped; the exit status is still 0 in that case.
 */
int main(int argn, char **argv)
{
  int by_index = 0;
  static struct option long_options[] =
  {
    { "byindex", no_argument, NULL, 'n' },
    { NULL,      0,           NULL,  0  }
  };

  int opt;
  while (1)
  {
    int option_index = 0;
    opt = getopt_long(argn, argv, "n", long_options, &option_index);
    if (opt == -1)
      break;

    switch (opt)
    {
      case 'n':
        by_index = 1;
        break;
      default:
        usage(argv[0]);
        return EXIT_FAILURE;
    }
  }

  /* Count only what is left after the options: both file names are required.
   * Checking argn alone would accept "-n DATA" and pass NULL to fopen(). */
  if(argn - optind < 2)
  {
    usage(argv[0]);
    return EXIT_FAILURE;
  }
  char *data_filename  = argv[optind++];
  char *index_filename = argv[optind++];

  FILE *data_file  = fopen(data_filename,  "r");
  FILE *index_file = fopen(index_filename, "r");

  if( data_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", data_filename);  exit(EXIT_FAILURE); }
  if(index_file == NULL) { fferror_print(__FILE__, __LINE__, "ffindex_get", index_filename);  exit(EXIT_FAILURE); }

  size_t data_size;
  char *data = ffindex_mmap_data(data_file, &data_size);

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename);
    exit(EXIT_FAILURE);
  }

  if(by_index)
  {
    for(int i = optind; i < argn; i++)
    {
      size_t index_n = atol(argv[i]) - 1; // offset from 0 but specify from 1

      ffindex_entry_t* entry = ffindex_get_entry_by_index(index, index_n);
      if(entry == NULL)
      {
        errno = ENOENT; 
        fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]);
      }
      else
      {
        char *filedata = ffindex_get_data_by_entry(data, entry);
        if(filedata == NULL)
        {
          errno = ENOENT; 
          fferror_print(__FILE__, __LINE__, "ffindex_get entry index out of range", argv[i]);
        }
        else if(entry->length > 0) // length - 1 would wrap around for an empty entry
          fwrite(filedata, entry->length - 1, 1, stdout);
      }
    }
  }
  else // by name
  {
    for(int i = optind; i < argn; i++)
    {
      char *filename = argv[i];

      ffindex_entry_t* entry = ffindex_get_entry_by_name(index, filename);
      if(entry == NULL)
      {
        errno = ENOENT; 
        fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename);
      }
      else
      {
        char *filedata = ffindex_get_data_by_entry(data, entry);
        if(filedata == NULL)
        {
          errno = ENOENT; 
          fferror_print(__FILE__, __LINE__, "ffindex_get key not found in index", filename);
        }
        else if(entry->length > 0) // length - 1 would wrap around for an empty entry
          fwrite(filedata, entry->length - 1, 1, stdout);
      }
    }

      /* Alternative code using (slower) ffindex_fopen */
      /*
         FILE *file = ffindex_fopen(data, index, filename);
         if(file == NULL)
         {
         errno = ENOENT; 
         fferror_print(__FILE__, __LINE__, "ffindex_fopen file not found in index", filename);
         }
         else
         {
         char line[LINE_MAX];
         while(fgets(line, LINE_MAX, file) != NULL)
         printf("%s", line);
         }
         */
  }

  return 0;
}
示例#10
0
/*
 * Runs PROGRAM once for every entry of an FFindex database, feeding the
 * entry's data to the program's standard input.
 *
 * Command line: DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*
 *
 * For each entry a pipe is created and a child is forked; the child makes the
 * read end its stdin and execs PROGRAM, the parent writes the entry data into
 * the write end, closes it and waits for the child. Entries are processed one
 * after the other.
 *
 * Returns 0 after the last entry, -1 on a usage error; exits with
 * EXIT_FAILURE when a file cannot be opened or the index cannot be parsed and
 * with the fork() errno when no child can be created.
 */
int main(int argn, char **argv)
{
  if(argn < 4)
  {
    fprintf(stderr, "USAGE: %s DATA_FILENAME INDEX_FILENAME PROGRAM [PROGRAM_ARGS]*\n"
                    "\nDesigned and implemented by Andy Hauser <*****@*****.**>.\n",
                    argv[0]);
    return -1;
  }
  char *data_filename  = argv[1];
  char *index_filename = argv[2];
  char *program_name   = argv[3];
  char **program_argv = argv + 3;

  FILE *data_file  = fopen(data_filename,  "r");
  FILE *index_file = fopen(index_filename, "r");

  if( data_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], data_filename);  exit(EXIT_FAILURE); }
  if(index_file == NULL) { fferror_print(__FILE__, __LINE__, argv[0], index_filename);  exit(EXIT_FAILURE); }

  size_t data_size;
  char *data = ffindex_mmap_data(data_file, &data_size);

  ffindex_index_t* index = ffindex_index_parse(index_file, 0);
  if(index == NULL)
  {
    fferror_print(__FILE__, __LINE__, "ffindex_index_parse", index_filename);
    exit(EXIT_FAILURE);
  }
  
  // Ignore SIGPIPE, so a child that stops reading early shows up as EPIPE
  // on write() instead of killing this process.
  struct sigaction handler;
  handler.sa_handler = SIG_IGN;
  sigemptyset(&handler.sa_mask);
  handler.sa_flags = 0;
  sigaction(SIGPIPE, &handler, NULL);

  size_t range_start = 0;
  size_t range_end = index->n_entries;

  // Foreach entry
//#pragma omp parallel for
  for(size_t entry_index = range_start; entry_index < range_end; entry_index++)
  {
    //fprintf(stderr, "index %ld\n", entry_index);
    ffindex_entry_t* entry = ffindex_get_entry_by_index(index, entry_index);
    if(entry == NULL)
    {
      // entry is NULL here, so its name must not be touched
      fprintf(stderr, "%s: no index entry at position %zu\n", argv[0], entry_index);
      continue;
    }

    int pipefd[2];
    if(pipe(pipefd) != 0) { perror(entry->name); continue; }

    pid_t child_pid = fork();
    if(child_pid == 0)
    {
      fclose(data_file);
      fclose(index_file);
      close(pipefd[1]);

      // Make pipe from parent our new stdin
      if(dup2(pipefd[0], fileno(stdin)) < 0)
      {
        perror(entry->name);
        _exit(EXIT_FAILURE);
      }
      close(pipefd[0]);

      // exec program with the pipe as stdin
      execvp(program_name, program_argv);

      // Only reached when exec failed. The child must stop here, otherwise
      // it would carry on with the parent's loop and fork children itself.
      perror(program_name);
      _exit(EXIT_FAILURE);
    }
    else if(child_pid > 0)
    {
      // Read end is for child only
      close(pipefd[0]);

      // Write file data to child's stdin.
      char *filedata = ffindex_get_data_by_entry(data, entry);
      size_t written = 0;
      while(written < entry->length)
      {
        ssize_t w = write(pipefd[1], filedata + written, entry->length - written);
        if(w < 0)
        {
          // EPIPE just means the child stopped reading; anything else is an error
          if(errno != EPIPE)
            perror(entry->name);
          break;
        }
        if(w == 0) // no progress possible, do not spin
          break;
        written += (size_t) w;
      }

      close(pipefd[1]); // child gets EOF
      waitpid(child_pid, NULL, 0);
    }
    else
    {
      // keep the fork() error code, perror() may overwrite errno
      int fork_errno = errno;
      perror(entry->name);
      exit(fork_errno);
    }
  }

  return 0;
}