Пример #1
0
void ProcessPairedEndReads(const string& command, const string& index_file,
                           const string& reads_file_p1,
                           const string& reads_file_p2,
                           const string& output_file,
                           const uint32_t& n_reads_to_process,
                           const uint32_t& max_mismatches,
                           const string& adaptor, const uint32_t& top_k,
                           const int& frag_range, const bool& ambiguous,
                           const bool& unmapped, const bool& SAM,
                           const int& num_of_threads) {
  // LOAD THE INDEX HEAD INFO
  Genome genome;
  HashTable hash_table;

  uint32_t size_of_index;
  ReadIndexHeadInfo(index_file, genome, size_of_index);
  genome.sequence.resize(genome.length_of_genome);
  hash_table.counter.resize(power(4, F2SEEDKEYWIGTH) + 1);
  hash_table.index.resize(size_of_index);

  vector<vector<string> > index_names(2, vector<string>(2));
  index_names[0][0] = index_file + "_CT00";
  index_names[0][1] = index_file + "_CT01";
  index_names[1][0] = index_file + "_GA10";
  index_names[1][1] = index_file + "_GA11";

  vector<vector<string> > read_names(2, vector<string>(n_reads_to_process));
  vector<vector<string> > read_seqs(2, vector<string>(n_reads_to_process));
  vector<vector<string> > read_scores(2, vector<string>(n_reads_to_process));

  vector<int> ranked_results_size(2);
  vector<vector<CandidatePosition> > ranked_results(2,
          vector<CandidatePosition>(MAX_NUM_EXACT_MAPPED));

  vector<vector<TopCandidates> > top_results(2,
         vector<TopCandidates>(n_reads_to_process));

  FILE * fin[2];
  fin[0] = fopen(reads_file_p1.c_str(), "r");
  if (!fin[0]) {
    throw SMITHLABException("cannot open input file " + reads_file_p1);
  }
  fin[1] = fopen(reads_file_p2.c_str(), "r");
  if (!fin[1]) {
    throw SMITHLABException("cannot open input file " + reads_file_p2);
  }

  string adaptors[2];
  extract_adaptors(adaptor, adaptors[0], adaptors[1]);
  clock_t start_t = clock();
  FILE * fout = fopen(output_file.c_str(), "w");
  if (!fout) {
    throw SMITHLABException("cannot open input file " + output_file);
  }
  uint32_t num_of_reads[2];
  StatPairedReads stat_paired_reads(ambiguous, unmapped, output_file, SAM);
  bool AG_WILDCARD = true;
  fprintf(stderr, "[MAPPING PAIRED-END READS FROM THE FOLLOWING TWO FILES]\n");
  fprintf(stderr, "   %s (AND)\n   %s\n", reads_file_p1.c_str(),
          reads_file_p2.c_str());
  fprintf(stderr, "[OUTPUT MAPPING RESULTS TO %s]\n", output_file.c_str());
  if (SAM) {
    SAMHead(index_file, command, fout);
  }
  omp_set_dynamic(0);
  omp_set_num_threads(num_of_threads);
  for (uint32_t i = 0;; i += n_reads_to_process) {
    num_of_reads[0] = num_of_reads[1] = 0;
    for (uint32_t pi = 0; pi < 2; ++pi) {  // paired end reads _1 and _2
      AG_WILDCARD = pi == 1 ? true : false;
      LoadReadsFromFastqFile(fin[pi], i, n_reads_to_process, adaptors[pi],
                             num_of_reads[pi], read_names[pi], read_seqs[pi],
                             read_scores[pi]);
      if (num_of_reads[pi] == 0)
        break;

      //Initialize the paired results
      for (uint32_t j = 0; j < num_of_reads[pi]; ++j) {
        top_results[pi][j].Clear();
        top_results[pi][j].SetSize(top_k);
      }

      for (uint32_t fi = 0; fi < 2; ++fi) {
        ReadIndex(index_names[pi][fi], genome, hash_table);
        char strand = fi == 0 ? '+' : '-';
#pragma omp parallel for
        for (uint32_t j = 0; j < num_of_reads[pi]; ++j) {
          PairEndMapping(read_seqs[pi][j], genome, hash_table, strand,
                         AG_WILDCARD, max_mismatches, top_results[pi][j]);
        }
      }
    }
    if (num_of_reads[0] != num_of_reads[1]) {
      fprintf(stderr,
              "The number of reads in paired-end files should be the same.\n");
      exit( EXIT_FAILURE);
    }
    if (num_of_reads[0] == 0) {
      break;
    }
    stat_paired_reads.total_read_pairs += num_of_reads[0];
    ///////////////////////////////////////////////////////////
    // Merge Paired-end results
    for (uint32_t j = 0; j < num_of_reads[0]; ++j) {
      for (uint32_t pi = 0; pi < 2; ++pi) {
        ranked_results_size[pi] = 0;
        while (!top_results[pi][j].candidates.empty()) {
          ranked_results[pi][ranked_results_size[pi]++] =
              top_results[pi][j].Top();
          top_results[pi][j].Pop();
        }
      }

      MergePairedEndResults(genome, read_names[0][j], read_seqs[0][j],
                            read_scores[0][j], read_seqs[1][j],
                            read_scores[1][j], ranked_results,
                            ranked_results_size, frag_range, max_mismatches,
                            SAM, stat_paired_reads, fout);
    }

    if (num_of_reads[0] < n_reads_to_process)
      break;
  }

  fclose(fin[0]);
  fclose(fin[1]);
  fclose(fout);

  OutputPairedStatInfo(stat_paired_reads, output_file);
  fprintf(stderr, "[MAPPING TAKES %.0lf SECONDS]\n",
          (double(clock() - start_t) / CLOCKS_PER_SEC));
}
Пример #2
0
Info API::DeleteFrom(SqlCommandDeleteFrom* command){
  CatalogManager catalog_manager;
  IndexManager index_manager;
  RecordManager record_manager;

  bool delete_all_records = command->delete_all_records();
  std::string table_name = command->table_name();
  std::vector<WhereClause> where_clause = command->where_clause();
  if (!catalog_manager.HasTable(table_name)){
    std::string error_info;
    error_info = "Table \"" + table_name + "\" not exists.";
    return Info(error_info);
  }

  auto table = catalog_manager.GetTableInfo(table_name);

  for (auto it: where_clause){
    if(!table.HasAttribute(it.kColumnName)){
      std::string error_info;
      error_info = "attribute \"" + it.kColumnName + "\"in where clause \""+ it.kColumnName + it.kOperator + it.kCondition +"\" not exists.";
      return Info(error_info);
    }
  }

  if (delete_all_records){
    if(record_manager.DeleteAllRecords(table)){
      //update index
      for (auto it : table.index_names()){
        IndexInfo index_info = catalog_manager.GetIndexInfo(it);
        index_manager.EmptyIndex(index_info);
      }
      return Info();
    }
    else{
      return Info("Delete records failed");
    }
  }
  else{
    std::vector<WhereClause> where_clause_with_index;
    std::vector<WhereClause> where_clause_without_index;

    for (auto it : where_clause){
      if (!table.attribute(it.kColumnName).index_names().empty() && it.kOperator != "!="){
        where_clause_with_index.push_back(it);
      }
      else{
        where_clause_without_index.push_back(it);
      }
    }

    std::vector<std::pair<int,int>> results;

    if (!where_clause_with_index.empty()) //条件里有属性有 index
    {

      for (auto it : where_clause_with_index){
        std::string index_name = table.attribute(it.kColumnName).index_names().at(0);
        IndexInfo index = catalog_manager.GetIndexInfo(index_name);
        std::vector<int> offsets_of_a_clause = index_manager.FindRecords(table,index, it);
        if(offsets_of_a_clause.empty()){
          std::string error_info;
          error_info = "record not found with index, deletion failed";
          return Info(error_info);
        }
        std::sort(offsets_of_a_clause.begin(), offsets_of_a_clause.end());
        offsets_of_a_clause.erase(std::unique(offsets_of_a_clause.begin(), offsets_of_a_clause.end()), offsets_of_a_clause.end());

        std::vector<std::pair<int,int>> results_of_a_clause = record_manager.FindRecordsWithIndex(offsets_of_a_clause, table, it);

        if (results.empty()){
          results = results_of_a_clause;
        }
        else{
          std::vector<std::pair<int,int> > results_temp;
          std::set_intersection(results.begin(),results.end(),
                                results_of_a_clause.begin(),results_of_a_clause.end(),
                                std::back_inserter(results_temp));
          results = results_temp;

        }
      }
      for(auto it : where_clause_without_index){
        if(results.empty()){
          std::string error_info;
          error_info = "record not found with index, deletion failed";
          return Info(error_info);
        }
        results = record_manager.RecordsFilter(results, table, it);
      }
    }
    else{ //条件里属性都没有 index
      for (auto it: where_clause_without_index){
        if (results.empty()){
          results = record_manager.FindRecordsWithNoIndex(table, it);
        }
        else{
          results = record_manager.RecordsFilter(results,table, it);
        }
      }
      if(results.empty()){
        std::string error_info;
        error_info = "record not found with no index, deletion failed";
        return Info(error_info);
      }

    }
    if(results.empty()){
        std::string error_info;
        error_info = "record not found, deletion failed";
        return Info(error_info);
    }
    auto records = record_manager.SelectRecords(results, table);
    int ret = record_manager.DeleteRecords(results, table);
    if(!ret){
      return Info("Delete records failed.");
    }
    else{
      auto index_names = table.index_names();
      int pair_index = 0;
      for (auto it : records){
        for (auto i : index_names){
          IndexInfo index_info = catalog_manager.GetIndexInfo(i);
          auto index = table.attribute_index(index_info.attribute_name());
          std::string value = it.at(index);
          int offset_i = results.at(pair_index).first;
          bool ret = index_manager.DeleteRecord(table, index_info, value, offset_i);
          if(!ret){
            return Info("Delete record in index \"" + index_info.name() + "on attribute \"" + index_info.attribute_name()
                        +"\" of table \"" + index_info.table_name() + "\"failed.");
          }
        }
        pair_index++;
      }
      return Info();
    }
  }
}