void ProcessPairedEndReads(const string& command, const string& index_file, const string& reads_file_p1, const string& reads_file_p2, const string& output_file, const uint32_t& n_reads_to_process, const uint32_t& max_mismatches, const string& adaptor, const uint32_t& top_k, const int& frag_range, const bool& ambiguous, const bool& unmapped, const bool& SAM, const int& num_of_threads) { // LOAD THE INDEX HEAD INFO Genome genome; HashTable hash_table; uint32_t size_of_index; ReadIndexHeadInfo(index_file, genome, size_of_index); genome.sequence.resize(genome.length_of_genome); hash_table.counter.resize(power(4, F2SEEDKEYWIGTH) + 1); hash_table.index.resize(size_of_index); vector<vector<string> > index_names(2, vector<string>(2)); index_names[0][0] = index_file + "_CT00"; index_names[0][1] = index_file + "_CT01"; index_names[1][0] = index_file + "_GA10"; index_names[1][1] = index_file + "_GA11"; vector<vector<string> > read_names(2, vector<string>(n_reads_to_process)); vector<vector<string> > read_seqs(2, vector<string>(n_reads_to_process)); vector<vector<string> > read_scores(2, vector<string>(n_reads_to_process)); vector<int> ranked_results_size(2); vector<vector<CandidatePosition> > ranked_results(2, vector<CandidatePosition>(MAX_NUM_EXACT_MAPPED)); vector<vector<TopCandidates> > top_results(2, vector<TopCandidates>(n_reads_to_process)); FILE * fin[2]; fin[0] = fopen(reads_file_p1.c_str(), "r"); if (!fin[0]) { throw SMITHLABException("cannot open input file " + reads_file_p1); } fin[1] = fopen(reads_file_p2.c_str(), "r"); if (!fin[1]) { throw SMITHLABException("cannot open input file " + reads_file_p2); } string adaptors[2]; extract_adaptors(adaptor, adaptors[0], adaptors[1]); clock_t start_t = clock(); FILE * fout = fopen(output_file.c_str(), "w"); if (!fout) { throw SMITHLABException("cannot open input file " + output_file); } uint32_t num_of_reads[2]; StatPairedReads stat_paired_reads(ambiguous, unmapped, output_file, SAM); bool AG_WILDCARD = true; 
fprintf(stderr, "[MAPPING PAIRED-END READS FROM THE FOLLOWING TWO FILES]\n"); fprintf(stderr, " %s (AND)\n %s\n", reads_file_p1.c_str(), reads_file_p2.c_str()); fprintf(stderr, "[OUTPUT MAPPING RESULTS TO %s]\n", output_file.c_str()); if (SAM) { SAMHead(index_file, command, fout); } omp_set_dynamic(0); omp_set_num_threads(num_of_threads); for (uint32_t i = 0;; i += n_reads_to_process) { num_of_reads[0] = num_of_reads[1] = 0; for (uint32_t pi = 0; pi < 2; ++pi) { // paired end reads _1 and _2 AG_WILDCARD = pi == 1 ? true : false; LoadReadsFromFastqFile(fin[pi], i, n_reads_to_process, adaptors[pi], num_of_reads[pi], read_names[pi], read_seqs[pi], read_scores[pi]); if (num_of_reads[pi] == 0) break; //Initialize the paired results for (uint32_t j = 0; j < num_of_reads[pi]; ++j) { top_results[pi][j].Clear(); top_results[pi][j].SetSize(top_k); } for (uint32_t fi = 0; fi < 2; ++fi) { ReadIndex(index_names[pi][fi], genome, hash_table); char strand = fi == 0 ? '+' : '-'; #pragma omp parallel for for (uint32_t j = 0; j < num_of_reads[pi]; ++j) { PairEndMapping(read_seqs[pi][j], genome, hash_table, strand, AG_WILDCARD, max_mismatches, top_results[pi][j]); } } } if (num_of_reads[0] != num_of_reads[1]) { fprintf(stderr, "The number of reads in paired-end files should be the same.\n"); exit( EXIT_FAILURE); } if (num_of_reads[0] == 0) { break; } stat_paired_reads.total_read_pairs += num_of_reads[0]; /////////////////////////////////////////////////////////// // Merge Paired-end results for (uint32_t j = 0; j < num_of_reads[0]; ++j) { for (uint32_t pi = 0; pi < 2; ++pi) { ranked_results_size[pi] = 0; while (!top_results[pi][j].candidates.empty()) { ranked_results[pi][ranked_results_size[pi]++] = top_results[pi][j].Top(); top_results[pi][j].Pop(); } } MergePairedEndResults(genome, read_names[0][j], read_seqs[0][j], read_scores[0][j], read_seqs[1][j], read_scores[1][j], ranked_results, ranked_results_size, frag_range, max_mismatches, SAM, stat_paired_reads, fout); } if 
(num_of_reads[0] < n_reads_to_process) break; } fclose(fin[0]); fclose(fin[1]); fclose(fout); OutputPairedStatInfo(stat_paired_reads, output_file); fprintf(stderr, "[MAPPING TAKES %.0lf SECONDS]\n", (double(clock() - start_t) / CLOCKS_PER_SEC)); }
/**
 * Execute a DELETE FROM command.
 *
 * Validates that the table and every attribute named in the WHERE clauses
 * exist, then either deletes all records (emptying every index of the table)
 * or deletes the records that satisfy ALL of the WHERE clauses and removes
 * their entries from every index of the table.
 *
 * Clauses on an attribute that has an index (and whose operator is not "!=")
 * are resolved through the index; the remaining clauses are applied as
 * filters on the intermediate result set.
 *
 * @param command  Parsed command; supplies the table name, the WHERE clauses
 *                 and the delete-all flag.
 * @return A default-constructed Info on success, otherwise an Info built from
 *         a message describing the failure.
 */
Info API::DeleteFrom(SqlCommandDeleteFrom* command){
    CatalogManager catalog_manager;
    IndexManager index_manager;
    RecordManager record_manager;

    bool delete_all_records = command->delete_all_records();
    std::string table_name = command->table_name();
    std::vector<WhereClause> where_clause = command->where_clause();

    if (!catalog_manager.HasTable(table_name)){
        return Info("Table \"" + table_name + "\" not exists.");
    }

    auto table = catalog_manager.GetTableInfo(table_name);

    // Every attribute mentioned in a WHERE clause must exist in the table.
    for (auto clause : where_clause){
        if (!table.HasAttribute(clause.kColumnName)){
            return Info("attribute \"" + clause.kColumnName + "\"in where clause \""+ clause.kColumnName + clause.kOperator + clause.kCondition +"\" not exists.");
        }
    }

    if (delete_all_records){
        if (!record_manager.DeleteAllRecords(table)){
            return Info("Delete records failed");
        }
        // update index
        for (auto index_name : table.index_names()){
            IndexInfo index_info = catalog_manager.GetIndexInfo(index_name);
            index_manager.EmptyIndex(index_info);
        }
        return Info();
    }

    // Split the clauses into those that can be answered through an index and
    // those that have to be evaluated record by record.
    std::vector<WhereClause> where_clause_with_index;
    std::vector<WhereClause> where_clause_without_index;
    for (auto clause : where_clause){
        if (!table.attribute(clause.kColumnName).index_names().empty() && clause.kOperator != "!="){
            where_clause_with_index.push_back(clause);
        }
        else{
            where_clause_without_index.push_back(clause);
        }
    }

    std::vector<std::pair<int,int>> results;

    if (!where_clause_with_index.empty()) // at least one condition attribute has an index
    {
        // Tracks whether `results` already holds the matches of a previous
        // clause. An empty `results` must NOT be taken to mean "nothing
        // processed yet": an empty intersection is a legitimate outcome and
        // has to stay empty for the remaining clauses.
        bool results_seeded = false;
        for (auto clause : where_clause_with_index){
            std::string index_name = table.attribute(clause.kColumnName).index_names().at(0);
            IndexInfo index = catalog_manager.GetIndexInfo(index_name);
            std::vector<int> offsets_of_a_clause = index_manager.FindRecords(table, index, clause);
            if (offsets_of_a_clause.empty()){
                return Info("record not found with index, deletion failed");
            }
            // Visit each offset once, in ascending order.
            std::sort(offsets_of_a_clause.begin(), offsets_of_a_clause.end());
            offsets_of_a_clause.erase(std::unique(offsets_of_a_clause.begin(), offsets_of_a_clause.end()), offsets_of_a_clause.end());

            std::vector<std::pair<int,int>> results_of_a_clause = record_manager.FindRecordsWithIndex(offsets_of_a_clause, table, clause);
            if (!results_seeded){
                results = results_of_a_clause;
                results_seeded = true;
            }
            else{
                // NOTE(review): std::set_intersection requires both ranges to
                // be sorted; this relies on FindRecordsWithIndex returning
                // its pairs in ascending order -- TODO confirm.
                std::vector<std::pair<int,int> > results_temp;
                std::set_intersection(results.begin(), results.end(),
                                      results_of_a_clause.begin(), results_of_a_clause.end(),
                                      std::back_inserter(results_temp));
                results = results_temp;
            }
        }

        for (auto clause : where_clause_without_index){
            if (results.empty()){
                return Info("record not found with index, deletion failed");
            }
            results = record_manager.RecordsFilter(results, table, clause);
        }
    }
    else{ // none of the condition attributes has an index
        // The first clause scans the table; later clauses only narrow the
        // result. Same reasoning as above: an empty result is final and must
        // not trigger a fresh scan with the next clause.
        bool results_seeded = false;
        for (auto clause : where_clause_without_index){
            if (!results_seeded){
                results = record_manager.FindRecordsWithNoIndex(table, clause);
                results_seeded = true;
            }
            else{
                results = record_manager.RecordsFilter(results, table, clause);
            }
            if (results.empty()){
                break; // no record can satisfy the remaining clauses either
            }
        }
        if (results.empty()){
            return Info("record not found with no index, deletion failed");
        }
    }

    if (results.empty()){
        return Info("record not found, deletion failed");
    }

    // Fetch the record contents before deleting them: the attribute values
    // are needed afterwards to remove the matching index entries.
    auto records = record_manager.SelectRecords(results, table);
    int ret = record_manager.DeleteRecords(results, table);
    if (!ret){
        return Info("Delete records failed.");
    }

    auto index_names = table.index_names();
    int pair_index = 0; // position in `results` of the record being processed
    for (auto record : records){
        for (auto name : index_names){
            IndexInfo index_info = catalog_manager.GetIndexInfo(name);
            auto attribute_position = table.attribute_index(index_info.attribute_name());
            std::string value = record.at(attribute_position);
            int offset_i = results.at(pair_index).first;
            bool removed = index_manager.DeleteRecord(table, index_info, value, offset_i);
            if (!removed){
                return Info("Delete record in index \"" + index_info.name() + "\" on attribute \"" + index_info.attribute_name() +"\" of table \"" + index_info.table_name() + "\"failed.");
            }
        }
        pair_index++;
    }
    return Info();
}