// Align the haplotype to the reference genome represented by the BWT/SSA pair void HapgenUtil::alignHaplotypeToReferenceBWASW(const std::string& haplotype, const BWTIndexSet& referenceIndex, HapgenAlignmentVector& outAlignments) { PROFILE_FUNC("HapgenUtil::alignHaplotypesToReferenceBWASW") LRAlignment::LRParams params; params.zBest = 20; for(size_t i = 0; i <= 1; ++i) { LRAlignment::LRHitVector hits; std::string query = (i == 0) ? haplotype : reverseComplement(haplotype); LRAlignment::bwaswAlignment(query, referenceIndex.pBWT, referenceIndex.pSSA, params, hits); // Convert the hits into alignments for(size_t j = 0; j < hits.size(); ++j) { int q_alignment_length = hits[j].q_end - hits[j].q_start; // Skip non-complete alignments if((int)haplotype.length() == q_alignment_length) { HapgenAlignment aln(hits[j].targetID, hits[j].t_start, hits[j].length, hits[j].G, i == 1); outAlignments.push_back(aln); } } } }
extern std::string consensus(const Contig* contig, const std::vector<Read*>& reads) { int size = contig->getParts().size(); if (size == 0) { return ""; } auto& first = contig->getParts().front(); POA::Graph graph(first.type() ? reads[first.src]->reverse_complement() : reads[first.src]->sequence(), "seq0"); for (int i = 1; i < size; ++i) { const auto& curr = contig->getParts()[i]; const auto& curr_seq = curr.type() ? reads[curr.src]->reverse_complement() : reads[curr.src]->sequence(); const int offset = std::max((int) (curr.offset - THRESHOLD * curr_seq.length()), (int) (BAND_PERCENTAGE * curr_seq.length())); Timer t; t.start(); POA::Alignment aln(const_cast<string&>(curr_seq), graph); aln.align_banded_starting_at(offset, BAND_PERCENTAGE * curr_seq.length()); t.stop(); t.print("consensus", "poa"); graph.insertSequenceAlignment(aln, curr_seq, "seq" + std::to_string(i)); } string consensus; graph.generate_consensus(&consensus); return consensus; }
bool TaskManagerSchedulerCount::canRemoveFromTaskManager() { static const PROGMEM prog_char functionName[] = "canRemoveFromTaskManager"; if( currentExecutionCount > executionCount ) { //all( p << "TaskManagerSchedulerCount:canRemoveFromTaskManager():removing" << endl; ) //all( wpln( "removing" ) ) aln( "removing" ) currentExecutionCount = 0; return true; } else return false; }
// Compute a consensus of the given sequences with a partial-order alignment
// graph: the first sequence seeds the graph, every later one is aligned to the
// graph and inserted under the label "seq<index>".
// Returns an empty string for an empty input.
string consensus(const vector<string>& sequences) {

    const size_t count = sequences.size();
    if (count == 0) {
        return "";
    }

    Graph graph(sequences.front(), "seq0");

    size_t idx = 1;
    while (idx < count) {
        // Alignment takes a non-const reference, hence the cast
        Alignment aln(const_cast<string&>(sequences[idx]), graph);
        aln.align();
        graph.insertSequenceAlignment(aln, sequences[idx], "seq" + to_string(idx));
        ++idx;
    }

    string result;
    graph.generate_consensus(&result);
    return result;
}
void RS232ToWiFiTask::run() { static const PROGMEM prog_char functionName[] = "run"; //static const char* prefix = "RS232ToWiFiTask:run():"; /* while( stream->available() > 0 ) { int c = stream->read(); if( c == -1 ) break; info( p << prefix << "received : " << (char)c << endl; ) stream->write( c ); }*/ //if( stream->available() > 0 ) //{ //info( p << prefix << "processing RS232<->WiFi, available bytes = " << stream->available() << endl; ) //static const unsigned int readBytesCountLimit = 65535; // todo to ma byc jako parametr przekazany do konstruktora //static const unsigned int timeoutInSeconds = 10; // todo to ma byc jako parametr przekazany do konstruktora //if( streamReadBytesCounting.getReadBytesCount() < readBytesCountLimit ) //{ //if( restServer.processStream( streamHFA11xRS232WiFiPtr ) ) if( restServer.processStream( stream ) ) { //info( p << prefix << "all operations finished on RS232<->WiFi" << endl; ) aln( "all operations finished on RS232<->WiFi" ) //stream->write( -1 ); //stream->write( -1 ); //streamReadBytesCounting.setStreamAndResetInternalState( streamReadBytesCounting.mainStream ); } else { //info( p << prefix << "RS232<->WiFi connection will be also processed in the future" << endl; ) aln( "RS232<->WiFi connection will be also processed in the future" ) } //} //else //{ // warning( p << prefix << "maximum read bytes count reached!" << endl; ) // streamReadBytesCounting.setStreamAndResetInternalState( streamReadBytesCounting.mainStream ); //} //} }
void SendResponseSubTask::setWork( bool _isGETHttpMethod , bool _sendResponseInJSON, bool _sendResponseAsServiceMetaDescription , ServiceDescription* _selectdServices , unsigned char _selectedServicesCount ) { static const PROGMEM prog_char functionName[] = "setWork"; //static const char* prefix = "SendResponseSubTask:setWork():"; isGETHttpMethod = _isGETHttpMethod; sendResponseInJSON = _sendResponseInJSON; sendResponseAsServiceMetaDescription = _sendResponseAsServiceMetaDescription; selectdServices = _selectdServices; selectedServicesCount = _selectedServicesCount; if( sendResponseAsServiceMetaDescription ) sendResponseInJSON = true; //all( p << prefix << "isGETHttpMethod = " << isGETHttpMethod << ", sendResponseInJSON = " << sendResponseInJSON << ", sendResponseAsServiceMetaDescription = " << sendResponseAsServiceMetaDescription << ", selectedServicesCount = " << selectedServicesCount << endl; ) //all( wpln( "isGETHttpMethod = %u, sendResponseInJSON = %u, sendResponseAsServiceMetaDescription = %u, selectedServicesCount = %u" , isGETHttpMethod , sendResponseInJSON , sendResponseAsServiceMetaDescription , selectedServicesCount ) ) aln( "isGETHttpMethod = %u, sendResponseInJSON = %u, sendResponseAsServiceMetaDescription = %u, selectedServicesCount = %u" , isGETHttpMethod , sendResponseInJSON , sendResponseAsServiceMetaDescription , selectedServicesCount ) currentPageRenderer = sendResponseInJSON ? jsonPageRenderer : htmlPageRenderer; //if( sendResponseInJSON ) // currentPageRenderer = jsonPageRenderer; //else // currentPageRenderer = htmlPageRenderer; }
void runNGSAnalysis(Params ¶ms) { time_t begin_time; time(&begin_time); char model_name[20]; if (!params.ngs_file) { computePairCount(params, NULL, 0.0); return; } // read input file, initialize NGSAlignment NGSAlignment aln(params.ngs_file); cout.setf(ios::fixed,ios::floatfield); //params.freq_type = FREQ_ESTIMATE; // initialize NGSTree NGSTree tree(params, &aln); aln.tree = &tree; ModelsBlock *models_block = new ModelsBlock; // initialize Model string original_model = params.model_name; if (params.model_name == "") { sprintf(model_name, "GTR+F%d", aln.ncategory); params.freq_type = FREQ_ESTIMATE; } else sprintf(model_name, "%s+F%d", params.model_name.c_str(), aln.ncategory); params.model_name = model_name; tree.setModelFactory(new ModelFactory(params, &tree, models_block)); tree.setModel(tree.getModelFactory()->model); tree.setRate(tree.getModelFactory()->site_rate); delete models_block; int model_df = tree.getModel()->getNDim() + tree.getRate()->getNDim(); cout << endl; cout << "Model of evolution: " << tree.getModelName() << " (" << model_df << " free parameters)" << endl; cout << endl; // optimize model parameters and rate scaling factors cout << "Optimizing model parameters" << endl; double bestTreeScore = tree.getModelFactory()->optimizeParameters(false, true); cout << "Log-likelihood: " << bestTreeScore << endl; DoubleMatrix part_rate(aln.ncategory); StrVector rate_name; int i, j; rate_name.push_back("Hete_error"); if (tree.getModel()->isReversible()) { for (i = 0; i < aln.num_states-1; i++) for (j = i+1; j < aln.num_states; j++) { stringstream x; x << aln.convertStateBackStr(i) << "<->" << aln.convertStateBackStr(j); rate_name.push_back(x.str()); } for (i = 0; i < aln.num_states; i++) { stringstream x; x << aln.convertStateBackStr(i); rate_name.push_back(x.str()); } } else { for (i = 0; i < aln.num_states; i++) for (j = 0; j < aln.num_states; j++) if (j != i) { stringstream x; x << aln.convertStateBackStr(i) << "->" << aln.convertStateBackStr(j); 
rate_name.push_back(x.str()); } } VerboseMode vb_saved = verbose_mode; verbose_mode = VB_QUIET; cout << endl << "--> INFERING RATE ASSUMING POSITION-SPECIFIC MODEL..." << endl << endl; for (int pos = 0; pos < aln.ncategory; pos++) { cout << "Position " << pos+1 << " / "; double *pair_pos = aln.pair_freq + (pos*aln.num_states*aln.num_states); testSingleRateModel(params, aln, tree, original_model, pair_pos, part_rate[pos], rate_name, false, NULL); } verbose_mode = vb_saved; double *sum_freq = new double[aln.num_states*aln.num_states]; cout << endl << "-->INFERING RATE UNDER EQUAL-RATE NULL MODEL..." << endl << endl; aln.computeSumPairFreq(sum_freq); DoubleVector null_rate; string out_file = params.out_prefix; out_file += ".ngs_e"; for (i = 0; i < aln.num_states*aln.num_states; i++) cout << sum_freq[i] << " "; cout << endl; testSingleRateModel(params, aln, tree, original_model, sum_freq, null_rate, rate_name, true, out_file.c_str()); DoubleVector two_rate; cout << endl << "-->INFERING RATE UNDER TWO-RATE MODEL..." << endl << endl; testTwoRateModel(params, aln, tree, original_model, sum_freq, two_rate, rate_name, true, NULL); // report running results out_file = params.out_prefix; out_file += ".ngs"; reportNGSAnalysis(out_file.c_str(), params, aln, tree, part_rate, rate_name); if (params.ngs_mapped_reads) { computePairCount(params, &tree, null_rate[0]); } time_t end_time; time(&end_time); cout << "Total run time: " << difftime(end_time, begin_time) << " seconds" << endl << endl; delete [] sum_freq; }
// Align the haplotype to the reference genome represented by the BWT/SSA pair void HapgenUtil::alignHaplotypeToReferenceKmer(size_t k, const std::string& haplotype, const BWTIndexSet& referenceIndex, const ReadTable* pReferenceTable, HapgenAlignmentVector& outAlignments) { PROFILE_FUNC("HapgenUtil::alignHaplotypesToReferenceKmer") int64_t max_interval_size = 4; if(haplotype.size() < k) return; std::vector<int> event_count_vector; std::vector<HapgenAlignment> tmp_alignments; int min_events = std::numeric_limits<int>::max(); // Align forward and reverse haplotype to reference for(size_t i = 0; i <= 1; ++i) { bool is_reverse = i == 1; std::string query = is_reverse ? reverseComplement(haplotype) : haplotype; // Find shared kmers between the haplotype and the reference CandidateVector candidates; size_t nqk = query.size() - k + 1; for(size_t j = 0; j < nqk; ++j) { std::string kmer = query.substr(j, k); // Find the interval of this kmer in the reference BWTInterval interval = BWTAlgorithms::findInterval(referenceIndex, kmer); if(!interval.isValid() || interval.size() >= max_interval_size) continue; // not found or too repetitive // Extract the reference location of these hits for(int64_t k = interval.lower; k <= interval.upper; ++k) { SAElem elem = referenceIndex.pSSA->calcSA(k, referenceIndex.pBWT); // Make a candidate alignment CandidateKmerAlignment candidate; candidate.query_index = j; candidate.target_index = elem.getPos(); candidate.target_extrapolated_start = candidate.target_index - candidate.query_index; candidate.target_extrapolated_end = candidate.target_extrapolated_start + query.size(); candidate.target_sequence_id = elem.getID(); candidates.push_back(candidate); } } // Remove duplicate candidates std::sort(candidates.begin(), candidates.end(), CandidateKmerAlignment::sortByStart); CandidateVector::iterator new_end = std::unique(candidates.begin(), candidates.end(), CandidateKmerAlignment::equalByStart); candidates.resize(new_end - candidates.begin()); 
for(size_t j = 0; j < candidates.size(); ++j) { // Extract window around reference size_t window_size = 200; int ref_start = candidates[j].target_extrapolated_start - window_size; int ref_end = candidates[j].target_extrapolated_end + window_size; const SeqItem& ref_record = pReferenceTable->getRead(candidates[j].target_sequence_id); const DNAString& ref_sequence = ref_record.seq; if(ref_start < 0) ref_start = 0; if(ref_end > (int)ref_sequence.length()) ref_end = ref_sequence.length(); std::string ref_substring = ref_sequence.substr(ref_start, ref_end - ref_start); // Align haplotype to the reference SequenceOverlap overlap = alignHaplotypeToReference(ref_substring, query); if(overlap.score < 0 || !overlap.isValid()) continue; int alignment_start = ref_start + overlap.match[0].start; int alignment_end = ref_start + overlap.match[0].end; // inclusive int alignment_length = alignment_end - alignment_start + 1; // Crude count of the number of distinct variation events bool has_indel = false; int num_events = overlap.edit_distance; std::stringstream c_parser(overlap.cigar); int len; char t; while(c_parser >> len >> t) { assert(len > 0); // Only count one event per insertion/deletion if(t == 'D' || t == 'I') { num_events -= (len - 1); has_indel = true; } } // Skip poor alignments double mismatch_rate = 1.0f - (overlap.getPercentIdentity() / 100.f); if(mismatch_rate > 0.05f || overlap.total_columns < 50) { if(Verbosity::Instance().getPrintLevel() > 4) { printf("Haplotype Alignment - Ignoring low quality alignment (%.3lf, %dbp, %d events) to %s:%d\n", 1.0f - mismatch_rate, overlap.total_columns, num_events, ref_record.id.c_str(), ref_start); } continue; } bool is_snp = !has_indel && overlap.edit_distance == 1; HapgenAlignment aln(candidates[j].target_sequence_id, alignment_start, alignment_length, overlap.score, num_events, is_reverse, is_snp); tmp_alignments.push_back(aln); event_count_vector.push_back(num_events); if(Verbosity::Instance().getPrintLevel() > 4) { 
printf("Haplotype Alignment - Accepting alignment (%.3lf, %dbp, %d events) to %s:%d\n", 1.0f - mismatch_rate, overlap.total_columns, num_events, ref_record.id.c_str(), ref_start); } // Record the best edit distance if(num_events < min_events) min_events = num_events; } } // Copy the best alignments into the output int MAX_DIFF_TO_BEST = 10; int MAX_EVENTS = 8; assert(event_count_vector.size() == tmp_alignments.size()); for(size_t i = 0; i < event_count_vector.size(); ++i) { if(event_count_vector[i] <= MAX_EVENTS && event_count_vector[i] - min_events <= MAX_DIFF_TO_BEST) outAlignments.push_back(tmp_alignments[i]); else if(Verbosity::Instance().getPrintLevel() > 3) printf("Haplotype Alignment - Ignoring alignment with too many events (%d)\n", event_count_vector[i]); } }
// Align the haplotype to the reference genome represented by the BWT/SSA pair void HapgenUtil::alignHaplotypeToReferenceKmer(size_t k, const std::string& haplotype, const BWTIndexSet& referenceIndex, const ReadTable* pReferenceTable, HapgenAlignmentVector& outAlignments) { PROFILE_FUNC("HapgenUtil::alignHaplotypesToReferenceKmer") int64_t max_interval_size = 4; if(haplotype.size() < k) return; std::vector<int> event_count_vector; std::vector<HapgenAlignment> tmp_alignments; int min_events = std::numeric_limits<int>::max(); // Align forward and reverse haplotype to reference for(size_t i = 0; i <= 1; ++i) { bool is_reverse = i == 1; std::string query = is_reverse ? reverseComplement(haplotype) : haplotype; // Find shared kmers between the haplotype and the reference CandidateVector candidates; size_t nqk = query.size() - k + 1; for(size_t j = 0; j < nqk; ++j) { std::string kmer = query.substr(j, k); // Find the interval of this kmer in the reference BWTInterval interval = BWTAlgorithms::findInterval(referenceIndex, kmer); if(!interval.isValid() || interval.size() >= max_interval_size) continue; // not found or too repetitive // Extract the reference location of these hits for(int64_t k = interval.lower; k <= interval.upper; ++k) { SAElem elem = referenceIndex.pSSA->calcSA(k, referenceIndex.pBWT); // Make a candidate alignment CandidateKmerAlignment candidate; candidate.query_index = j; candidate.target_index = elem.getPos(); candidate.target_extrapolated_start = candidate.target_index - candidate.query_index; candidate.target_extrapolated_end = candidate.target_extrapolated_start + query.size(); candidate.target_sequence_id = elem.getID(); candidates.push_back(candidate); } } // Remove duplicate candidates std::sort(candidates.begin(), candidates.end(), CandidateKmerAlignment::sortByStart); CandidateVector::iterator new_end = std::unique(candidates.begin(), candidates.end(), CandidateKmerAlignment::equalByStart); candidates.resize(new_end - candidates.begin()); 
for(size_t j = 0; j < candidates.size(); ++j) { // Extract window around reference size_t window_size = 200; int ref_start = candidates[j].target_extrapolated_start - window_size; int ref_end = candidates[j].target_extrapolated_end + window_size; const DNAString& ref_sequence = pReferenceTable->getRead(candidates[j].target_sequence_id).seq; if(ref_start < 0) ref_start = 0; if(ref_end > (int)ref_sequence.length()) ref_end = ref_sequence.length(); std::string ref_substring = ref_sequence.substr(ref_start, ref_end - ref_start); // Align haplotype to the reference SequenceOverlap overlap = Overlapper::computeOverlap(query, ref_substring); // Skip terrible alignments double percent_aligned = (double)overlap.getOverlapLength() / query.size(); if(percent_aligned < 0.95f) continue; /* // Skip alignments that are not full-length matches of the haplotype if(overlap.match[0].start != 0 || overlap.match[0].end != (int)haplotype.size() - 1) continue; */ int alignment_start = ref_start + overlap.match[1].start; int alignment_end = ref_start + overlap.match[1].end; // inclusive int alignment_length = alignment_end - alignment_start + 1; // Crude count of the number of distinct variation events int num_events = overlap.edit_distance; std::stringstream c_parser(overlap.cigar); int len; char t; while(c_parser >> len >> t) { assert(len > 0); // Only count one event per insertion/deletion if(t == 'D' || t == 'I') num_events -= (len - 1); } HapgenAlignment aln(candidates[j].target_sequence_id, alignment_start, alignment_length, overlap.score, is_reverse); tmp_alignments.push_back(aln); event_count_vector.push_back(num_events); // Record the best edit distance if(num_events < min_events) min_events = num_events; } } // Copy the best alignments into the output int MAX_DIFF_TO_BEST = 10; int MAX_EVENTS = 8; assert(event_count_vector.size() == tmp_alignments.size()); for(size_t i = 0; i < event_count_vector.size(); ++i) { if(event_count_vector[i] <= MAX_EVENTS && event_count_vector[i] - 
min_events <= MAX_DIFF_TO_BEST) outAlignments.push_back(tmp_alignments[i]); } }