// Constructs a scorer for the template sequence `tpl`.
//
// The recursor is built from the config's MovesAvailable and Banding values,
// a copy of the config is kept in quiverConfig_, `tpl` is stored as the
// forward template and ReverseComplement(tpl) as the reverse template, and
// scorerForRead_ is value-initialized.
MultiReadMutationScorer<R>::MultiReadMutationScorer(const QuiverConfig& quiverConfig, std::string tpl)
    : recursor_(quiverConfig.MovesAvailable, quiverConfig.Banding),
      quiverConfig_(quiverConfig),
      fwdTemplate_(tpl),
      revTemplate_(ReverseComplement(tpl)),
      scorerForRead_()
{
    // The invariant check is wrapped in DEBUG_ONLY.
    DEBUG_ONLY(CheckInvariants());
}
// Shrinks a palindromic sequence `s` in place.
//
// is_loop == false: `s` is truncated to the first half of its k-mers, i.e. to
//   num_kmer / 2 + (kmer_k - 1) characters, where num_kmer is the number of
//   k-mers in `s` (asserted to be even).
// is_loop == true: finds the first index i >= 1 at which the reverse
//   complement of the k-mer starting at i equals the k-mer starting at i - 1,
//   then replaces `s` with the s.length() / 2 characters starting at i.
//   When no such index exists, `s` is left unchanged.
void FoldPalindrome(std::string &s, int kmer_k, bool is_loop) {
    if (!is_loop) {
        int num_kmer = s.length() - kmer_k + 1;
        assert(num_kmer % 2 == 0);
        s.resize(num_kmer / 2 + (kmer_k - 1));
        return;
    }

    for (unsigned start = 1; start + kmer_k <= s.length(); ++start) {
        std::string flipped = s.substr(start, kmer_k);
        ReverseComplement(flipped);
        if (flipped != s.substr(start - 1, kmer_k)) {
            continue;
        }

        // First pivot found: keep half of the sequence from here and stop.
        assert(start <= s.length() / 2);
        s = s.substr(start, s.length() / 2);
        return;
    }
}
void TryLI(std::map<std::string,int> & ChrName2Index, ControlState & CurrentState, const Assembly & OneSV, std::vector <SPLIT_READ> & First, std::vector <SPLIT_READ> & Second, std::ofstream & ASM_Output) { short MinimumOverlap = 10; short MaximumOverlap;// = min(); short MaxMismatch = 3; short CountMismatch; short FirstLength, SecondLength; std::string FirstOne, SecondOne, MergedString; std::cout << "TryLI" << std::endl; for (unsigned ReadIndex = 0; ReadIndex < First.size(); ReadIndex++) { std::cout << First[ReadIndex].MatchedD << " " << First[ReadIndex].MatchedRelPos << std::endl; } for (unsigned ReadIndex = 0; ReadIndex < Second.size(); ReadIndex++) { std::cout << Second[ReadIndex].MatchedD << " " << Second[ReadIndex].MatchedRelPos << std::endl; } for (unsigned ReadIndex_Plus = 0; ReadIndex_Plus < First.size(); ReadIndex_Plus++) { if (First[ReadIndex_Plus].MatchedD == '-') continue; for (unsigned ReadIndex_Minus = 0; ReadIndex_Minus < Second.size(); ReadIndex_Minus++) { if (Second[ReadIndex_Minus].MatchedD == '+') continue; MaximumOverlap = std::min(First[ReadIndex_Plus].getReadLength(), Second[ReadIndex_Minus].getReadLength()); std::cout << MaximumOverlap << std::endl; FirstOne = ReverseComplement(First[ReadIndex_Plus].getUnmatchedSeq()); SecondOne = Second[ReadIndex_Minus].getUnmatchedSeq(); FirstLength = FirstOne.size(); SecondLength = SecondOne.size(); std::cout << FirstOne << "\n" << SecondOne << "\n"; for (short OverlapCount = MinimumOverlap; OverlapCount < MaximumOverlap; OverlapCount++) { CountMismatch = 0; for (short pos_index = 0; pos_index < OverlapCount; pos_index++) { if (FirstOne[FirstLength - OverlapCount + pos_index] != SecondOne[pos_index]) { ++CountMismatch; } if (CountMismatch > MaxMismatch) { break; } } if (CountMismatch <= MaxMismatch) { MergedString = FirstOne.substr(0, FirstLength - OverlapCount) + SecondOne; std::cout << "MergedString: " << OverlapCount << " " << MergedString << std::endl; //ReportLI(AllChromosomes, ChrName2Index, 
CurrentState, OneSV, First[ReadIndex_Plus], Second[ReadIndex_Minus], MergedString, OverlapCount, ASM_Output); } } } } }
void Window::ScanReads( const std::vector <SPLIT_READ> &readsInWindow, unsigned short bamIndex, bool isTumor) { // openmp parallel omp_set_num_threads( paramd.numberThreads ); #pragma omp parallel for for (unsigned short i=0; i<_siteCount; i++) { HomoSite *p = _startSite + i; unsigned long tsize = readsInWindow.size(); for (unsigned long j=0; j<tsize; j++) { if ( readsInWindow[j].Mapped ) { if ( (readsInWindow[j].MatchedRelPos < p->lowcut) || (readsInWindow[j].MatchedRelPos > p->highcut) ) continue; } unsigned short tCount = DoOneRead(readsInWindow[j].ReadSeq, p); if ( (tCount > 0) && (tCount < paramd.s_dispots) ) { if (isTumor) { p->tumorDis[bamIndex][tCount-1]++; } else { p->normalDis[bamIndex][tCount-1]++; } } else { // don't scan reverse if mapped if ( readsInWindow[j].Mapped ) continue; // reverse std::string tStr = readsInWindow[j].ReadSeq; ReverseComplement(tStr); tCount = DoOneRead(tStr, p); if ( (tCount > 0) && (tCount < paramd.s_dispots) ) { if (isTumor) { p->tumorDis[bamIndex][tCount-1]++; } else { p->normalDis[bamIndex][tCount-1]++; } } } } } }
void AlleleIdentity::PredictSequenceMotifSSE(const LocalReferenceContext &reference_context, const TIonMotifSet & ErrorMotifs, const ReferenceReader &ref_reader, int chr_idx) { //cout << "Hello from PredictSequenceMotifSSE" << endl; sse_prob_positive_strand = 0; sse_prob_negative_strand = 0; //long vcf_position = reference_context.position0+1; long var_position = reference_context.position0 + left_anchor; // This points to the first deleted base string seqContext; // status.isHPIndel && status.isDeletion implies reference_context.my_hp_length.at(left_anchor) > 1 if (status.isHPIndel && status.isDeletion) { // cout << start_pos << "\t" << variant_context.refBaseAtCandidatePosition << variant_context.ref_hp_length << "\t" << variant_context.refBaseLeft << variant_context.left_hp_length << "\t" << variant_context.refBaseRight << variant_context.right_hp_length << "\t"; unsigned context_left = var_position >= 10 ? 10 : var_position; //if (var_position + reference_context.my_hp_length.at(left_anchor) + 10 < ref_reader.chr_size(chr_idx)) seqContext = ref_reader.substr(chr_idx, var_position - context_left, context_left + (unsigned int)reference_context.my_hp_length[left_anchor] + 10); // else // seqContext = ref_reader.substr(chr_idx, var_position - context_left); if (seqContext.length() > 0 && context_left < seqContext.length()) { sse_prob_positive_strand = ErrorMotifs.get_sse_probability(seqContext, context_left); //cout << seqContext << "\t" << context_left << "\t" << sse_prob_positive_strand << "\t"; context_left = seqContext.length() - context_left - 1; string reverse_seqContext; ReverseComplement(seqContext, reverse_seqContext); sse_prob_negative_strand = ErrorMotifs.get_sse_probability(reverse_seqContext, context_left); // cout << reverse_seqContext << "\t" << context_left << "\t" << sse_prob_negative_strand << "\t"; } } }
// Adds `readSequence` to the graph in whichever orientation aligns better.
//
// The very first read is always added as given. Every later read is tentatively
// aligned both as given and reverse-complemented; the orientation with the
// higher score is committed, provided that score is at least `minScoreToAdd`
// (the forward orientation wins ties). Returns the key of the added read, or
// -1 when neither orientation was committed.
SparsePoa::ReadKey SparsePoa::OrientAndAddRead(const std::string& readSequence,
                                               const PoaAlignmentOptions& /* alnOptions */,
                                               float minScoreToAdd)
{
    AlignConfig config = DefaultPoaConfig(AlignMode::LOCAL);
    Path outputPath;

    // Bookkeeping shared by every successful add: remember the path and the
    // orientation, and hand back the index of the read just added.
    auto recordAddedRead = [&](bool wasReverseComplemented) -> ReadKey {
        readPaths_.push_back(outputPath);
        reverseComplemented_.push_back(wasReverseComplemented);
        return graph_->NumReads() - 1;
    };

    if (graph_->NumReads() == 0) {
        graph_->AddFirstRead(readSequence, &outputPath);
        return recordAddedRead(false);
    }

    auto forwardTry = graph_->TryAddRead(readSequence, config, rangeFinder_);
    auto reverseTry = graph_->TryAddRead(ReverseComplement(readSequence), config, rangeFinder_);

    ReadKey key = -1;
    if (forwardTry->Score() >= reverseTry->Score() && forwardTry->Score() >= minScoreToAdd) {
        graph_->CommitAdd(forwardTry, &outputPath);
        key = recordAddedRead(false);
    } else if (reverseTry->Score() >= forwardTry->Score() && reverseTry->Score() >= minScoreToAdd) {
        graph_->CommitAdd(reverseTry, &outputPath);
        key = recordAddedRead(true);
    }

    delete forwardTry;
    delete reverseTry;
    return key;
}
int searchIndels(ControlState& currentState, unsigned NumBoxes, const SearchWindow& window ) { static int Count_DI = 0; static int Count_DI_Plus = 0; static int Count_DI_Minus = 0; unsigned CloseIndex, FarIndex; std::vector<unsigned> DI[NumBoxes]; unsigned TempBoxIndex; LOG_INFO(*logStream << "Searching deletion-insertions ... " << std::endl); //UserDefinedSettings *userSettings = UserDefinedSettings::Instance(); for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads_SR.size(); ReadIndex++) { SPLIT_READ& currentRead = currentState.Reads_SR[ReadIndex]; if (currentRead.Used || currentRead.UP_Far.empty() || currentRead.FragName != currentRead.FarFragName) { continue; } CloseIndex = currentRead.UP_Close.size() - 1; FarIndex = currentRead.UP_Far.size() - 1; if (currentRead.UP_Far[FarIndex].Mismatches + currentRead.UP_Close[CloseIndex].Mismatches > (short) (1 + userSettings->Seq_Error_Rate * (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr))) { continue; } if (currentRead.MatchedD == Plus) { if (currentRead.UP_Far[FarIndex].Direction == Minus) { if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.getReadLength() && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases && currentRead.UP_Far[FarIndex].AbsLoc > currentRead.UP_Close[CloseIndex].AbsLoc + 1) { currentRead.Left = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Close[CloseIndex].LengthStr + 1; currentRead.Right = currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr; currentRead.NT_str = ReverseComplement( currentRead. getUnmatchedSeq()). 
substr( currentRead.BP + 1, currentRead.NT_size); currentRead.IndelSize = (currentRead.Right - currentRead.Left) + currentRead.NT_size - currentRead.getReadLengthMinus(); currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; if (1) { if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if (readInSpecifiedRegion( currentRead, userSettings->getRegion() ) ) { TempBoxIndex = (int) (currentRead. BPLeft) / BoxSize; if (TempBoxIndex < NumBoxes) { DI[TempBoxIndex]. push_back(ReadIndex); currentRead.Used = true; Count_DI++; Count_DI_Plus++; } } } } } } } else if (currentRead.MatchedD == Minus) { if (currentRead.UP_Far[FarIndex].Direction == Plus) { if (currentRead.UP_Close[CloseIndex].LengthStr + currentRead.UP_Far[FarIndex].LengthStr < currentRead.getReadLength() && currentRead.UP_Close[CloseIndex].LengthStr + currentRead.UP_Far[FarIndex].LengthStr >= userSettings->Min_Num_Matched_Bases && currentRead.UP_Close[CloseIndex].AbsLoc > currentRead.UP_Far[FarIndex].AbsLoc + 1) { currentRead.Left = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Far[FarIndex].LengthStr + 1; currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.BP = currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Close[CloseIndex].LengthStr - currentRead.UP_Far[FarIndex].LengthStr; currentRead.NT_str = currentRead.getUnmatchedSeq(). 
substr( currentRead.BP + 1, currentRead.NT_size); currentRead.IndelSize = (currentRead.Right - currentRead.Left) - currentRead.getReadLengthMinus() + currentRead.NT_size; currentRead.BPLeft = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; { if ( 1 ) { if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle( currentRead, currentState.FutureReads_SR); } else { if (readInSpecifiedRegion( currentRead, userSettings->getRegion())) { TempBoxIndex = (int) (currentRead. BPLeft) / BoxSize; if (TempBoxIndex < NumBoxes) { DI[TempBoxIndex]. push_back(ReadIndex); currentRead.Used = true; Count_DI++; Count_DI_Minus++; } } } } } } } } } LOG_INFO(*logStream << "Total: " << Count_DI << "\t+" << Count_DI_Plus << "\t-" << Count_DI_Minus << std::endl); std::ofstream DeletionOutf( userSettings->getDOutputFilename().c_str(), std::ios::app); std::ofstream inversionsOutf( userSettings->getINVOutputFilename().c_str(), std::ios::app); SortOutputDI(currentState, NumBoxes, window.getChromosome()->getSeq(), currentState.Reads_SR, DI, DeletionOutf, inversionsOutf); DeletionOutf.close(); for (unsigned int i = 0; i < NumBoxes; i++) { DI[i].clear(); } return EXIT_SUCCESS; }
int searchInversionsNT(ControlState& currentState, unsigned NumBoxes, const SearchWindow& window) { static int Count_Inv_NT = 0; static int Count_Inv_NT_Plus = 0; static int Count_Inv_NT_Minus = 0; std::vector<unsigned> Inv_NT[NumBoxes]; int CloseIndex = 0; int FarIndex = 0; //UserDefinedSettings *userSettings = UserDefinedSettings::Instance(); LOG_INFO(*logStream << "Searching inversions with non-template sequence ... " << std::endl); for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads_SR.size(); ReadIndex++) { SPLIT_READ& currentRead = currentState.Reads_SR[ReadIndex]; if (currentRead.Used || currentRead.UP_Far.empty() || currentRead.FragName != currentRead.FarFragName) { continue; } CloseIndex = currentRead.UP_Close.size() - 1; FarIndex = currentRead.UP_Far.size() - 1; if (currentRead.UP_Far[FarIndex].Mismatches + currentRead.UP_Close[CloseIndex].Mismatches > (short) (1 + userSettings->Seq_Error_Rate * (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr))) { continue; } if (currentRead.UP_Close[0].Strand != currentRead.UP_Far[0].Strand && currentRead.UP_Close[0].Direction == currentRead.UP_Far[0].Direction) { if (currentRead.MatchedD == Plus) { if (currentRead.UP_Far[FarIndex]. Direction == Plus) { if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.getReadLength() && currentRead.UP_Far[FarIndex].AbsLoc > currentRead.UP_Close[CloseIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases ) { currentRead.Left = (currentRead. 
UP_Close[CloseIndex].AbsLoc + 1) - currentRead.UP_Close[CloseIndex].LengthStr; currentRead.Right = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Far[FarIndex].LengthStr + currentRead.getReadLength(); currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Close[CloseIndex].AbsLoc; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr; // NT_2str currentRead.NT_str = ReverseComplement( currentRead. getUnmatchedSeq()).substr(currentRead.BP + 1, currentRead.NT_size); currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc + 1 - g_SpacerBeforeAfter; currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if ( 1 ) { if (readInSpecifiedRegion( currentRead, userSettings->getRegion())) { Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex); currentRead.Used = true; Count_Inv_NT++; Count_Inv_NT_Plus++; } } } } // anchor inside reversed block. 
if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.getReadLength() && currentRead.UP_Far[FarIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion < currentRead.UP_Close[CloseIndex].AbsLoc && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases) { currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Close[CloseIndex].LengthStr + currentRead.getReadLength(); currentRead.Left = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Far[FarIndex].LengthStr + 1; currentRead.BP = currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Far[FarIndex].AbsLoc; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr; currentRead.NT_str = currentRead.getUnmatchedSeq(). substr( currentRead.BP + 1, currentRead.NT_size); currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPLeft = (currentRead.UP_Far[FarIndex].AbsLoc + 1) - g_SpacerBeforeAfter; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) { Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex); currentRead.Used = true; Count_Inv_NT++; Count_Inv_NT_Plus++; } } } } } else if (currentRead.MatchedD == Minus) { if (currentRead.UP_Far[FarIndex]. Direction == Minus) { // anchor outside reversed block. 
if (currentRead.UP_Close[CloseIndex].LengthStr + currentRead.UP_Far[FarIndex].LengthStr < currentRead.getReadLength() && currentRead.UP_Close[CloseIndex].AbsLoc > currentRead.UP_Far[FarIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases) { currentRead.Left = currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr - currentRead.getReadLength(); currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.BP = currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Far[FarIndex].AbsLoc; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr; currentRead.NT_str = currentRead.getUnmatchedSeq().substr( currentRead.BP + 1, currentRead.NT_size); currentRead.BPLeft = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - 1 - g_SpacerBeforeAfter; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) { Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex); currentRead.Used = true; Count_Inv_NT++; Count_Inv_NT_Minus++; } } } // anchor inside reversed block. 
if (currentRead.UP_Close[CloseIndex].LengthStr + currentRead.UP_Far[FarIndex].LengthStr < currentRead.getReadLength() && currentRead.UP_Close[CloseIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion < currentRead.UP_Far[FarIndex].AbsLoc && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases) { currentRead.Right = currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.Left = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - currentRead.getReadLength(); currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Close[CloseIndex].AbsLoc; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr; currentRead.NT_str = ReverseComplement( currentRead. getUnmatchedSeq()). substr( currentRead.BP + 1, currentRead.NT_size); currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - 1 - g_SpacerBeforeAfter; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) { Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex); currentRead.Used = true; Count_Inv_NT++; Count_Inv_NT_Minus++; } } } } } } } LOG_INFO(*logStream << "Total: " << Count_Inv_NT << "\t+" << Count_Inv_NT_Plus << "\t-" << Count_Inv_NT_Minus << std::endl); std::ofstream InversionOutf(userSettings->getINVOutputFilename().c_str(), std::ios::app); SortOutputInv_NT(currentState, NumBoxes, window.getChromosome()->getSeq(), currentState.Reads_SR, Inv_NT, InversionOutf); for (unsigned int i = 0; i < NumBoxes; i++) { Inv_NT[i].clear(); } return EXIT_SUCCESS; }
int main( int argc, char *argv[] ) { int i, j, k ; int len ; char *p ; FILE *fp ; int FILE_TYPE ; // 0-fasta, 1-fastq int correctCount = 0, errorCount = 0 ; int sameCount = 0 ; int trimCount = 0, trimSum = 0 ; int exp ; //0-low,1-med,2-high,3-unknown bool verbose = false ; bool baseVerbose = false ; int baseTP[4] = {0,0,0,0}, baseFP[4] = {0,0,0,0}, baseFN[4] = {0,0,0,0} ; int readTP[4] = {0,0,0,0}, readFP[4] = {0,0,0,0}, readFN[4] = {0,0,0,0} ; bool useExp = false ; bool ignoreIndel = false ; bool allowTrim = false ; int polyA = 100000 ; for ( i = 1 ; i < argc ; ++i ) { if ( !strcmp( argv[i],"-v" ) ) verbose = true ; else if ( !strcmp( argv[i], "-bv" ) ) baseVerbose = true ; else if ( !strcmp( argv[i], "-exp" ) ) useExp = true ; else if ( !strcmp( argv[i], "-polyA" ) ) { polyA = atoi( argv[i + 1] ) ; ++i ; } else if ( !strcmp( argv[i], "-noindel" ) ) ignoreIndel = true ; else if ( !strcmp( argv[i], "-trim" ) ) allowTrim = true ; else { printf( "Unknown para\n" ) ; exit( 0 ) ; } } // Decide whether it is FASTQ or FASTA. 
fp = stdin ;//fopen( argv[1], "r" ) ; fscanf( fp, "%s", buffer ) ; if ( buffer[0] == '>' ) FILE_TYPE = 0 ; else FILE_TYPE = 1 ; //fclose( fp ) ; //fp = stdin ;//fopen( argv[1], "r" ) ; while ( fgets( id, sizeof( id ), fp ) != NULL ) { if ( FILE_TYPE == 0 ) { fgets( seq, sizeof( seq ), fp ) ; } else if ( FILE_TYPE == 1 ) { fgets( seq, sizeof( seq ), fp ) ; fgets( buffer, sizeof( buffer ), fp ) ; fgets( qual, sizeof( qual ), fp ) ; } //printf( "%s%s%s", id, seq,qual ) ; // Clean the return symbol len = strlen( id ) ; if ( id[len - 1] == '\n') id[len - 1] = '\0' ; len = strlen( seq ) ; if ( seq[len - 1] == '\n' ) seq[len - 1] = '\0' ; if ( qual[len - 1] == '\n' ) qual[len - 1] = '\0' ; // Parse the id field p = FindIdColumn( id, "haplotype_infix" ) ; sscanf( p, "%s", origSeq ) ; // Test whether this contain a polyA tag j = k = 0 ; for ( i = 0 ; origSeq[i] ; ++i ) { if ( origSeq[i] == 'A' ) { ++j ; } else if ( origSeq[i] == 'T' ) { ++k ; } else { if ( j >= polyA || k >= polyA ) break ; j = k = 0 ; } } if ( j >= polyA || k >= polyA ) continue ; if ( ignoreIndel && strlen( seq ) != strlen( origSeq ) ) continue ; /*if ( strlen( seq ) != strlen( origSeq ) ) { printf( "%s\n%s\n", id, seq ) ; }*/ p = FindIdColumn( id, "edit_string" ) ; sscanf( p, "%s", cigar ) ; p = FindIdColumn( id, "strand=reverse" ) ; if ( p != NULL ) { ReverseComplement( origSeq ) ; } p = FindIdColumn( id, "exp" ) ; if ( p != NULL ) { sscanf( p, "%s", buffer ) ; if ( !strcmp( buffer, "high" ) ) exp = 2 ; else if ( !strcmp( buffer, "medium" ) ) exp = 1 ; else if ( !strcmp( buffer, "low" ) ) exp = 0 ; else exp = 3 ; } else exp = 3 ; if ( verbose || baseVerbose ) printf( "%s\n", id ) ; //printf( "%s %s\n", seq, origSeq ) ; if ( StrCompWithTrim( origSeq, seq ) ) { /*if ( verbose ) { printf( "Diff:\n%s\n%s\n", id, seq ) ; }*/ ++errorCount ; for ( i = 0 ; cigar[i] ; ++i ) { if ( cigar[i] != 'M' ) break ; } if ( cigar[i] ) { if ( verbose ) printf( "FN\n" ) ; ++readFN[exp] ; } else { if ( verbose ) printf( 
"FP\n" ) ; ++readFP[exp] ; } } else { /*if ( verbose ) { printf( "Same:\n%s\n%s\n", id, seq ) ; }*/ //printf( "S\n" ) ; ++correctCount ; for ( i = 0 ; cigar[i] ; ++i ) { if ( cigar[i] != 'M' ) break ; } if ( cigar[i] ) { if ( verbose ) printf( "TP\n" ) ; ++readTP[exp] ; } } for ( i = 0 ; cigar[i] ; ++i ) { if ( cigar[i] != 'M' ) break ; } if ( !cigar[i] ) ++sameCount ; p = FindIdColumn( id, "trim" ) ; if ( p != NULL ) { int tmp = atoi( p ) ; //printf( "%d %s\n", tmp, p ) ; ++trimCount ; trimSum += tmp ; } // Collect information of TP, FP, FN for base level int verboseType = 0 ; int lena = strlen( origSeq ) ; int lenb = strlen( seq ) ; bool visited[2048] ; memset( visited, false, sizeof( bool ) * lena ) ; if ( allowTrim == false ) { Alignment( lena, origSeq, lenb, seq ) ; } else { for ( i = 0 ; seq[i] ; ++i ) align[i] = i ; } for ( i = 0 ; seq[i] ; ++i ) { //printf( "(%d, %d) ", align[i], baseFP[exp] ) ; if ( align[i] == -1 ) { //if ( i > 2 && i < lenb - 2 ) // printf( "%s\n%s\n", id, seq ) ; ++baseFP[exp] ; continue ; } visited[ align[i] ] = true ; /*if ( i == 0 ) baseFP[exp] += align[i] ; else baseFP[exp] += ( align[i] - align[i - 1] - 1 ) ;*/ if ( cigar[ align[i] ] == 'M' ) { if ( seq[i] != origSeq[ align[i]] ) { verboseType = 1 ; ++baseFP[exp] ; } } else if ( cigar[align[i]] == 'E' ) { if ( seq[i] == origSeq[align[i]] ) { if ( verboseType == 0 ) verboseType = 2 ; //printf( "%d %d\n", i, align[i] ) ; ++baseTP[exp] ; } else { if ( verboseType == 0 || verboseType == 2 ) verboseType = 3 ; ++baseFN[exp] ; } } } for ( k = lena - 1 ; k >= 0 ; --k ) if ( visited[k] ) break ; for ( i = 0 ; i < k + 1 ; ++i ) if ( visited[i] == false ) { if ( cigar[i] == 'M' ) ++baseFP[exp] ; else ++baseFN[exp] ; } for ( i = 0 ; ; ++i ) if ( align[i] == -1 ) --baseFP[exp] ; else break ; for ( i = lenb - 1 ; ; --i ) if ( align[i] == -1 ) --baseFP[exp] ; else break ; //printf( "\n" ) ; /*int verboseType = 0 ; for ( i = 0 ; seq[i] ; ++i ) { if ( cigar[i] == 'M' ) { if ( seq[i] != origSeq[i] ) 
{ verboseType = 1 ; ++baseFP[exp] ; } } else if ( cigar[i] == 'E' ) { if ( seq[i] == origSeq[i] ) { if ( verboseType == 0 ) verboseType = 2 ; ++baseTP[exp] ; } else { if ( verboseType == 0 || verboseType == 2 ) verboseType = 3 ; ++baseFN[exp] ; } } }*/ if ( baseVerbose ) { if ( verboseType == 1 ) printf( "FP\n" ) ; else if ( verboseType == 2 ) printf( "TP\n" ) ; else if ( verboseType == 3 ) printf( "FN\n" ) ; } } int TP, FP, FN ; printf( "correct #: %d\n" "error #: %d\n", correctCount, errorCount ) ; printf( "Original Correct Reads Count: %d\n", sameCount ) ; printf( "Trimmed Reads Count: %d. Average trim length: %lf\n", trimCount, (double)trimSum / trimCount ) ; printf( "Overall:\n") ; TP = FP = FN = 0 ; for ( i = 0 ; i < 4 ; ++i ) { TP += baseTP[i] ; FP += baseFP[i] ; FN += baseFN[i] ; } printf( "\nBase level:\n" ) ; printf( "TP: %d\nFP: %d\nFN: %d\n", TP, FP, FN ) ; double recall = ( double )TP/(TP+FN) ; double precision = (double)TP/(TP+FP) ; printf( "Recall: %lf\n" "Precision: %lf\n" "F-score: %lf\n" "Gain: %lf\n", recall, precision, 2*recall*precision / ( recall + precision ), (double)(TP-FP)/(TP+FN) ) ; TP = FP = FN = 0 ; for ( i = 0 ; i < 4 ; ++i ) { TP += readTP[i] ; FP += readFP[i] ; FN += readFN[i] ; } printf( "\nRead level:\n" ) ; printf( "TP: %d\nFP: %d\nFN: %d\n", TP, FP, FN ) ; recall = ( double )TP/(TP+FN) ; precision = (double)TP/(TP+FP) ; printf( "Recall: %lf\n" "Precision: %lf\n" "F-score: %lf\n" "Gain: %lf\n", recall, precision, 2*recall*precision / ( recall + precision ), (double)(TP-FP)/(TP+FN) ) ; if ( useExp ) { for ( i = 0 ; i < 3 ; ++i ) { TP = baseTP[i] ; FP = baseFP[i] ; FN = baseFN[i] ; printf( "\nExpress level: %d\n", i ) ; printf( "Base level:\n" ) ; printf( "TP: %d\nFP: %d\nFN: %d\n", TP, FP, FN ) ; double recall = ( double )TP/(TP+FN) ; double precision = (double)TP/(TP+FP) ; printf( "Recall: %lf\n" "Precision: %lf\n" "F-score: %lf\n" "Gain: %lf\n", recall, precision, 2*recall*precision / ( recall + precision ), 
(double)(TP-FP)/(TP+FN) ) ; TP = readTP[i] ; FP = readFP[i] ; FN = readFN[i] ; printf( "\nRead level:\n" ) ; printf( "TP: %d\nFP: %d\nFN: %d\n", TP, FP, FN ) ; recall = ( double )TP/(TP+FN) ; precision = (double)TP/(TP+FP) ; printf( "Recall: %lf\n" "Precision: %lf\n" "F-score: %lf\n" "Gain: %lf\n", recall, precision, 2*recall*precision / ( recall + precision ), (double)(TP-FP)/(TP+FN) ) ; } } return 0 ; }
// Returns a breakpoint for a cluster of connected reads. If no viable // breakpoint can be found, it returns a breakpoint with position -1. // Note: returned pointer must be deleted by caller. static void get_breakpoints(std::vector<simple_read*>& cluster, std::vector<bam_info>& bam_sources, int insert_size, int cluster_tid, char cluster_strand, const Chromosome* chromosome, std::map<std::string, std::string>& sample_dict, std::vector<MEI_breakpoint>& breakpoints, UserDefinedSettings* userSettings) { std::vector<SPLIT_READ> split_reads; int outer_read_pos = (cluster_strand == Minus)? cluster.at(cluster.size()-1)->pos : cluster.at(0)->pos; // int inner_read_pos = (cluster_strand == Minus)? cluster.at(0)->pos : cluster.at(cluster.size()-1)->pos; get_split_reads_for_cluster(bam_sources, cluster_strand, outer_read_pos, chromosome, split_reads); // Search for split reads with a mate close to the outer read of the // cluster. Store candidate breakpoints. // Todo: speedup by exploiting the fact that both clusters and split reads are sorted // by mapping location. std::map<int, std::vector<simple_read> > bio_candidate_breakpoints; for (size_t i = 0; i < split_reads.size(); i++) { SPLIT_READ read = split_reads.at(i); char anchor_strand = read.MatchedD; if (cluster_strand != anchor_strand) { continue; } unsigned int comp_candidate_bp = read.getLastAbsLocCloseEnd(); unsigned int bio_candidate_bp = get_bio_chr_index(comp_candidate_bp); if (bio_candidate_breakpoints.find(bio_candidate_bp) == bio_candidate_breakpoints.end()) { // New candidate, look ahead to check whether there are enough supporting split reads. int SR_support = 1; for (size_t j = i + 1; j < split_reads.size(); j++) { if (split_reads.at(j).getLastAbsLocCloseEnd() == comp_candidate_bp && split_reads.at(j).MatchedD == cluster_strand) { SR_support++; } } if (SR_support < userSettings->MIN_DD_BREAKPOINT_SUPPORT) { // Not enough support, skip it. 
continue; } else { std::vector<simple_read> new_bp_split_reads; bio_candidate_breakpoints.insert(std::make_pair(bio_candidate_bp, new_bp_split_reads)); } } // Store the unmatched sequence as it should be matched on the opposite strand of // the mapped mate. std::string whole_sequence; std::string mapped_part; std::string unmapped_part; if (anchor_strand == Plus) { whole_sequence = ReverseComplement(read.getUnmatchedSeq()); mapped_part = whole_sequence.substr(0, read.CloseEndLength); unmapped_part = whole_sequence.substr(read.CloseEndLength, whole_sequence.length()); } else { whole_sequence = read.getUnmatchedSeq(); mapped_part = whole_sequence.substr(whole_sequence.length() - read.CloseEndLength, whole_sequence.length()); unmapped_part = whole_sequence.substr(0, whole_sequence.length() - read.CloseEndLength); } std::string sample_name; get_sample_name(read.read_group, sample_dict, sample_name); simple_read simple_split_read(read.Name, -1, -1, '?', sample_name, whole_sequence, mapped_part, unmapped_part); (*bio_candidate_breakpoints.find(bio_candidate_bp)).second.push_back(simple_split_read); } char SR_mapping_strand = (cluster_strand == Plus)? Minus : Plus; // Remove any split reads for which a far end can be found locally, these are // assumed to contribute to some local variants. // Todo: determine region that is searched for far end. 
std::map<int, std::vector<simple_read> >::iterator map_iter; for (map_iter = bio_candidate_breakpoints.begin(); map_iter != bio_candidate_breakpoints.end(); ++map_iter) { std::string mapped_consensus = get_consensus_unmapped((*map_iter).second, SR_mapping_strand); std::vector<simple_read> sreads = (*map_iter).second; if (mapped_consensus.length() == 0) { LOG_DEBUG(*logStream << time_log() << "Consensus building failed for split read mapping ends (" << map_iter->second.size() << " reads @ " << map_iter->first << ")" << std::endl); continue; } int bio_bp = (*map_iter).first; // If far end consensus is not found in local window, store breakpoint. size_t FE_window_start = std::max(0, get_comp_chr_index(bio_bp) - userSettings->MIN_DD_MAP_DISTANCE); size_t FE_window_size = std::min(chromosome->getCompSize() - (unsigned) FE_window_start, 2 * (unsigned) userSettings->MIN_DD_MAP_DISTANCE); if (!contains_subseq_any_strand(mapped_consensus, chromosome->getSeq().substr(FE_window_start, FE_window_size), MIN_CONSENSUS_LENGTH)) { MEI_breakpoint bp(cluster_tid, bio_bp, cluster_strand); bp.associated_split_reads = (*map_iter).second; // Link associated discordant reads (all reads from cluster) and split reads. std::vector<simple_read*>::iterator read_iter; for (read_iter = cluster.begin(); read_iter != cluster.end(); ++read_iter) { bp.associated_reads.push_back(*(*read_iter)); } breakpoints.push_back(bp); } } }
void CombineReads(const std::string & CurrentChrSeq, const char & Strand, const std::vector <SPLIT_READ> & input_reads, const std::vector <unsigned int> & Index_Of_Useful_Reads, std::vector <SPLIT_READ> & output_reads) { //std::cout << "start of CombineReads" << std::endl; std::string Spacer = ""; unsigned Max_ReadLength = 0; unsigned Max_AssembledLength = 0; unsigned Min_LeftMostPos = input_reads[Index_Of_Useful_Reads[0]].LeftMostPos; SPLIT_READ output_one_read;// = input_reads[Index_Of_Useful_Reads[0]]; unsigned Index2Read4Copy = 0; //unsigned Min_Close_Size = 10000; for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) { //if (input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() < Min_Close_Size) { // Min_Close_Size = input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size(); // Index2Read4Copy = ReadIndex; // input_reads[Index_Of_Useful_Reads[Index2Read4Copy]] //} //std::cout << Strand << " " << input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() << std::endl; //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos << " " << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq << std::endl; if (input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos < (int)Min_LeftMostPos) Min_LeftMostPos = input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos; if (input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() > (short)Max_ReadLength) Max_ReadLength = input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); if (input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos + (unsigned)input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() > Max_AssembledLength) Max_AssembledLength = input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos + input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); } Max_AssembledLength = Max_AssembledLength - Min_LeftMostPos; if ((float)Max_AssembledLength < Max_ReadLength * 1.3) return; std::cout << "Max_AssembledLength " << 
Max_AssembledLength << std::endl; for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) { Spacer.clear(); if (Strand == '+') { for (unsigned SpacerIndex = 0; SpacerIndex < Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); SpacerIndex++) Spacer += " "; //std::cout << Spacer << (input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq) << std::endl; } else { for (unsigned SpacerIndex = 0; SpacerIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos; SpacerIndex++) Spacer += " "; //std::cout << Spacer << (input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq) << std::endl; } } if (Strand == '+') { // UnmatchedSeq std::cout << "+ Max_ReadLength " << Max_ReadLength << "\t" << "Min_LeftMostPos " << Min_LeftMostPos << "\nref: \n" << ReverseComplement(CurrentChrSeq.substr(Min_LeftMostPos, Max_AssembledLength)) << std::endl; } else if (Strand == '-') { std::cout << "- Max_ReadLength " << Max_ReadLength << "\t" << "Min_LeftMostPos " << Min_LeftMostPos << "\nref: \n" << CurrentChrSeq.substr(Min_LeftMostPos, Max_AssembledLength) << std::endl; } unsigned Count[5][Max_AssembledLength]; float Ratio[5][Max_AssembledLength]; for (short i = 0; i < 5; i++) { for (unsigned j = 0; j < Max_AssembledLength; j++) { Count[i][j] = 0; Ratio[i][j] = 0.0; } } if (Strand == '+') { for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) { //std::cout << std::endl; for (short BaseIndex = 0; BaseIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); BaseIndex++) { //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq[BaseIndex]; switch (input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()[BaseIndex]) { case 'A': Count[0][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - 
input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++; break; // 00000000 case 'C': Count[1][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++; break; // 00010000 case 'G': Count[2][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++; break; // 00100000 case 'T': Count[3][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++; break; // 00110000 default: Count[4][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++; // 01000000 } } } } else if (Strand == '-') { for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) { //std::cout << std::endl; for (short BaseIndex = 0; BaseIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); BaseIndex++) { //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq[BaseIndex]; switch (input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()[BaseIndex]) { case 'A': Count[0][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++; break; // 00000000 case 'C': Count[1][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++; break; // 00010000 case 'G': Count[2][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++; break; // 00100000 case 'T': Count[3][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++; break; // 00110000 default: Count[4][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - 
Min_LeftMostPos + BaseIndex]++; // 01000000 } } } } float Sum; for (unsigned PosIndex = 0; PosIndex < Max_AssembledLength; PosIndex++) { Sum = Count[0][PosIndex] + Count[1][PosIndex] + Count[2][PosIndex] + Count[3][PosIndex] + Count[4][PosIndex]; //std::cout << Count[0][PosIndex] << " " << Count[1][PosIndex] << " " << Count[2][PosIndex] << " " << Count[3][PosIndex] << " " << Count[4][PosIndex] << std::endl; for (unsigned BaseIndex = 0; BaseIndex < 5; BaseIndex++) { Ratio[BaseIndex][PosIndex] = Count[BaseIndex][PosIndex] / Sum; } } std::string OutputOneStr = ""; const float RatioCutoff = 0.66; unsigned Max_Base_Count = 0; short Max_Base_Count_Index = -1; for (unsigned PosIndex = 0; PosIndex < Max_AssembledLength; PosIndex++) { if (Ratio[0][PosIndex] > RatioCutoff) { OutputOneStr += "A"; continue; } if (Ratio[1][PosIndex] > RatioCutoff) { OutputOneStr += "C"; continue; } if (Ratio[2][PosIndex] > RatioCutoff) { OutputOneStr += "G"; continue; } if (Ratio[3][PosIndex] > RatioCutoff) { OutputOneStr += "T"; continue; } for (short BaseIndex = 0; BaseIndex < 4; BaseIndex++) { if (Count[BaseIndex][PosIndex] > Max_Base_Count && Count[BaseIndex][PosIndex] >= 3) Max_Base_Count_Index = BaseIndex; } //if (Max_Base_Count_Index != -1) { switch (Max_Base_Count_Index) { case 0: OutputOneStr += "A"; break; case 1: OutputOneStr += "C"; break; case 2: OutputOneStr += "G"; break; case 3: OutputOneStr += "T"; break; case -1: OutputOneStr += "N"; break; default: break; } //} //else OutputOneStr += "N"; } if (Strand == '+') { std::cout << "Final merged string +: original\n" << (OutputOneStr) << std::endl; std::cout << "Final merged string +: convert to ref\n" << ReverseComplement(OutputOneStr) << std::endl; } else { std::cout << "Final merged string -: original\n" << (OutputOneStr) << std::endl; std::cout << "Final merged string : convert to ref\n" << (OutputOneStr) << std::endl; }//std::cout << "Final merged string: -\n" << (OutputOneStr) << std::endl; //std::cout << "here1" << std::endl; 
unsigned Count_N = 0; for (unsigned pos_index = 0; pos_index < OutputOneStr.size(); pos_index++) { if (OutputOneStr[pos_index] == 'N') Count_N++; } if ((float)Count_N < OutputOneStr.size() * 0.05) { unsigned Min_Close_Size = 10000; Index2Read4Copy = 0; // if the best one cannot be found due to N or whatever reasons, use the first read as the template for copy. //std::cout << "Original Index2Read4Copy: " << Index2Read4Copy << std::endl; for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) { if (input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() < Min_Close_Size && OutputOneStr.find(input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()) !=std::string::npos) { // quick fix here: need more work Min_Close_Size = input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size(); Index2Read4Copy = ReadIndex; // input_reads[Index_Of_Useful_Reads[Index2Read4Copy]] //std::cout << "Changed Index2Read4Copy: " << Index2Read4Copy << std::endl; } } //std::cout << "here2" << std::endl; output_one_read = input_reads[Index_Of_Useful_Reads[Index2Read4Copy]]; //std::cout << "here2a" << std::endl; output_one_read.setUnmatchedSeq( OutputOneStr ); //update std::map <std::string, int> ReadCountPerSample; GetReadCountPerSample(input_reads, Index_Of_Useful_Reads, output_one_read); //std::cout << "here2b" << std::endl; //std::cout << "Before: " << output_one_read.UP_Close.size() << std::endl; output_one_read.UP_Close.clear(); //std::cout << "here3" << std::endl; output_one_read.Thickness = Index_Of_Useful_Reads.size(); //std::cout << "Thickness " << output_one_read.Thickness << std::endl; GetCloseEnd(CurrentChrSeq, output_one_read); //std::cout << "After: " << output_one_read.UP_Close.size() << std::endl; output_reads.push_back(output_one_read); //std::cout << "here4" << std::endl; //std::cout << "end of CombineReads" << std::endl; } }