// Constructs a scorer for the template sequence `tpl` under `quiverConfig`.
//
// The member initializers:
//   - build the recursor from the config's MovesAvailable and Banding settings,
//   - keep a copy of the configuration,
//   - store `tpl` as the forward template and ReverseComplement(tpl) as the
//     reverse template,
//   - start with an empty per-read scorer collection.
//
// `tpl` is taken by value. CheckInvariants() is invoked through DEBUG_ONLY;
// presumably that macro compiles the call out of non-debug builds -- confirm
// against its definition.
MultiReadMutationScorer<R>::MultiReadMutationScorer(const QuiverConfig& quiverConfig,
                                                     std::string tpl)
     : recursor_(quiverConfig.MovesAvailable, quiverConfig.Banding),
       quiverConfig_(quiverConfig),
       fwdTemplate_(tpl),
       revTemplate_(ReverseComplement(tpl)),
       scorerForRead_()
 {
     DEBUG_ONLY(CheckInvariants());
 }
Esempio n. 2
0
// Shrinks `s` in place to roughly half of its k-mers.
//
//   s       sequence to fold; overwritten with the folded result.
//   kmer_k  k-mer length used to measure the sequence.
//   is_loop selects the folding rule:
//           - false: the number of k-mers (length - k + 1) must be even; the
//             string is truncated to the first half of the k-mers plus the
//             (k - 1)-base overhang.
//           - true:  scans for the first offset whose k-mer, once passed through
//             the in-place ReverseComplement, equals the k-mer starting one
//             base earlier, and keeps length/2 bases starting at that offset.
//             If no such offset exists, `s` is left untouched.
void FoldPalindrome(std::string &s, int kmer_k, bool is_loop) {
    if (!is_loop) {
        int kmer_total = s.length() - kmer_k + 1;
        assert(kmer_total % 2 == 0);
        s.resize(kmer_total / 2 + (kmer_k - 1));
        return;
    }

    unsigned start = 1;
    while (start + kmer_k <= s.length()) {
        std::string flipped = s.substr(start, kmer_k);
        ReverseComplement(flipped);
        if (flipped == s.substr(start - 1, kmer_k)) {
            // The fold point is expected to lie in the first half.
            assert(start <= s.length() / 2);
            s = s.substr(start, s.length() / 2);
            return;
        }
        ++start;
    }
}
Esempio n. 3
0
// Debug-level search for pairs of split reads whose unmatched sequences overlap.
//
// Every read in `First` that is not on the '-' strand is paired with every read
// in `Second` that is not on the '+' strand. The reverse complement of the first
// read's unmatched sequence is compared, suffix against prefix, with the second
// read's unmatched sequence for each overlap length starting at MinimumOverlap.
// An overlap is accepted when it has at most MaxMismatch mismatches; the merged
// string is then printed to std::cout.
//
// ChrName2Index, CurrentState, OneSV and ASM_Output are only needed by the
// ReportLI call, which is currently commented out.
//
// The overlap scan is bounded by the actual lengths of the two strings as well
// as by the read lengths, so the index `FirstLength - OverlapCount + pos_index`
// and the substr() length can never run outside the strings.
void TryLI(std::map<std::string,int> & ChrName2Index, ControlState & CurrentState, const Assembly & OneSV,  std::vector <SPLIT_READ> & First, std::vector <SPLIT_READ> & Second, std::ofstream & ASM_Output) {
    const short MinimumOverlap = 10;
    const short MaxMismatch = 3;
    short MaximumOverlap;
    short CountMismatch;
    short FirstLength, SecondLength;
    std::string FirstOne, SecondOne, MergedString;

    // Dump strand and relative position of every candidate read.
    std::cout << "TryLI" << std::endl;
    for (unsigned ReadIndex = 0; ReadIndex < First.size(); ReadIndex++) {
        std::cout << First[ReadIndex].MatchedD << " " << First[ReadIndex].MatchedRelPos << std::endl;
    }
    for (unsigned ReadIndex = 0; ReadIndex < Second.size(); ReadIndex++) {
        std::cout << Second[ReadIndex].MatchedD << " " << Second[ReadIndex].MatchedRelPos << std::endl;
    }

    for (unsigned ReadIndex_Plus = 0; ReadIndex_Plus < First.size(); ReadIndex_Plus++) {
        if (First[ReadIndex_Plus].MatchedD == '-') continue;
        for (unsigned ReadIndex_Minus = 0; ReadIndex_Minus < Second.size(); ReadIndex_Minus++) {
            if (Second[ReadIndex_Minus].MatchedD == '+') continue;
            MaximumOverlap = std::min(First[ReadIndex_Plus].getReadLength(), Second[ReadIndex_Minus].getReadLength());
            std::cout << MaximumOverlap << std::endl;
            FirstOne = ReverseComplement(First[ReadIndex_Plus].getUnmatchedSeq());
            SecondOne = Second[ReadIndex_Minus].getUnmatchedSeq();
            FirstLength = FirstOne.size();
            SecondLength = SecondOne.size();
            std::cout << FirstOne << "\n" << SecondOne << "\n";

            // Longest overlap that still fits inside both strings.
            const short LongestSafeOverlap = std::min(FirstLength, SecondLength);
            for (short OverlapCount = MinimumOverlap;
                 OverlapCount < MaximumOverlap && OverlapCount <= LongestSafeOverlap;
                 OverlapCount++) {
                CountMismatch = 0;
                for (short pos_index = 0; pos_index < OverlapCount; pos_index++) {
                    if (FirstOne[FirstLength - OverlapCount + pos_index] != SecondOne[pos_index]) {
                        ++CountMismatch;
                    }
                    if (CountMismatch > MaxMismatch) {
                        break;  // already too many mismatches for this overlap length
                    }
                }
                if (CountMismatch <= MaxMismatch) {
                    MergedString = FirstOne.substr(0, FirstLength - OverlapCount) + SecondOne;
                    std::cout << "MergedString: " << OverlapCount << " " << MergedString << std::endl;
                    //ReportLI(AllChromosomes, ChrName2Index, CurrentState, OneSV, First[ReadIndex_Plus], Second[ReadIndex_Minus], MergedString, OverlapCount, ASM_Output);
                }
            }
        }
    }
}
Esempio n. 4
0
void Window::ScanReads( const std::vector <SPLIT_READ> &readsInWindow, 
                        unsigned short bamIndex, 
                        bool isTumor) {

    // openmp parallel 
    omp_set_num_threads( paramd.numberThreads );
#pragma omp parallel for
    for (unsigned short i=0; i<_siteCount; i++) {
        HomoSite *p = _startSite + i;
        unsigned long tsize = readsInWindow.size();
        for (unsigned long j=0; j<tsize; j++) {
            if ( readsInWindow[j].Mapped ) {
                if ( (readsInWindow[j].MatchedRelPos < p->lowcut) || (readsInWindow[j].MatchedRelPos > p->highcut) ) continue; 
            }
            unsigned short tCount = DoOneRead(readsInWindow[j].ReadSeq, p);
            if ( (tCount > 0) && (tCount < paramd.s_dispots) ) {
                if (isTumor) {
                    p->tumorDis[bamIndex][tCount-1]++;
                } else {
                    p->normalDis[bamIndex][tCount-1]++;
                }
            } else {
                // don't scan reverse if mapped
                if ( readsInWindow[j].Mapped ) continue; 
                // reverse
                std::string tStr = readsInWindow[j].ReadSeq;
                ReverseComplement(tStr);
                tCount = DoOneRead(tStr, p);
                if ( (tCount > 0) && (tCount < paramd.s_dispots) ) {
                    if (isTumor) {
                        p->tumorDis[bamIndex][tCount-1]++;
                    } else {
                        p->normalDis[bamIndex][tCount-1]++;
                    }
                }
            }
        }
    }
}
Esempio n. 5
0
// Looks up sequence-motif error probabilities for both strands of a
// homopolymer deletion and stores them in sse_prob_positive_strand and
// sse_prob_negative_strand.
//
//   reference_context  supplies position0 and the per-position HP lengths.
//   ErrorMotifs        motif table queried through get_sse_probability().
//   ref_reader         reference accessor used to extract the local context.
//   chr_idx            chromosome index handed to ref_reader.
//
// Both probabilities are reset to 0 first and stay 0 unless the allele is
// flagged as an HP indel and a deletion, and a usable context was extracted.
void AlleleIdentity::PredictSequenceMotifSSE(const LocalReferenceContext &reference_context,
                             const TIonMotifSet & ErrorMotifs,
                             const ReferenceReader &ref_reader, int chr_idx) {

  sse_prob_positive_strand = 0;
  sse_prob_negative_strand = 0;

  // Only homopolymer deletions are scored.
  if (!status.isHPIndel || !status.isDeletion)
    return;

  // Position of the first deleted base.
  const long var_position = reference_context.position0 + left_anchor;

  // Up to 10 bases of left flank, fewer when close to the chromosome start.
  unsigned context_left = var_position >= 10 ? 10 : var_position;
  const unsigned hp_length = (unsigned int)reference_context.my_hp_length[left_anchor];

  // Left flank + homopolymer + 10 bases of right flank. No explicit check
  // against the chromosome end is made here (the original bounds test is
  // disabled); the length test below rejects a context that came back too short.
  string seqContext = ref_reader.substr(chr_idx, var_position - context_left, context_left + hp_length + 10);

  if (seqContext.empty() || context_left >= seqContext.length())
    return;

  sse_prob_positive_strand = ErrorMotifs.get_sse_probability(seqContext, context_left);

  // Same position expressed in reverse-strand coordinates.
  unsigned mirrored_left = seqContext.length() - context_left - 1;
  string reverse_seqContext;
  ReverseComplement(seqContext, reverse_seqContext);

  sse_prob_negative_strand = ErrorMotifs.get_sse_probability(reverse_seqContext, mirrored_left);
}
Esempio n. 6
0
// Adds a read to the POA graph in whichever orientation scores better.
//
//   readSequence   read to add.
//   (unnamed)      alignment options; currently ignored.
//   minScoreToAdd  minimum score a candidate needs to be committed.
//
// The first read is always added as given. For later reads both the read and
// its reverse complement are tried; the forward candidate wins ties. If neither
// candidate reaches minScoreToAdd nothing is added.
//
// Returns the key of the added read (NumReads() - 1 after the add), or -1 when
// the read was rejected. On success the read's path and orientation flag are
// appended to readPaths_ and reverseComplemented_.
SparsePoa::ReadKey SparsePoa::OrientAndAddRead(const std::string& readSequence,
                                               const PoaAlignmentOptions& /* alnOptions */,
                                               float minScoreToAdd)
{
    AlignConfig config = DefaultPoaConfig(AlignMode::LOCAL);
    Path outputPath;
    bool accepted = false;
    bool flipped = false;

    if (graph_->NumReads() == 0) {
        graph_->AddFirstRead(readSequence, &outputPath);
        accepted = true;
    } else {
        auto forward = graph_->TryAddRead(readSequence, config, rangeFinder_);
        auto reverse = graph_->TryAddRead(ReverseComplement(readSequence), config, rangeFinder_);

        if (forward->Score() >= reverse->Score() && forward->Score() >= minScoreToAdd) {
            graph_->CommitAdd(forward, &outputPath);
            accepted = true;
        } else if (reverse->Score() >= forward->Score() && reverse->Score() >= minScoreToAdd) {
            graph_->CommitAdd(reverse, &outputPath);
            accepted = true;
            flipped = true;
        }

        // Both candidates are released here whether or not one was committed.
        delete forward;
        delete reverse;
    }

    ReadKey key;
    if (accepted) {
        readPaths_.push_back(outputPath);
        reverseComplemented_.push_back(flipped);
        key = graph_->NumReads() - 1;
    } else {
        key = -1;
    }
    return key;
}
Esempio n. 7
0
// Collects split reads that support a deletion-insertion event and hands them
// to SortOutputDI for reporting.
//
//   currentState  holds the split reads (Reads_SR) and the carry-over list
//                 (FutureReads_SR).
//   NumBoxes      number of position boxes used to group reads.
//   window        current search window (end position and chromosome).
//
// A read qualifies when it is unused, has a far-end match on the same fragment,
// stays within the mismatch budget, and its close and far ends point towards
// each other with unmatched bases left in between. Qualifying reads get their
// Left/Right/BP/NT/IndelSize/BPLeft/BPRight fields filled in. A read is then
// either saved for the next cycle (when it crosses the window end) or, if it
// lies in the user-specified region and its box index is valid, recorded in the
// box for its left breakpoint and marked Used.
//
// The Count_DI* counters are function-local statics, so the logged totals
// accumulate over all calls. Always returns EXIT_SUCCESS.
int searchIndels(ControlState& currentState, unsigned NumBoxes, const SearchWindow& window )
{

   static int Count_DI = 0;
   static int Count_DI_Plus = 0;
   static int Count_DI_Minus = 0;

   std::vector<unsigned> DI[NumBoxes];
   LOG_INFO(*logStream << "Searching deletion-insertions ... " << std::endl);

   for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads_SR.size(); ReadIndex++) {
      SPLIT_READ& read = currentState.Reads_SR[ReadIndex];
      if (read.Used || read.UP_Far.empty() || read.FragName != read.FarFragName) {
         continue;
      }

      // Work with the last entry of each unique-point list.
      const unsigned closeEnd = read.UP_Close.size() - 1;
      const unsigned farEnd = read.UP_Far.size() - 1;

      // Mismatch budget scales with the number of matched bases.
      if (read.UP_Far[farEnd].Mismatches + read.UP_Close[closeEnd].Mismatches
            > (short) (1 + userSettings->Seq_Error_Rate * (read.UP_Far[farEnd].LengthStr + read.UP_Close[closeEnd].LengthStr))) {
         continue;
      }

      const bool anchorIsPlus = (read.MatchedD == Plus);

      if (anchorIsPlus) {
         if (read.UP_Far[farEnd].Direction != Minus) {
            continue;
         }
         if (!(read.UP_Far[farEnd].LengthStr + read.UP_Close[closeEnd].LengthStr < read.getReadLength() &&
               read.UP_Far[farEnd].LengthStr + read.UP_Close[closeEnd].LengthStr >= userSettings->Min_Num_Matched_Bases &&
               read.UP_Far[farEnd].AbsLoc > read.UP_Close[closeEnd].AbsLoc + 1)) {
            continue;
         }

         read.Left = read.UP_Close[closeEnd].AbsLoc - read.UP_Close[closeEnd].LengthStr + 1;
         read.Right = read.UP_Far[farEnd].AbsLoc + read.UP_Far[farEnd].LengthStr - 1;
         read.BP = read.UP_Close[closeEnd].LengthStr - 1;
         read.NT_size = read.getReadLength() - read.UP_Far[farEnd].LengthStr - read.UP_Close[closeEnd].LengthStr;

         read.NT_str = ReverseComplement(read.getUnmatchedSeq()).substr(read.BP + 1, read.NT_size);
         read.IndelSize = (read.Right - read.Left) + read.NT_size - read.getReadLengthMinus();

         read.BPLeft = read.UP_Close[closeEnd].AbsLoc - g_SpacerBeforeAfter;
         read.BPRight = read.UP_Far[farEnd].AbsLoc - g_SpacerBeforeAfter;
      } else if (read.MatchedD == Minus) {
         if (read.UP_Far[farEnd].Direction != Plus) {
            continue;
         }
         if (!(read.UP_Close[closeEnd].LengthStr + read.UP_Far[farEnd].LengthStr < read.getReadLength() &&
               read.UP_Close[closeEnd].LengthStr + read.UP_Far[farEnd].LengthStr >= userSettings->Min_Num_Matched_Bases &&
               read.UP_Close[closeEnd].AbsLoc > read.UP_Far[farEnd].AbsLoc + 1)) {
            continue;
         }

         read.Left = read.UP_Far[farEnd].AbsLoc - read.UP_Far[farEnd].LengthStr + 1;
         read.Right = read.UP_Close[closeEnd].AbsLoc + read.UP_Close[closeEnd].LengthStr - 1;
         read.BP = read.UP_Far[farEnd].LengthStr - 1;
         read.NT_size = read.getReadLength() - read.UP_Close[closeEnd].LengthStr - read.UP_Far[farEnd].LengthStr;
         read.NT_str = read.getUnmatchedSeq().substr(read.BP + 1, read.NT_size);

         read.IndelSize = (read.Right - read.Left) - read.getReadLengthMinus() + read.NT_size;
         read.BPLeft = read.UP_Far[farEnd].AbsLoc - g_SpacerBeforeAfter;
         read.BPRight = read.UP_Close[closeEnd].AbsLoc - g_SpacerBeforeAfter;
      } else {
         continue;
      }

      // Filing is the same for both orientations.
      if (readTransgressesBinBoundaries(read, window.getEnd())) {
         saveReadForNextCycle(read, currentState.FutureReads_SR);
         continue;
      }
      if (!readInSpecifiedRegion(read, userSettings->getRegion())) {
         continue;
      }
      const unsigned boxIndex = (int) (read.BPLeft) / BoxSize;
      if (boxIndex >= NumBoxes) {
         continue;
      }

      DI[boxIndex].push_back(ReadIndex);
      read.Used = true;
      Count_DI++;
      if (anchorIsPlus) {
         Count_DI_Plus++;
      } else {
         Count_DI_Minus++;
      }
   }

   LOG_INFO(*logStream << "Total: " << Count_DI << "\t+" << Count_DI_Plus << "\t-"
            << Count_DI_Minus << std::endl);
   std::ofstream DeletionOutf( userSettings->getDOutputFilename().c_str(), std::ios::app);
   std::ofstream inversionsOutf( userSettings->getINVOutputFilename().c_str(), std::ios::app);
   SortOutputDI(currentState, NumBoxes, window.getChromosome()->getSeq(), currentState.Reads_SR, DI, DeletionOutf, inversionsOutf);
   DeletionOutf.close();
   for (unsigned int box = 0; box < NumBoxes; box++) {
      DI[box].clear();
   }

   return EXIT_SUCCESS;
}
Esempio n. 8
0
int searchInversionsNT(ControlState& currentState, unsigned NumBoxes, const SearchWindow& window)
{
   static int Count_Inv_NT = 0;
   static int Count_Inv_NT_Plus = 0;
   static int Count_Inv_NT_Minus = 0;

   std::vector<unsigned> Inv_NT[NumBoxes];

   int CloseIndex = 0;
   int FarIndex = 0;

	//UserDefinedSettings *userSettings = UserDefinedSettings::Instance();
    
   LOG_INFO(*logStream << "Searching inversions with non-template sequence ... "
            << std::endl);
   for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads_SR.size(); ReadIndex++) {
		SPLIT_READ& currentRead = currentState.Reads_SR[ReadIndex];
      if (currentRead.Used
            || currentRead.UP_Far.empty() || currentRead.FragName != currentRead.FarFragName) {
         continue;
      }
      CloseIndex = currentRead.UP_Close.size() - 1;
      FarIndex = currentRead.UP_Far.size() - 1;
      if (currentRead.UP_Far[FarIndex].Mismatches + currentRead.UP_Close[CloseIndex].Mismatches > 
			(short) (1 + userSettings->Seq_Error_Rate * (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr))) {
         continue;
      }
      if (currentRead.UP_Close[0].Strand != currentRead.UP_Far[0].Strand && 
				currentRead.UP_Close[0].Direction == currentRead.UP_Far[0].Direction) {
         if (currentRead.MatchedD == Plus) {
            if (currentRead.UP_Far[FarIndex]. Direction == Plus) {
               if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.getReadLength() && 
						 currentRead.UP_Far[FarIndex].AbsLoc > currentRead.UP_Close[CloseIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion && 
						 currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases ) {

                  currentRead.Left = (currentRead. UP_Close[CloseIndex].AbsLoc + 1) - currentRead.UP_Close[CloseIndex].LengthStr;
                  currentRead.Right = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Far[FarIndex].LengthStr + currentRead.getReadLength();
                  currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1;

                  currentRead.IndelSize = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Close[CloseIndex].AbsLoc;

                  currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr; // NT_2str
                  currentRead.NT_str = ReverseComplement( currentRead. getUnmatchedSeq()).substr(currentRead.BP + 1, currentRead.NT_size);
                  currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc + 1 - g_SpacerBeforeAfter;
                  currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter;
                  if (readTransgressesBinBoundaries( currentRead, window.getEnd())) {
                     saveReadForNextCycle(currentRead, currentState.FutureReads_SR);
                  }
                  else {
                     if ( 1 ) {
                        if (readInSpecifiedRegion( currentRead, userSettings->getRegion())) {
                           Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex);
                           currentRead.Used = true;
                           Count_Inv_NT++;
                           Count_Inv_NT_Plus++;
                        }
                     }
                  }

               }
               // anchor inside reversed block.
               if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.getReadLength() && 
						 currentRead.UP_Far[FarIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion < currentRead.UP_Close[CloseIndex].AbsLoc && 
                   currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases) {

                  currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Close[CloseIndex].LengthStr + currentRead.getReadLength();
                  currentRead.Left = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Far[FarIndex].LengthStr + 1;
                  currentRead.BP = currentRead.UP_Far[FarIndex].LengthStr - 1;

                  currentRead.IndelSize = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Far[FarIndex].AbsLoc;

                  currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr;
                  currentRead.NT_str = currentRead.getUnmatchedSeq(). substr( currentRead.BP + 1, currentRead.NT_size);
                  currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter;
                  currentRead.BPLeft = (currentRead.UP_Far[FarIndex].AbsLoc + 1) - g_SpacerBeforeAfter;

                  if (readTransgressesBinBoundaries( currentRead, window.getEnd())) {
                     saveReadForNextCycle(currentRead, currentState.FutureReads_SR);
                  }
                  else {
                     if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) {
                        Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex);
                        currentRead.Used = true;
                        Count_Inv_NT++;
                        Count_Inv_NT_Plus++;
                     }
                  }
               }
            }

         }
         else if (currentRead.MatchedD == Minus) {
            if (currentRead.UP_Far[FarIndex]. Direction == Minus) {
               // anchor outside reversed block.
               if (currentRead.UP_Close[CloseIndex].LengthStr  + currentRead.UP_Far[FarIndex].LengthStr < currentRead.getReadLength() && 
                   currentRead.UP_Close[CloseIndex].AbsLoc > currentRead.UP_Far[FarIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion && 
                   currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases) {

                  currentRead.Left = currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr - currentRead.getReadLength();
                  currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - 1;
                  currentRead.BP = currentRead.UP_Far[FarIndex].LengthStr - 1;

                  currentRead.IndelSize = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Far[FarIndex].AbsLoc;

                  currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr;
                  currentRead.NT_str = currentRead.getUnmatchedSeq().substr( currentRead.BP + 1, currentRead.NT_size);
                  currentRead.BPLeft = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter;
                  currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - 1 - g_SpacerBeforeAfter;
                  if (readTransgressesBinBoundaries( currentRead, window.getEnd())) {
                     saveReadForNextCycle(currentRead, currentState.FutureReads_SR);
                  }
                  else {
                     if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) {
                        Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex);
                        currentRead.Used = true;

                        Count_Inv_NT++;
                        Count_Inv_NT_Minus++;
                     }
                  }
               }
               // anchor inside reversed block.
               if (currentRead.UP_Close[CloseIndex].LengthStr + currentRead.UP_Far[FarIndex].LengthStr < currentRead.getReadLength() && 
                   currentRead.UP_Close[CloseIndex].AbsLoc + userSettings->MIN_IndelSize_Inversion < currentRead.UP_Far[FarIndex].AbsLoc && 
                   currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= userSettings->Min_Num_Matched_Bases) {

                  currentRead.Right = currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr - 1;
                  currentRead.Left = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - currentRead.getReadLength();
                  currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1;

                  currentRead.IndelSize = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Close[CloseIndex].AbsLoc;

                  currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Far[FarIndex].LengthStr - currentRead.UP_Close[CloseIndex].LengthStr;
                  currentRead.NT_str = ReverseComplement( currentRead. getUnmatchedSeq()). substr( currentRead.BP + 1, currentRead.NT_size);
                  currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter;
                  currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - 1 - g_SpacerBeforeAfter;

                  if (readTransgressesBinBoundaries( currentRead, window.getEnd())) {
                     saveReadForNextCycle(currentRead, currentState.FutureReads_SR);
                  }
                  else {
                     if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) {

                        Inv_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex);
                        currentRead.Used = true;

                        Count_Inv_NT++;
                        Count_Inv_NT_Minus++;
                     }
                  }
               }
            }
         }
      }
   }
   LOG_INFO(*logStream << "Total: " << Count_Inv_NT << "\t+" << Count_Inv_NT_Plus << "\t-" << Count_Inv_NT_Minus << std::endl);
   std::ofstream InversionOutf(userSettings->getINVOutputFilename().c_str(), std::ios::app);
   SortOutputInv_NT(currentState, NumBoxes, window.getChromosome()->getSeq(), currentState.Reads_SR, Inv_NT, InversionOutf);
   for (unsigned int i = 0; i < NumBoxes; i++) {
      Inv_NT[i].clear();
   }

   return EXIT_SUCCESS;
}
Esempio n. 9
0
// Prints the TP/FP/FN counts followed by recall, precision, F-score and gain.
// The ratios are printed as computed, so an empty category shows up as nan/inf.
static void PrintAccuracyMetrics( int TP, int FP, int FN )
{
	printf( "TP: %d\nFP: %d\nFN: %d\n", TP, FP, FN ) ;
	double recall = ( double )TP/(TP+FN) ;
	double precision = (double)TP/(TP+FP) ;
	printf( "Recall: %lf\n"
		"Precision: %lf\n"
		"F-score: %lf\n"
		"Gain: %lf\n", recall, precision, 2*recall*precision / ( recall + precision ),
				(double)(TP-FP)/(TP+FN) ) ;
}

// Evaluates corrected reads against the truth embedded in each read header.
//
// Reads FASTA or FASTQ records from stdin. Each header is expected to carry a
// "haplotype_infix" column (the true sequence) and an "edit_string" column
// (one character per base, 'M' for a base that was originally correct);
// "strand=reverse", "exp" and "trim" columns are optional. Read-level and
// base-level TP/FP/FN are accumulated per expression class and summarized on
// stdout.
//
// Options:
//   -v          print the read-level verdict for every read
//   -bv         print the base-level verdict for every read
//   -exp        also print per-expression-level summaries
//   -polyA N    skip reads whose true sequence has a run of >= N 'A'/'T' bases
//   -noindel    skip reads whose length differs from the true sequence
//   -trim       compare position by position instead of aligning
//
// Records missing a required header column are reported on stderr and skipped.
int main( int argc, char *argv[] )
{
	int i, j, k ;
	int len ;
	char *p ;
	FILE *fp ;
	int FILE_TYPE ; // 0-fasta, 1-fastq
	int correctCount = 0, errorCount = 0 ;
	int sameCount = 0 ;
	int trimCount = 0, trimSum = 0 ;
	int exp ; //0-low,1-med,2-high,3-unknown
	bool verbose = false ;
	bool baseVerbose = false ;
	int baseTP[4] = {0,0,0,0}, baseFP[4] = {0,0,0,0}, baseFN[4] = {0,0,0,0} ;
	int readTP[4] = {0,0,0,0}, readFP[4] = {0,0,0,0}, readFN[4] = {0,0,0,0} ;
	bool useExp = false ;
	bool ignoreIndel = false ;
	bool allowTrim = false ;
	int polyA = 100000 ;

	for ( i = 1 ; i < argc ; ++i )
	{
		if ( !strcmp( argv[i],"-v" ) )
			verbose = true ;
		else if ( !strcmp( argv[i], "-bv" ) )
			baseVerbose = true ;
		else if ( !strcmp( argv[i], "-exp" ) )
			useExp = true ;
		else if ( !strcmp( argv[i], "-polyA" ) )
		{
			// The option needs a value; without this check argv[argc] (NULL)
			// would be handed to atoi.
			if ( i + 1 >= argc )
			{
				fprintf( stderr, "Missing value for -polyA\n" ) ;
				exit( 1 ) ;
			}
			polyA = atoi( argv[i + 1] ) ;
			++i ;
		}
		else if ( !strcmp( argv[i], "-noindel" ) )
			ignoreIndel = true ;
		else if ( !strcmp( argv[i], "-trim" ) )
			allowTrim = true ;
		else
		{
			printf( "Unknown para\n" ) ;
			exit( 0 ) ;
		}
	}

	// Decide whether it is FASTQ or FASTA. This consumes the first token of
	// the first header line; the remainder of that line is read as the id below.
	fp = stdin ;
	fscanf( fp, "%s", buffer ) ;
	if ( buffer[0] == '>' )
		FILE_TYPE = 0 ;
	else
		FILE_TYPE = 1 ;

	while ( fgets( id, sizeof( id ), fp ) != NULL )
	{
		// Stop on a truncated record instead of reusing the previous one's data.
		if ( fgets( seq, sizeof( seq ), fp ) == NULL )
			break ;
		if ( FILE_TYPE == 1 )
		{
			if ( fgets( buffer, sizeof( buffer ), fp ) == NULL
				|| fgets( qual, sizeof( qual ), fp ) == NULL )
				break ;
		}

		// Clean the return symbol
		len = strlen( id ) ;
		if ( len > 0 && id[len - 1] == '\n')
			id[len - 1] = '\0' ;
		len = strlen( seq ) ;
		if ( len > 0 && seq[len - 1] == '\n' )
			seq[len - 1] = '\0' ;
		if ( FILE_TYPE == 1 )
		{
			len = strlen( qual ) ;
			if ( len > 0 && qual[len - 1] == '\n' )
				qual[len - 1] = '\0' ;
		}

		// Parse the id field
		p = FindIdColumn( id, "haplotype_infix" ) ;
		if ( p == NULL )
		{
			fprintf( stderr, "Skipping read without haplotype_infix: %s\n", id ) ;
			continue ;
		}
		sscanf( p, "%s", origSeq ) ;

		// Test whether this contain a polyA tag: j and k track the current
		// run of 'A' and 'T'; any other base ends the run.
		j = k = 0 ;
		for ( i = 0 ; origSeq[i] ; ++i )
		{
			if ( origSeq[i] == 'A' )
			{
				++j ;
			}
			else if ( origSeq[i] == 'T' )
			{
				++k ;
			}
			else
			{
				if ( j >= polyA || k >= polyA )
					break ;
				j = k = 0 ;
			}
		}
		if ( j >= polyA || k >= polyA )
			continue ;

		if ( ignoreIndel && strlen( seq ) != strlen( origSeq ) )
			continue ;

		p = FindIdColumn( id, "edit_string" ) ;
		if ( p == NULL )
		{
			fprintf( stderr, "Skipping read without edit_string: %s\n", id ) ;
			continue ;
		}
		sscanf( p, "%s", cigar ) ;

		p = FindIdColumn( id, "strand=reverse" ) ;
		if ( p != NULL )
		{
			ReverseComplement( origSeq ) ;
		}

		p = FindIdColumn( id, "exp" ) ;
		if ( p != NULL )
		{
			sscanf( p, "%s", buffer ) ;
			if ( !strcmp( buffer, "high" ) )
				exp = 2 ;
			else if ( !strcmp( buffer, "medium" ) )
				exp = 1 ;
			else if ( !strcmp( buffer, "low" ) )
				exp = 0 ;
			else
				exp = 3 ;
		}
		else
			exp = 3 ;
		if ( verbose || baseVerbose )
			printf( "%s\n", id ) ;

		// True when the edit string holds anything other than 'M'.
		const bool hadError = cigar[ strspn( cigar, "M" ) ] != '\0' ;

		// Read level: a read that still differs is FN if it had an error and
		// FP otherwise; a read that now matches is TP if it had an error.
		if ( StrCompWithTrim( origSeq, seq ) )
		{
			++errorCount ;
			if ( hadError )
			{
				if ( verbose )
					printf( "FN\n" ) ;
				++readFN[exp] ;
			}
			else
			{
				if ( verbose )
					printf( "FP\n" ) ;
				++readFP[exp] ;
			}
		}
		else
		{
			++correctCount ;
			if ( hadError )
			{
				if ( verbose )
					printf( "TP\n" ) ;
				++readTP[exp] ;
			}
		}

		if ( !hadError )
			++sameCount ;

		p = FindIdColumn( id, "trim" ) ;
		if ( p != NULL )
		{
			int tmp = atoi( p ) ;
			++trimCount ;
			trimSum += tmp ;
		}

		// Collect information of TP, FP, FN for base level
		int verboseType = 0 ;
		int lena = strlen( origSeq ) ;
		int lenb = strlen( seq ) ;
		// visited[] is indexed by align[] values; size it from the actual
		// read lengths instead of relying on a fixed-size stack array.
		int visitedSize = ( lena > lenb ? lena : lenb ) + 1 ;
		bool *visited = ( bool * )calloc( visitedSize, sizeof( bool ) ) ;
		if ( visited == NULL )
		{
			fprintf( stderr, "Out of memory\n" ) ;
			return 1 ;
		}
		if ( allowTrim == false )
		{
			Alignment( lena, origSeq, lenb, seq ) ;
		}
		else
		{
			for ( i = 0 ; seq[i] ; ++i )
				align[i] = i ;
		}
		for ( i = 0 ; seq[i] ; ++i )
		{
			if ( align[i] == -1 )
			{
				// Base of the corrected read with no counterpart in the truth.
				++baseFP[exp] ;
				continue ;
			}
			visited[ align[i] ] = true ;

			if ( cigar[ align[i] ] == 'M' )
			{
				if ( seq[i] != origSeq[ align[i]] )
				{
					verboseType = 1 ;
					++baseFP[exp] ;
				}
			}
			else if ( cigar[align[i]] == 'E' )
			{
				if ( seq[i] == origSeq[align[i]] )
				{
					if ( verboseType == 0 )
						verboseType = 2 ;
					++baseTP[exp] ;
				}
				else
				{
					if ( verboseType == 0 || verboseType == 2 )
						verboseType = 3 ;
					++baseFN[exp] ;
				}
			}
		}

		// Truth positions skipped before the last visited one count as well.
		for ( k = lena - 1 ; k >= 0 ; --k )
			if ( visited[k] )
				break ;

		for ( i = 0 ; i < k + 1 ; ++i )
			if ( visited[i] == false )
			{
				if ( cigar[i] == 'M' )
					++baseFP[exp] ;
				else
					++baseFN[exp] ;
			}

		// Unaligned bases at either end were counted as FP above; take them
		// back. Both scans stay inside [0, lenb) and never overlap.
		int leading = 0 ;
		while ( leading < lenb && align[leading] == -1 )
			++leading ;
		int trailing = 0 ;
		if ( leading < lenb )
		{
			for ( i = lenb - 1 ; i > leading && align[i] == -1 ; --i )
				++trailing ;
		}
		baseFP[exp] -= leading + trailing ;

		free( visited ) ;

		if ( baseVerbose )
		{
			if ( verboseType == 1 )
				printf( "FP\n" ) ;
			else if ( verboseType == 2 )
				printf( "TP\n" ) ;
			else if ( verboseType == 3 )
				printf( "FN\n" ) ;
		}
	}

	int TP, FP, FN ;
	printf( "correct #: %d\n"
		"error #: %d\n", correctCount, errorCount ) ;
	printf( "Original Correct Reads Count: %d\n", sameCount ) ;
	printf( "Trimmed Reads Count: %d. Average trim length: %lf\n", trimCount,
		trimCount > 0 ? (double)trimSum / trimCount : 0.0 ) ;

	printf( "Overall:\n") ;
	TP = FP = FN = 0 ;
	for ( i = 0  ; i < 4 ; ++i )
	{
		TP += baseTP[i] ;
		FP += baseFP[i] ;
		FN += baseFN[i] ;
	}
	printf( "\nBase level:\n" ) ;
	PrintAccuracyMetrics( TP, FP, FN ) ;

	TP = FP = FN = 0 ;
	for ( i = 0  ; i < 4 ; ++i )
	{
		TP += readTP[i] ;
		FP += readFP[i] ;
		FN += readFN[i] ;
	}
	printf( "\nRead level:\n" ) ;
	PrintAccuracyMetrics( TP, FP, FN ) ;

	if ( useExp )
	{
		// Only the three known expression levels; class 3 (unknown) is left out.
		for ( i = 0 ; i < 3 ; ++i )
		{
			printf( "\nExpress level: %d\n", i ) ;
			printf( "Base level:\n" ) ;
			PrintAccuracyMetrics( baseTP[i], baseFP[i], baseFN[i] ) ;

			printf( "\nRead level:\n" ) ;
			PrintAccuracyMetrics( readTP[i], readFP[i], readFN[i] ) ;
		}
	}
	return 0 ;
}
Esempio n. 10
0
// Finds candidate breakpoints for a cluster of connected reads and appends
// them to `breakpoints`.  Nothing is returned: when no viable breakpoint is
// found, `breakpoints` is simply left unchanged.
//
// Steps:
//   1. Fetch the split reads around the outer read of the cluster.
//   2. Group split reads anchored on the cluster strand by their (bio-indexed)
//      close-end location; a location only becomes a candidate if at least
//      userSettings->MIN_DD_BREAKPOINT_SUPPORT split reads share it.
//   3. For every candidate, build a consensus of its split reads and store an
//      MEI_breakpoint unless that consensus is found (on either strand) in a
//      local reference window around the candidate.
//
// Parameters:
//   cluster         reads of the cluster; all of them are copied into every
//                   breakpoint that is stored.  Must not be empty (accessed
//                   through at(), which throws on an empty vector).
//   bam_sources     passed through to get_split_reads_for_cluster.
//   insert_size     not used by this function.
//   cluster_tid     stored in each created breakpoint.
//   cluster_strand  strand of the cluster (Plus / Minus).
//   chromosome      supplies the reference sequence and its size.
//   sample_dict     passed to get_sample_name to resolve a read group.
//   breakpoints     output; new breakpoints are appended.
//   userSettings    supplies MIN_DD_BREAKPOINT_SUPPORT and MIN_DD_MAP_DISTANCE.
static void get_breakpoints(std::vector<simple_read*>& cluster, std::vector<bam_info>& bam_sources, int insert_size,
                            int cluster_tid, char cluster_strand, const Chromosome* chromosome, 
                            std::map<std::string, std::string>& sample_dict, std::vector<MEI_breakpoint>& breakpoints,
                            UserDefinedSettings* userSettings) {
    typedef std::map<int, std::vector<simple_read> > CandidateMap;

    std::vector<SPLIT_READ> split_reads;
    int outer_read_pos = (cluster_strand == Minus)? cluster.at(cluster.size()-1)->pos : cluster.at(0)->pos;
    get_split_reads_for_cluster(bam_sources, cluster_strand, outer_read_pos, chromosome, split_reads);
    
    // Search for split reads with a mate close to the outer read of the
    // cluster.  Store candidate breakpoints.
    // Todo: speedup by exploiting the fact that both clusters and split reads are sorted
    // by mapping location.
    CandidateMap bio_candidate_breakpoints;
    for (size_t i = 0; i < split_reads.size(); i++) {
        // Work on the stored read directly instead of copying the whole record
        // for every iteration.
        SPLIT_READ& read = split_reads.at(i);
        
        const char anchor_strand = read.MatchedD;
        if (cluster_strand != anchor_strand) {
            continue;
        }
        
        const unsigned int comp_candidate_bp = read.getLastAbsLocCloseEnd();
        const unsigned int bio_candidate_bp = get_bio_chr_index(comp_candidate_bp);
        
        CandidateMap::iterator candidate = bio_candidate_breakpoints.find(bio_candidate_bp);
        if (candidate == bio_candidate_breakpoints.end()) {
            // New candidate, look ahead to check whether there are enough supporting split reads.
            int SR_support = 1;
            for (size_t j = i + 1; j < split_reads.size(); j++) {
                if (split_reads.at(j).getLastAbsLocCloseEnd() == comp_candidate_bp && 
                    split_reads.at(j).MatchedD == cluster_strand) {
                    SR_support++;
                }
            }
            if (SR_support < userSettings->MIN_DD_BREAKPOINT_SUPPORT) {
                // Not enough support, skip it.
                continue;
            }
            // insert() hands back the iterator of the new entry, so no second lookup is needed.
            candidate = bio_candidate_breakpoints.insert(
                std::make_pair(bio_candidate_bp, std::vector<simple_read>())).first;
        }
        
        // Store the unmatched sequence as it should be matched on the opposite strand of
        // the mapped mate.
        std::string whole_sequence;
        std::string mapped_part;
        std::string unmapped_part;
        if (anchor_strand == Plus) {
            // Mapped (close-end) part comes first in the reverse-complemented sequence.
            whole_sequence = ReverseComplement(read.getUnmatchedSeq());
            mapped_part = whole_sequence.substr(0, read.CloseEndLength);
            unmapped_part = whole_sequence.substr(read.CloseEndLength, whole_sequence.length());
        } else {
            // Mapped (close-end) part is the tail of the sequence.
            whole_sequence = read.getUnmatchedSeq();
            mapped_part = whole_sequence.substr(whole_sequence.length() - read.CloseEndLength, 
                                                whole_sequence.length());
            unmapped_part = whole_sequence.substr(0, whole_sequence.length() - read.CloseEndLength);
        }

        std::string sample_name;
        get_sample_name(read.read_group, sample_dict, sample_name);
        simple_read simple_split_read(read.Name, -1, -1, '?', sample_name, whole_sequence, mapped_part, 
                                      unmapped_part);
        candidate->second.push_back(simple_split_read);
    }
    
  
    char SR_mapping_strand = (cluster_strand == Plus)? Minus : Plus;
    
    // Remove any split reads for which a far end can be found locally, these are
    // assumed to contribute to some local variants.
    // Todo: determine region that is searched for far end.
    CandidateMap::iterator map_iter;
    for (map_iter = bio_candidate_breakpoints.begin(); map_iter != bio_candidate_breakpoints.end(); ++map_iter) {
        
        std::string mapped_consensus = get_consensus_unmapped(map_iter->second, SR_mapping_strand);
        if (mapped_consensus.length() == 0) {
            LOG_DEBUG(*logStream << time_log() << "Consensus building failed for split read mapping ends (" << 
                      map_iter->second.size() << " reads @ " << map_iter->first << ")" << std::endl);
            continue;
        }
        int bio_bp = map_iter->first;
                
        // If far end consensus is not found in local window, store breakpoint.
        // The window starts MIN_DD_MAP_DISTANCE before the candidate (clamped at 0)
        // and spans at most 2 * MIN_DD_MAP_DISTANCE, clamped to the chromosome size.
        size_t FE_window_start = std::max(0, get_comp_chr_index(bio_bp) - userSettings->MIN_DD_MAP_DISTANCE);
        size_t FE_window_size = std::min(chromosome->getCompSize() - (unsigned) FE_window_start, 
                                         2 * (unsigned) userSettings->MIN_DD_MAP_DISTANCE);
        if (!contains_subseq_any_strand(mapped_consensus, chromosome->getSeq().substr(FE_window_start, 
                FE_window_size), MIN_CONSENSUS_LENGTH)) {
            MEI_breakpoint bp(cluster_tid, bio_bp, cluster_strand);
            bp.associated_split_reads = map_iter->second;

            // Link associated discordant reads (all reads from cluster) and split reads.
            std::vector<simple_read*>::iterator read_iter;
            for (read_iter = cluster.begin(); read_iter != cluster.end(); ++read_iter) {
                bp.associated_reads.push_back(*(*read_iter));
            }
            breakpoints.push_back(bp);
        }
    }
}
Esempio n. 11
0
void CombineReads(const std::string & CurrentChrSeq, const char & Strand, const std::vector <SPLIT_READ> & input_reads, const std::vector <unsigned int> & Index_Of_Useful_Reads, std::vector <SPLIT_READ> & output_reads) {
    //std::cout << "start of CombineReads" << std::endl;
    std::string Spacer = "";
    unsigned Max_ReadLength = 0;
    unsigned Max_AssembledLength = 0;
    unsigned Min_LeftMostPos = input_reads[Index_Of_Useful_Reads[0]].LeftMostPos;
    SPLIT_READ output_one_read;// = input_reads[Index_Of_Useful_Reads[0]];
    unsigned Index2Read4Copy = 0;
    //unsigned Min_Close_Size = 10000;
    for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
        //if (input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() < Min_Close_Size) {
        //    Min_Close_Size = input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size();
        //    Index2Read4Copy = ReadIndex; // input_reads[Index_Of_Useful_Reads[Index2Read4Copy]]
        //}
        //std::cout << Strand << " " << input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() << std::endl;
        //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos << " " << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq << std::endl;
        if (input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos < (int)Min_LeftMostPos)
            Min_LeftMostPos = input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos;
        if (input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() > (short)Max_ReadLength)
            Max_ReadLength = input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength();
        if (input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos + (unsigned)input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() > Max_AssembledLength) 
            Max_AssembledLength = input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos + input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength();
    }
    Max_AssembledLength = Max_AssembledLength - Min_LeftMostPos;
    if ((float)Max_AssembledLength < Max_ReadLength * 1.3) return;
    std::cout << "Max_AssembledLength " << Max_AssembledLength << std::endl;
    for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
        Spacer.clear();
        
        if (Strand == '+') {
            for (unsigned SpacerIndex = 0; SpacerIndex < Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); SpacerIndex++) Spacer += " ";
            //std::cout << Spacer << (input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq) << std::endl;
        }
        else {
            for (unsigned SpacerIndex = 0; SpacerIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos; SpacerIndex++) 
                Spacer += " ";
            //std::cout << Spacer << (input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq) << std::endl;
        }
    }
    if (Strand == '+') { // UnmatchedSeq
        std::cout << "+ Max_ReadLength " << Max_ReadLength << "\t" << "Min_LeftMostPos " << Min_LeftMostPos 
        << "\nref: \n" << ReverseComplement(CurrentChrSeq.substr(Min_LeftMostPos, Max_AssembledLength)) << std::endl;
    }
    else if (Strand == '-') {
        std::cout << "- Max_ReadLength " << Max_ReadLength << "\t" << "Min_LeftMostPos " << Min_LeftMostPos 
        << "\nref: \n" << CurrentChrSeq.substr(Min_LeftMostPos, Max_AssembledLength) << std::endl;
    }
    unsigned Count[5][Max_AssembledLength];
    float Ratio[5][Max_AssembledLength];
    for (short i = 0; i < 5; i++) {
        for (unsigned j = 0; j < Max_AssembledLength; j++) {
            Count[i][j] = 0;
            Ratio[i][j] = 0.0;
        }
    }
    if (Strand == '+') {
        for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
            //std::cout << std::endl;
            for (short BaseIndex = 0; BaseIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); BaseIndex++) {
                //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq[BaseIndex];
                switch (input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()[BaseIndex]) {
                    case 'A':
                        Count[0][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00000000
                    case 'C':
                        Count[1][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00010000
                    case 'G':
                        Count[2][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00100000
                    case 'T':
                        Count[3][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00110000
                    default:
                        Count[4][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        // 01000000
                }
            }
        }
    }
    else if (Strand == '-') {
        for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
            //std::cout << std::endl;
            for (short BaseIndex = 0; BaseIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); BaseIndex++) {
                //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq[BaseIndex];
                switch (input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()[BaseIndex]) {
                    case 'A':
                        Count[0][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00000000
                    case 'C':
                        Count[1][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00010000
                    case 'G':
                        Count[2][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00100000
                    case 'T':
                        Count[3][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00110000
                    default:
                        Count[4][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        // 01000000
                }
            }
        }
    }

    float Sum;
    for (unsigned PosIndex = 0; PosIndex < Max_AssembledLength; PosIndex++) {
        Sum = Count[0][PosIndex] + Count[1][PosIndex] + Count[2][PosIndex] + Count[3][PosIndex] + Count[4][PosIndex];
        //std::cout << Count[0][PosIndex] << " " << Count[1][PosIndex] << " " << Count[2][PosIndex] << " " << Count[3][PosIndex] << " " << Count[4][PosIndex] << std::endl;
        for (unsigned BaseIndex = 0; BaseIndex < 5; BaseIndex++) {
            Ratio[BaseIndex][PosIndex] = Count[BaseIndex][PosIndex] / Sum;
        }
    }
    std::string OutputOneStr = "";
    const float RatioCutoff = 0.66;
    unsigned Max_Base_Count = 0;
    short Max_Base_Count_Index = -1;
    for (unsigned PosIndex = 0; PosIndex < Max_AssembledLength; PosIndex++) {
        if (Ratio[0][PosIndex] > RatioCutoff) {
            OutputOneStr += "A";
            continue;
        }
        if (Ratio[1][PosIndex] > RatioCutoff) {
            OutputOneStr += "C";
            continue;
        }
        if (Ratio[2][PosIndex] > RatioCutoff) {
            OutputOneStr += "G";
            continue;
        }
        if (Ratio[3][PosIndex] > RatioCutoff) {
            OutputOneStr += "T";
            continue;
        }
        for (short BaseIndex = 0; BaseIndex < 4; BaseIndex++) {
            if (Count[BaseIndex][PosIndex] > Max_Base_Count && Count[BaseIndex][PosIndex] >= 3)
                Max_Base_Count_Index = BaseIndex;
        }
        //if (Max_Base_Count_Index != -1) {
            switch (Max_Base_Count_Index) {
                case 0:
                    OutputOneStr += "A";
                    break;
                case 1:
                    OutputOneStr += "C";
                    break;
                case 2:
                    OutputOneStr += "G";
                    break;
                case 3:
                    OutputOneStr += "T";
                    break;
                case -1:
                    OutputOneStr += "N";
                    break;
                default:
                    break;
            }
        //}
        //else OutputOneStr += "N";
    }
    if (Strand == '+') {
        std::cout << "Final merged string +: original\n" << (OutputOneStr) << std::endl;
        std::cout << "Final merged string +: convert to ref\n" << ReverseComplement(OutputOneStr) << std::endl;
    }
    else {
        std::cout << "Final merged string -: original\n" << (OutputOneStr) << std::endl;
        std::cout << "Final merged string : convert to ref\n" << (OutputOneStr) << std::endl;
    }//std::cout << "Final merged string: -\n" << (OutputOneStr) << std::endl;
    //std::cout << "here1" << std::endl;
    unsigned Count_N = 0;
    for (unsigned pos_index = 0;  pos_index < OutputOneStr.size(); pos_index++) {
        if (OutputOneStr[pos_index] == 'N') Count_N++;
    }
    if ((float)Count_N < OutputOneStr.size() * 0.05) {
        
        unsigned Min_Close_Size = 10000;
        Index2Read4Copy = 0; // if the best one cannot be found due to N or whatever reasons, use the first read as the template for copy.
        //std::cout << "Original Index2Read4Copy: " << Index2Read4Copy << std::endl;
        for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
            if (input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() < Min_Close_Size && OutputOneStr.find(input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()) !=std::string::npos) { // quick fix here: need more work
                Min_Close_Size = input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size();
                Index2Read4Copy = ReadIndex; // input_reads[Index_Of_Useful_Reads[Index2Read4Copy]]
                //std::cout << "Changed Index2Read4Copy: " << Index2Read4Copy << std::endl;
            }
        }
        //std::cout << "here2" << std::endl;
        output_one_read = input_reads[Index_Of_Useful_Reads[Index2Read4Copy]];
        //std::cout << "here2a" << std::endl;
        output_one_read.setUnmatchedSeq( OutputOneStr );
        //update std::map <std::string, int> ReadCountPerSample;
        GetReadCountPerSample(input_reads, Index_Of_Useful_Reads, output_one_read);
        //std::cout << "here2b" << std::endl;
        //std::cout << "Before: " << output_one_read.UP_Close.size() << std::endl;
        output_one_read.UP_Close.clear();
        //std::cout << "here3" << std::endl;
        output_one_read.Thickness = Index_Of_Useful_Reads.size();
        //std::cout << "Thickness " << output_one_read.Thickness << std::endl;
        GetCloseEnd(CurrentChrSeq, output_one_read);
        //std::cout << "After: " << output_one_read.UP_Close.size() << std::endl;
        output_reads.push_back(output_one_read);
        //std::cout << "here4" << std::endl;
        //std::cout << "end of CombineReads" << std::endl;
    }
}