Exemplo n.º 1
0
/* 'ReadBuffer::flush' processes every read currently held in the buffer: it
   runs the close-end search on each read and then appends the read either to
   'm_filteredReads' (a close end was found) or to 'm_OneEndMappedReads' (no
   close end). Afterwards the buffer is emptied so it can receive the next
   reads.
   The reads are handled by an OpenMP parallel loop, so the order in which
   they end up in the two result containers is not fixed. */
void ReadBuffer::flush()
{
   #pragma omp parallel for
   for (int readIdx = 0; readIdx < m_currentsize; ++readIdx) {
      GetCloseEnd(m_CHROMOSOME, m_rawreads[readIdx]);

      if (!m_rawreads[readIdx].hasCloseEnd()) {
         // No close end: keep the read in the one-end-mapped collection.
         #pragma omp critical
         {
            m_OneEndMappedReads.push_back(m_rawreads[readIdx]);
         }
         continue;
      }

      updateReadAfterCloseEndMapping(m_rawreads[readIdx]);

      // The result containers are shared between threads, so appends are
      // serialised through the (unnamed) critical section.
      #pragma omp critical
      {
         m_filteredReads.push_back(m_rawreads[readIdx]);
      }
   }

   // Every buffered read has been dispatched; reset the buffer.
   m_rawreads.clear();
   m_currentsize = 0;
}
Exemplo n.º 2
0
/* 'ReadBuffer::flush' processes every read currently held in the buffer and
   then empties the buffer so it can receive the next reads.
   For each read:
     - unless the read is flagged 'MapperSplit', the close-end search is run;
     - if the read has a close end, it is updated, an entry (Tag, 1) is
       inserted into its 'SampleName2Number' map, and the read is appended to
       'm_filteredReads';
     - a read without a close end is not stored anywhere and is discarded
       when the buffer is cleared.
   The reads are handled by an OpenMP parallel loop, so the order of the
   entries in 'm_filteredReads' is not fixed.
   NOTE: an earlier experiment that merged identical reads through a global
   'g_ReadSeq2Index' lookup (and counted them per sample) is disabled. */
void ReadBuffer::flush()
{
	#pragma omp parallel for
	for (int readIdx = 0; readIdx < m_currentsize; ++readIdx) {
		// Reads flagged as MapperSplit skip the close-end search.
		if (!m_rawreads[readIdx].MapperSplit) {
			GetCloseEnd(m_CHROMOSOME, m_rawreads[readIdx]);
		}

		// Reads without a close end are dropped.
		if (!m_rawreads[readIdx].hasCloseEnd()) {
			continue;
		}

		updateReadAfterCloseEndMapping(m_rawreads[readIdx]);

		// 'm_filteredReads' is shared between threads; tag and append the
		// read inside one critical section.
		#pragma omp critical
		{
			m_rawreads[readIdx].SampleName2Number.insert(
				std::pair <std::string, unsigned> (m_rawreads[readIdx].Tag, 1));
			m_filteredReads.push_back(m_rawreads[readIdx]);
		}
	}

	// Every buffered read has been handled; reset the buffer.
	m_rawreads.clear();
	m_currentsize = 0;
}
Exemplo n.º 3
0
void CombineReads(const std::string & CurrentChrSeq, const char & Strand, const std::vector <SPLIT_READ> & input_reads, const std::vector <unsigned int> & Index_Of_Useful_Reads, std::vector <SPLIT_READ> & output_reads) {
    //std::cout << "start of CombineReads" << std::endl;
    std::string Spacer = "";
    unsigned Max_ReadLength = 0;
    unsigned Max_AssembledLength = 0;
    unsigned Min_LeftMostPos = input_reads[Index_Of_Useful_Reads[0]].LeftMostPos;
    SPLIT_READ output_one_read;// = input_reads[Index_Of_Useful_Reads[0]];
    unsigned Index2Read4Copy = 0;
    //unsigned Min_Close_Size = 10000;
    for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
        //if (input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() < Min_Close_Size) {
        //    Min_Close_Size = input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size();
        //    Index2Read4Copy = ReadIndex; // input_reads[Index_Of_Useful_Reads[Index2Read4Copy]]
        //}
        //std::cout << Strand << " " << input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() << std::endl;
        //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos << " " << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq << std::endl;
        if (input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos < (int)Min_LeftMostPos)
            Min_LeftMostPos = input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos;
        if (input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() > (short)Max_ReadLength)
            Max_ReadLength = input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength();
        if (input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos + (unsigned)input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() > Max_AssembledLength) 
            Max_AssembledLength = input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos + input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength();
    }
    Max_AssembledLength = Max_AssembledLength - Min_LeftMostPos;
    if ((float)Max_AssembledLength < Max_ReadLength * 1.3) return;
    std::cout << "Max_AssembledLength " << Max_AssembledLength << std::endl;
    for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
        Spacer.clear();
        
        if (Strand == '+') {
            for (unsigned SpacerIndex = 0; SpacerIndex < Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); SpacerIndex++) Spacer += " ";
            //std::cout << Spacer << (input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq) << std::endl;
        }
        else {
            for (unsigned SpacerIndex = 0; SpacerIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos; SpacerIndex++) 
                Spacer += " ";
            //std::cout << Spacer << (input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq) << std::endl;
        }
    }
    if (Strand == '+') { // UnmatchedSeq
        std::cout << "+ Max_ReadLength " << Max_ReadLength << "\t" << "Min_LeftMostPos " << Min_LeftMostPos 
        << "\nref: \n" << ReverseComplement(CurrentChrSeq.substr(Min_LeftMostPos, Max_AssembledLength)) << std::endl;
    }
    else if (Strand == '-') {
        std::cout << "- Max_ReadLength " << Max_ReadLength << "\t" << "Min_LeftMostPos " << Min_LeftMostPos 
        << "\nref: \n" << CurrentChrSeq.substr(Min_LeftMostPos, Max_AssembledLength) << std::endl;
    }
    unsigned Count[5][Max_AssembledLength];
    float Ratio[5][Max_AssembledLength];
    for (short i = 0; i < 5; i++) {
        for (unsigned j = 0; j < Max_AssembledLength; j++) {
            Count[i][j] = 0;
            Ratio[i][j] = 0.0;
        }
    }
    if (Strand == '+') {
        for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
            //std::cout << std::endl;
            for (short BaseIndex = 0; BaseIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); BaseIndex++) {
                //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq[BaseIndex];
                switch (input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()[BaseIndex]) {
                    case 'A':
                        Count[0][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00000000
                    case 'C':
                        Count[1][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00010000
                    case 'G':
                        Count[2][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00100000
                    case 'T':
                        Count[3][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        break;	// 00110000
                    default:
                        Count[4][Max_AssembledLength + Min_LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength() + BaseIndex]++;
                        // 01000000
                }
            }
        }
    }
    else if (Strand == '-') {
        for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
            //std::cout << std::endl;
            for (short BaseIndex = 0; BaseIndex < input_reads[Index_Of_Useful_Reads[ReadIndex]].getReadLength(); BaseIndex++) {
                //std::cout << input_reads[Index_Of_Useful_Reads[ReadIndex]].UnmatchedSeq[BaseIndex];
                switch (input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()[BaseIndex]) {
                    case 'A':
                        Count[0][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00000000
                    case 'C':
                        Count[1][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00010000
                    case 'G':
                        Count[2][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00100000
                    case 'T':
                        Count[3][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        break;	// 00110000
                    default:
                        Count[4][input_reads[Index_Of_Useful_Reads[ReadIndex]].LeftMostPos - Min_LeftMostPos + BaseIndex]++;
                        // 01000000
                }
            }
        }
    }

    float Sum;
    for (unsigned PosIndex = 0; PosIndex < Max_AssembledLength; PosIndex++) {
        Sum = Count[0][PosIndex] + Count[1][PosIndex] + Count[2][PosIndex] + Count[3][PosIndex] + Count[4][PosIndex];
        //std::cout << Count[0][PosIndex] << " " << Count[1][PosIndex] << " " << Count[2][PosIndex] << " " << Count[3][PosIndex] << " " << Count[4][PosIndex] << std::endl;
        for (unsigned BaseIndex = 0; BaseIndex < 5; BaseIndex++) {
            Ratio[BaseIndex][PosIndex] = Count[BaseIndex][PosIndex] / Sum;
        }
    }
    std::string OutputOneStr = "";
    const float RatioCutoff = 0.66;
    unsigned Max_Base_Count = 0;
    short Max_Base_Count_Index = -1;
    for (unsigned PosIndex = 0; PosIndex < Max_AssembledLength; PosIndex++) {
        if (Ratio[0][PosIndex] > RatioCutoff) {
            OutputOneStr += "A";
            continue;
        }
        if (Ratio[1][PosIndex] > RatioCutoff) {
            OutputOneStr += "C";
            continue;
        }
        if (Ratio[2][PosIndex] > RatioCutoff) {
            OutputOneStr += "G";
            continue;
        }
        if (Ratio[3][PosIndex] > RatioCutoff) {
            OutputOneStr += "T";
            continue;
        }
        for (short BaseIndex = 0; BaseIndex < 4; BaseIndex++) {
            if (Count[BaseIndex][PosIndex] > Max_Base_Count && Count[BaseIndex][PosIndex] >= 3)
                Max_Base_Count_Index = BaseIndex;
        }
        //if (Max_Base_Count_Index != -1) {
            switch (Max_Base_Count_Index) {
                case 0:
                    OutputOneStr += "A";
                    break;
                case 1:
                    OutputOneStr += "C";
                    break;
                case 2:
                    OutputOneStr += "G";
                    break;
                case 3:
                    OutputOneStr += "T";
                    break;
                case -1:
                    OutputOneStr += "N";
                    break;
                default:
                    break;
            }
        //}
        //else OutputOneStr += "N";
    }
    if (Strand == '+') {
        std::cout << "Final merged string +: original\n" << (OutputOneStr) << std::endl;
        std::cout << "Final merged string +: convert to ref\n" << ReverseComplement(OutputOneStr) << std::endl;
    }
    else {
        std::cout << "Final merged string -: original\n" << (OutputOneStr) << std::endl;
        std::cout << "Final merged string : convert to ref\n" << (OutputOneStr) << std::endl;
    }//std::cout << "Final merged string: -\n" << (OutputOneStr) << std::endl;
    //std::cout << "here1" << std::endl;
    unsigned Count_N = 0;
    for (unsigned pos_index = 0;  pos_index < OutputOneStr.size(); pos_index++) {
        if (OutputOneStr[pos_index] == 'N') Count_N++;
    }
    if ((float)Count_N < OutputOneStr.size() * 0.05) {
        
        unsigned Min_Close_Size = 10000;
        Index2Read4Copy = 0; // if the best one cannot be found due to N or whatever reasons, use the first read as the template for copy.
        //std::cout << "Original Index2Read4Copy: " << Index2Read4Copy << std::endl;
        for (unsigned ReadIndex = 0; ReadIndex < Index_Of_Useful_Reads.size(); ReadIndex++) {
            if (input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size() < Min_Close_Size && OutputOneStr.find(input_reads[Index_Of_Useful_Reads[ReadIndex]].getUnmatchedSeq()) !=std::string::npos) { // quick fix here: need more work
                Min_Close_Size = input_reads[Index_Of_Useful_Reads[ReadIndex]].UP_Close.size();
                Index2Read4Copy = ReadIndex; // input_reads[Index_Of_Useful_Reads[Index2Read4Copy]]
                //std::cout << "Changed Index2Read4Copy: " << Index2Read4Copy << std::endl;
            }
        }
        //std::cout << "here2" << std::endl;
        output_one_read = input_reads[Index_Of_Useful_Reads[Index2Read4Copy]];
        //std::cout << "here2a" << std::endl;
        output_one_read.setUnmatchedSeq( OutputOneStr );
        //update std::map <std::string, int> ReadCountPerSample;
        GetReadCountPerSample(input_reads, Index_Of_Useful_Reads, output_one_read);
        //std::cout << "here2b" << std::endl;
        //std::cout << "Before: " << output_one_read.UP_Close.size() << std::endl;
        output_one_read.UP_Close.clear();
        //std::cout << "here3" << std::endl;
        output_one_read.Thickness = Index_Of_Useful_Reads.size();
        //std::cout << "Thickness " << output_one_read.Thickness << std::endl;
        GetCloseEnd(CurrentChrSeq, output_one_read);
        //std::cout << "After: " << output_one_read.UP_Close.size() << std::endl;
        output_reads.push_back(output_one_read);
        //std::cout << "here4" << std::endl;
        //std::cout << "end of CombineReads" << std::endl;
    }
}