int searchTandemDuplications(ControlState& currentState, unsigned NumBoxes, const SearchWindow& window) { static int Count_TD = 0; static int Count_TD_Plus = 0; static int Count_TD_Minus = 0; std::vector<unsigned> TD[NumBoxes]; //UserDefinedSettings* userSettings = UserDefinedSettings::Instance(); unsigned TempBoxIndex; LOG_INFO(*logStream << "Searching tandem duplication events ... " << std::endl); //std::cout << "std1" << std::endl; for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads_SR.size(); ReadIndex++) { SPLIT_READ& currentRead = currentState.Reads_SR[ReadIndex]; //std::cout << "std1a" << std::endl; if (currentRead.Used || currentRead.UP_Far.empty() || currentRead.FragName != currentRead.FarFragName) { continue; } //std::cout << "std1b" << std::endl; if (currentRead.MatchedD == Plus) { //std::cout << "in TD plus" << std::endl; for (short MAX_SNP_ERROR_index = 0; MAX_SNP_ERROR_index <= currentRead.getMAX_SNP_ERROR(); MAX_SNP_ERROR_index++) { //std::cout << "std1c" << std::endl; for (unsigned int CloseIndex = 0; CloseIndex < currentRead.UP_Close.size(); CloseIndex++) { //std::cout << "std1d" << std::endl; if (currentRead.Used /* || currentRead. 
BPLeft == 0*/) { break; } if (currentRead.UP_Close[CloseIndex].Mismatches > MAX_SNP_ERROR_index) { continue; } //std::cout << "std1e " << currentRead.UP_Far.size() << std::endl; for (int FarIndex = currentRead.UP_Far.size() - 1; FarIndex >= 0; FarIndex--) { //std::cout << "std1f" << std::endl; if (currentRead.Used) { break; } if (currentRead.UP_Far[FarIndex].Mismatches > MAX_SNP_ERROR_index) { continue; } if (currentRead.UP_Far[FarIndex].Mismatches + currentRead.UP_Close[CloseIndex].Mismatches > MAX_SNP_ERROR_index) { continue; } //std::cout << "std1g" << std::endl; if (currentRead.UP_Far[FarIndex].Direction == Minus) { if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr == currentRead.getReadLength() && currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr < currentRead.UP_Close[CloseIndex].AbsLoc && currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.UP_Close[CloseIndex].AbsLoc) { currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Close[CloseIndex].LengthStr + 1; currentRead.Left = currentRead. UP_Far[FarIndex].AbsLoc + currentRead. UP_Far[FarIndex].LengthStr - 1; currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead. UP_Far[FarIndex].AbsLoc + 1; currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPLeft = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; if (currentRead. BPLeft == 0) { continue; } LeftMostTD(currentState, currentRead, window); if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle( currentRead, currentState.FutureReads_SR); } else { if (readInSpecifiedRegion( currentRead, userSettings->getRegion())) { TempBoxIndex = (int) (currentRead. BPLeft) / BoxSize; if (TempBoxIndex < NumBoxes) { //std::cout << currentRead << std::endl; TD[TempBoxIndex]. 
push_back(ReadIndex); currentRead.Used = true; Count_TD++; Count_TD_Plus++; } } } } } } } } } else if (currentRead.MatchedD == Minus) { //std::cout << "in Minus" << std::endl; for (short MAX_SNP_ERROR_index = 0; MAX_SNP_ERROR_index <= currentRead.getMAX_SNP_ERROR(); MAX_SNP_ERROR_index++) { //std::cout << "in Minus 1" << std::endl; for (int CloseIndex = currentRead.UP_Close.size() - 1; CloseIndex >= 0; CloseIndex--) { //std::cout << "in Minus 2" << std::endl; if (currentRead.Used /*|| currentRead. BPLeft == 0*/) { break; } if (currentRead.UP_Close[CloseIndex].Mismatches > MAX_SNP_ERROR_index) { continue; } //std::cout << "in Minus 3" << std::endl; for (int FarIndex = 0; FarIndex < (int) currentRead.UP_Far.size(); FarIndex++) { if (currentRead.Used) { break; } if (currentRead.UP_Far[FarIndex].Mismatches > MAX_SNP_ERROR_index) { continue; } if (currentRead.UP_Far[FarIndex].Mismatches + currentRead.UP_Close[CloseIndex].Mismatches > MAX_SNP_ERROR_index) { continue; } if (currentRead.UP_Far[FarIndex]. Direction == Plus) { //std::cout << "BPLeft" << currentRead. BPLeft << std::endl; if (currentRead.UP_Close[CloseIndex].LengthStr + currentRead.UP_Far[FarIndex].LengthStr == currentRead.getReadLength() && currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.UP_Far[FarIndex]. AbsLoc && currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr < currentRead.UP_Far[FarIndex]. AbsLoc) { currentRead.Right = currentRead. UP_Far[FarIndex].AbsLoc - currentRead. UP_Far[FarIndex].LengthStr + 1; currentRead.Left = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.BP = currentRead. UP_Far[FarIndex].LengthStr - 1; currentRead.IndelSize = currentRead. 
UP_Far[FarIndex].AbsLoc - currentRead.UP_Close[CloseIndex].AbsLoc + 1; currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; //std::cout << "in Minus 4a" << std::endl; if (currentRead. BPLeft == 0) { continue; } LeftMostTD(currentState, currentRead, window); //std::cout << "in Minus 4b" << std::endl; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle( currentRead, currentState.FutureReads_SR); } else { if (readInSpecifiedRegion( currentRead, userSettings->getRegion())) { TempBoxIndex = (int) (currentRead. BPLeft) / BoxSize; if (TempBoxIndex < NumBoxes) { //std::cout << currentRead << std::endl; TD[TempBoxIndex]. push_back(ReadIndex); currentRead.Used = true; Count_TD++; Count_TD_Minus++; } } } } } } //std::cout << "in Minus 4" << std::endl; } } } } //std::cout << "std2" << std::endl; LOG_INFO(*logStream << "Total: " << Count_TD << "\t+" << Count_TD_Plus << "\t-" << Count_TD_Minus << std::endl); //std::cout << "TD 1" << std::endl; std::ofstream TDOutf(userSettings->getTDOutputFilename().c_str(), std::ios::app); SortAndOutputTandemDuplications(currentState, NumBoxes, window.getChromosome()->getSeq(), currentState.Reads_SR, TD, TDOutf, false); //std::cout << "TD 2" << std::endl; //std::cout << "std3" << std::endl; for (unsigned int i = 0; i < NumBoxes; i++) { TD[i].clear(); } // std::cout << "std4" << std::endl; return EXIT_SUCCESS; }
int searchTandemDuplicationsNT(ControlState& currentState, unsigned NumBoxes, const SearchWindow& window) { static int Count_TD_NT = 0; static int Count_TD_NT_Plus = 0; static int Count_TD_NT_Minus = 0; std::vector<unsigned> TD_NT[NumBoxes]; int CloseIndex = 0; int FarIndex = 0; //UserDefinedSettings* userSettings = UserDefinedSettings::Instance(); LOG_INFO(*logStream << "Searching tandem duplication events with non-template sequence ... " << std::endl); for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads_SR.size(); ReadIndex++) { SPLIT_READ& currentRead = currentState.Reads_SR[ReadIndex]; if (currentRead.Used || currentRead.UP_Far.empty() || currentRead.FragName != currentRead.FarFragName) { continue; } CloseIndex = currentRead.UP_Close.size() - 1; FarIndex = currentRead.UP_Far.size() - 1; if (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr >= currentRead.getReadLength()) { continue; } if (currentRead.UP_Far[FarIndex].Mismatches + currentRead.UP_Close[CloseIndex].Mismatches > (short) (1 + userSettings->Seq_Error_Rate * (currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr))) { continue; } if (currentRead.MatchedD == Plus) { if (currentRead.UP_Far[FarIndex].Direction == Minus) { if (currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr < currentRead.UP_Close[CloseIndex].AbsLoc && currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.UP_Close[CloseIndex].AbsLoc && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr > userSettings->Min_Num_Matched_Bases) { currentRead.Right = currentRead.UP_Close[CloseIndex].AbsLoc - currentRead.UP_Close[CloseIndex].LengthStr + 1; currentRead.Left = currentRead.UP_Far[FarIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.BP = currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Close[CloseIndex].AbsLoc - 
currentRead.UP_Far[FarIndex].AbsLoc + 1; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Close[CloseIndex].LengthStr - currentRead.UP_Far[FarIndex].LengthStr; currentRead.NT_str = currentRead.getUnmatchedSeqRev().substr( currentRead.BP + 1, currentRead.NT_size); currentRead.BPRight = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPLeft = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) { TD_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex); currentRead.Used = true; Count_TD_NT++; Count_TD_NT_Plus++; } } } } } else if (currentRead.MatchedD == Minus) { if (currentRead.UP_Far[FarIndex].Direction == Plus) { if (currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr < currentRead.UP_Far[FarIndex].AbsLoc && currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Far[FarIndex].LengthStr < currentRead.UP_Far[FarIndex].AbsLoc && currentRead.UP_Far[FarIndex].LengthStr + currentRead.UP_Close[CloseIndex].LengthStr > userSettings->Min_Num_Matched_Bases) { currentRead.Right = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Far[FarIndex].LengthStr + 1; currentRead.Left = currentRead.UP_Close[CloseIndex].AbsLoc + currentRead.UP_Close[CloseIndex].LengthStr - 1; currentRead.BP = currentRead.UP_Far[FarIndex].LengthStr - 1; currentRead.IndelSize = currentRead.UP_Far[FarIndex].AbsLoc - currentRead.UP_Close[CloseIndex].AbsLoc + 1; currentRead.NT_size = currentRead.getReadLength() - currentRead.UP_Close[CloseIndex].LengthStr - currentRead.UP_Far[FarIndex].LengthStr; currentRead.NT_str = currentRead.getUnmatchedSeq(). 
substr( currentRead.BP + 1, currentRead.NT_size); currentRead.BPRight = currentRead.UP_Far[FarIndex].AbsLoc - g_SpacerBeforeAfter; currentRead.BPLeft = currentRead.UP_Close[CloseIndex].AbsLoc - g_SpacerBeforeAfter; if (readTransgressesBinBoundaries( currentRead, window.getEnd())) { saveReadForNextCycle(currentRead, currentState.FutureReads_SR); } else { if ( readInSpecifiedRegion( currentRead, userSettings->getRegion())) { TD_NT[(int) currentRead. BPLeft / BoxSize]. push_back(ReadIndex); currentRead.Used = true; Count_TD_NT++; Count_TD_NT_Minus++; } } } } } } LOG_INFO(*logStream << "Total: " << Count_TD_NT << "\t+" << Count_TD_NT_Plus << "\t-" << Count_TD_NT_Minus << std::endl); std::ofstream TDOutf(userSettings->getTDOutputFilename().c_str(), std::ios::app); SortAndOutputTandemDuplications(currentState, NumBoxes, window.getChromosome()->getSeq(), currentState.Reads_SR, TD_NT, TDOutf, true); TDOutf.close(); for (unsigned int i = 0; i < NumBoxes; i++) { TD_NT[i].clear(); } return EXIT_SUCCESS; }
/**
 * Scans currentState.Reads for reads supporting a tandem duplication whose
 * close and far anchors together cover the whole read.
 *
 * Reads that are already used or have no far anchors are skipped. For the
 * remaining reads the mismatch budget is raised step by step from 0 to the
 * read's MAX_SNP_ERROR, and every close/far anchor pair within the budget is
 * tested. For an accepted pair the breakpoint fields of the read are filled
 * in. A read that crosses currentState.upperBinBorder (per
 * readTransgressesBinBoundaries) goes to saveReadForNextCycle. Otherwise, if
 * readInSpecifiedRegion accepts it and its box index (BPLeft / BoxSize) is
 * below NumBoxes, its index is stored in that box and the read is marked as
 * used. A read whose box index is out of range is left unused and uncounted
 * instead of being written past the end of the box array.
 *
 * The boxes are finally passed to SortAndOutputTandemDuplications together
 * with an output stream opened in append mode on
 * currentState.TDOutputFilename.
 *
 * The three counters are function-local statics, so the logged totals
 * accumulate over all calls of this function.
 *
 * @param currentState  holds Reads (scanned, modified), FutureReads, the bin
 *                      border, the region settings and the output file name
 * @param NumBoxes      number of boxes to group reads into
 * @return EXIT_SUCCESS
 */
int searchTandemDuplications(ControlState& currentState, unsigned NumBoxes)
{
    static int Count_TD = 0;
    static int Count_TD_Plus = 0;
    static int Count_TD_Minus = 0;

    // NOTE(review): an array of std::vector with a run-time bound relies on a
    // compiler extension; kept because SortAndOutputTandemDuplications is
    // called with this array.
    std::vector<unsigned> TD[NumBoxes];

    LOG_INFO(std::cout << "Searching tandem duplication events ... " << std::endl);

    for (unsigned ReadIndex = 0; ReadIndex < currentState.Reads.size(); ReadIndex++) {
        if (currentState.Reads[ReadIndex].Used || currentState.Reads[ReadIndex].UP_Far.empty()) {
            continue;
        }

        if (currentState.Reads[ReadIndex].MatchedD == Plus) {
            for (short MAX_SNP_ERROR_index = 0;
                    MAX_SNP_ERROR_index <= currentState.Reads[ReadIndex].MAX_SNP_ERROR;
                    MAX_SNP_ERROR_index++) {
                // Close anchors are walked front to back, far anchors back to front.
                for (unsigned int CloseIndex = 0;
                        CloseIndex < currentState.Reads[ReadIndex].UP_Close.size();
                        CloseIndex++) {
                    if (currentState.Reads[ReadIndex].Used) {
                        break;
                    }
                    if (currentState.Reads[ReadIndex].UP_Close[CloseIndex].Mismatches
                            > MAX_SNP_ERROR_index) {
                        continue;
                    }
                    for (int FarIndex = currentState.Reads[ReadIndex].UP_Far.size() - 1;
                            FarIndex >= 0; FarIndex--) {
                        if (currentState.Reads[ReadIndex].Used) {
                            break;
                        }
                        if (currentState.Reads[ReadIndex].UP_Far[FarIndex].Mismatches
                                > MAX_SNP_ERROR_index) {
                            continue;
                        }
                        if (currentState.Reads[ReadIndex].UP_Far[FarIndex].Mismatches
                                + currentState.Reads[ReadIndex].UP_Close[CloseIndex].Mismatches
                                > MAX_SNP_ERROR_index) {
                            continue;
                        }
                        if (currentState.Reads[ReadIndex].UP_Far[FarIndex].Direction != Minus) {
                            continue;
                        }
                        // Anchors must cover the read exactly and the far anchor
                        // must lie strictly before the close anchor.
                        if (!(currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr
                                    + currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr
                                    == currentState.Reads[ReadIndex].ReadLength
                                && currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                                    + currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr
                                    < currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                                && currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                                    + currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr
                                    < currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc)) {
                            continue;
                        }

                        currentState.Reads[ReadIndex].Right =
                            currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                            - currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr + 1;
                        currentState.Reads[ReadIndex].Left =
                            currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                            + currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr - 1;
                        currentState.Reads[ReadIndex].BP =
                            currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr - 1;
                        currentState.Reads[ReadIndex].IndelSize =
                            currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                            - currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc + 1;
                        currentState.Reads[ReadIndex].BPRight =
                            currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                            - g_SpacerBeforeAfter;
                        currentState.Reads[ReadIndex].BPLeft =
                            currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                            - g_SpacerBeforeAfter;

                        if (readTransgressesBinBoundaries(currentState.Reads[ReadIndex],
                                                          currentState.upperBinBorder)) {
                            saveReadForNextCycle(currentState.Reads[ReadIndex],
                                                 currentState.FutureReads);
                        }
                        else if (readInSpecifiedRegion(currentState.Reads[ReadIndex],
                                                       currentState.regionStartDefined,
                                                       currentState.regionEndDefined,
                                                       currentState.startOfRegion,
                                                       currentState.endOfRegion)) {
                            // Never write outside the NumBoxes boxes.
                            const unsigned BoxIndex =
                                (int) currentState.Reads[ReadIndex].BPLeft / BoxSize;
                            if (BoxIndex < NumBoxes) {
                                TD[BoxIndex].push_back(ReadIndex);
                                currentState.Reads[ReadIndex].Used = true;
                                Count_TD++;
                                Count_TD_Plus++;
                            }
                        }
                    }
                }
            }
        }
        else if (currentState.Reads[ReadIndex].MatchedD == Minus) {
            for (short MAX_SNP_ERROR_index = 0;
                    MAX_SNP_ERROR_index <= currentState.Reads[ReadIndex].MAX_SNP_ERROR;
                    MAX_SNP_ERROR_index++) {
                // Mirror image of the Plus case: close anchors back to front,
                // far anchors front to back.
                for (int CloseIndex = currentState.Reads[ReadIndex].UP_Close.size() - 1;
                        CloseIndex >= 0; CloseIndex--) {
                    if (currentState.Reads[ReadIndex].Used) {
                        break;
                    }
                    if (currentState.Reads[ReadIndex].UP_Close[CloseIndex].Mismatches
                            > MAX_SNP_ERROR_index) {
                        continue;
                    }
                    for (int FarIndex = 0;
                            FarIndex < (int) currentState.Reads[ReadIndex].UP_Far.size();
                            FarIndex++) {
                        if (currentState.Reads[ReadIndex].Used) {
                            break;
                        }
                        if (currentState.Reads[ReadIndex].UP_Far[FarIndex].Mismatches
                                > MAX_SNP_ERROR_index) {
                            continue;
                        }
                        if (currentState.Reads[ReadIndex].UP_Far[FarIndex].Mismatches
                                + currentState.Reads[ReadIndex].UP_Close[CloseIndex].Mismatches
                                > MAX_SNP_ERROR_index) {
                            continue;
                        }
                        if (currentState.Reads[ReadIndex].UP_Far[FarIndex].Direction != Plus) {
                            continue;
                        }
                        // Anchors must cover the read exactly and the close anchor
                        // must lie strictly before the far anchor.
                        if (!(currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr
                                    + currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr
                                    == currentState.Reads[ReadIndex].ReadLength
                                && currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                                    + currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr
                                    < currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                                && currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                                    + currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr
                                    < currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc)) {
                            continue;
                        }

                        currentState.Reads[ReadIndex].Right =
                            currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                            - currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr + 1;
                        currentState.Reads[ReadIndex].Left =
                            currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                            + currentState.Reads[ReadIndex].UP_Close[CloseIndex].LengthStr - 1;
                        currentState.Reads[ReadIndex].BP =
                            currentState.Reads[ReadIndex].UP_Far[FarIndex].LengthStr - 1;
                        currentState.Reads[ReadIndex].IndelSize =
                            currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                            - currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc + 1;
                        currentState.Reads[ReadIndex].BPRight =
                            currentState.Reads[ReadIndex].UP_Far[FarIndex].AbsLoc
                            - g_SpacerBeforeAfter;
                        currentState.Reads[ReadIndex].BPLeft =
                            currentState.Reads[ReadIndex].UP_Close[CloseIndex].AbsLoc
                            - g_SpacerBeforeAfter;

                        if (readTransgressesBinBoundaries(currentState.Reads[ReadIndex],
                                                          currentState.upperBinBorder)) {
                            saveReadForNextCycle(currentState.Reads[ReadIndex],
                                                 currentState.FutureReads);
                        }
                        else if (readInSpecifiedRegion(currentState.Reads[ReadIndex],
                                                       currentState.regionStartDefined,
                                                       currentState.regionEndDefined,
                                                       currentState.startOfRegion,
                                                       currentState.endOfRegion)) {
                            // Never write outside the NumBoxes boxes.
                            const unsigned BoxIndex =
                                (int) currentState.Reads[ReadIndex].BPLeft / BoxSize;
                            if (BoxIndex < NumBoxes) {
                                TD[BoxIndex].push_back(ReadIndex);
                                currentState.Reads[ReadIndex].Used = true;
                                Count_TD++;
                                Count_TD_Minus++;
                            }
                        }
                    }
                }
            }
        }
    }

    LOG_INFO(std::cout << "Total: " << Count_TD << "\t+" << Count_TD_Plus << "\t-" << Count_TD_Minus << std::endl);

    std::ofstream TDOutf(currentState.TDOutputFilename.c_str(), std::ios::app);
    SortAndOutputTandemDuplications(NumBoxes, currentState.CurrentChr, currentState.Reads,
                                    TD, TDOutf, false);

    for (unsigned int i = 0; i < NumBoxes; i++) {
        TD[i].clear();
    }

    return EXIT_SUCCESS;
}