// Given a SMRT sequence and a subread interval, make the subread. // Input: // smrtRead - a SMRT sequence // subreadInterval - a subread interval // params - mapping parameters // Output: // subreadSequence - the constructed subread void MakeSubreadOfInterval(SMRTSequence & subreadSequence, SMRTSequence & smrtRead, ReadInterval & subreadInterval, MappingParameters & params) { int start = subreadInterval.start; int end = subreadInterval.end; assert(smrtRead.length >= subreadSequence.length); smrtRead.MakeSubreadAsMasked(subreadSequence, start, end); if (!params.preserveReadTitle) { smrtRead.SetSubreadTitle(subreadSequence, subreadSequence.SubreadStart(), subreadSequence.SubreadEnd()); } else { subreadSequence.CopyTitle(smrtRead.title); } subreadSequence.zmwData = smrtRead.zmwData; }
// Assemble a single "virtual" read from a set of subreads.
// Input:
//   subreads - one or more subreads; each contributes its bases at the
//              offset given by its SubreadStart(). Must not be empty.
// Output:
//   smrtRead - freed and re-allocated to the largest SubreadEnd() seen,
//              filled with 'N', then overwritten with each subread's bases.
//              lowQualityPrefix is set to the smallest SubreadStart();
//              the high quality region score and hole number are taken
//              from the first subread, and the title becomes
//              "<movie name>/<hole number>".
void MakeVirtualRead(SMRTSequence & smrtRead,
                     const vector<SMRTSequence> & subreads)
{
    assert(subreads.size() > 0);

    // Seed the lower bound from the first subread. Seeding it with 0 (as an
    // unsigned value) would make every min() below return 0, so the prefix
    // would never reflect the earliest subread start.
    DNALength hqStart = DNALength(subreads[0].SubreadStart());
    DNALength hqEnd = 0;
    // Iterate by const reference so no subread is copied.
    for (const auto & subread : subreads) {
        hqStart = min(DNALength(subread.SubreadStart()), hqStart);
        hqEnd   = max(DNALength(subread.SubreadEnd()),   hqEnd);
    }

    smrtRead.Free();
    smrtRead.Allocate(hqEnd);
    // Positions not covered by any subread stay 'N'.
    memset(smrtRead.seq, 'N', sizeof(char) * hqEnd);

    smrtRead.lowQualityPrefix = hqStart;
    // NOTE(review): presumably 0 when Allocate() sets length to hqEnd - confirm.
    smrtRead.lowQualitySuffix = smrtRead.length - hqEnd;
    smrtRead.highQualityRegionScore = subreads[0].highQualityRegionScore;
    smrtRead.HoleNumber(subreads[0].HoleNumber());

    stringstream ss;
    ss << SMRTTitle(subreads[0].GetTitle()).MovieName() << "/" << subreads[0].HoleNumber();
    smrtRead.CopyTitle(ss.str());

    // Place each subread's bases at its own start offset in the virtual read.
    for (const auto & subread : subreads) {
        memcpy(&smrtRead.seq[subread.SubreadStart()],
               &subread.seq[0], sizeof(char) * subread.length);
    }
}
// Give `subread` a title of the form "<title>/<subreadStart>_<subreadEnd>",
// where <title> is the title of this sequence.
void SMRTSequence::SetSubreadTitle(SMRTSequence &subread,
                                   DNALength subreadStart,
                                   DNALength subreadEnd)
{
    stringstream composedTitle;
    composedTitle << title;
    composedTitle << "/";
    composedTitle << subreadStart;
    composedTitle << "_";
    composedTitle << subreadEnd;
    subread.CopyTitle(composedTitle.str());
}
int main(int argc, char* argv[]) { string inputFileName, outputFileName; if (argc < 2) { PrintUsage(); exit(0); } vector<string> inputFileNames; inputFileName = argv[1]; outputFileName = argv[2]; int argi = 3; RegionTable regionTable; string regionsFOFNName = ""; vector<string> regionFileNames; bool splitSubreads = true; bool useCCS = false; int minSubreadLength = 1; while (argi < argc) { if (strcmp(argv[argi], "-regionTable") == 0) { regionsFOFNName = argv[++argi]; } else if (strcmp(argv[argi], "-noSplitSubreads") == 0) { splitSubreads = false; } else if (strcmp(argv[argi], "-minSubreadLength") == 0) { minSubreadLength = atoi(argv[++argi]); } else if (strcmp(argv[argi], "-useccsdenovo") == 0) { useCCS = true; } else { PrintUsage(); cout << "ERROR! Option " << argv[argi] << " is not supported." << endl; } argi++; } if (FileOfFileNames::IsFOFN(inputFileName)) { FileOfFileNames::FOFNToList(inputFileName, inputFileNames); } else { inputFileNames.push_back(inputFileName); } if (regionsFOFNName == "") { regionFileNames = inputFileNames; } else { if (FileOfFileNames::IsFOFN(regionsFOFNName)) { FileOfFileNames::FOFNToList(regionsFOFNName, regionFileNames); } else { regionFileNames.push_back(regionsFOFNName); } } ofstream fastaOut; CrucialOpen(outputFileName, fastaOut); int plsFileIndex; HDFRegionTableReader hdfRegionReader; AfgBasWriter afgWriter; afgWriter.Initialize(outputFileName); for (plsFileIndex = 0; plsFileIndex < inputFileNames.size(); plsFileIndex++) { if (splitSubreads) { hdfRegionReader.Initialize(regionFileNames[plsFileIndex]); hdfRegionReader.ReadTable(regionTable); regionTable.SortTableByHoleNumber(); } ReaderAgglomerate reader; // reader.SkipReadQuality(); // should have been taken care of by *Filter modules if (useCCS){ reader.UseCCS(); } else { reader.IgnoreCCS(); } reader.Initialize(inputFileNames[plsFileIndex]); CCSSequence seq; int seqIndex = 0; int numRecords = 0; vector<ReadInterval> subreadIntervals; while (reader.GetNext(seq)){ ++seqIndex; if 
(splitSubreads == false) { if (seq.length >= minSubreadLength) { afgWriter.Write(seq); } seq.Free(); continue; } DNALength hqReadStart, hqReadEnd; int score; GetReadTrimCoordinates(seq, seq.zmwData, regionTable, hqReadStart, hqReadEnd, score); subreadIntervals.clear(); // clear old, new intervals are appended. CollectSubreadIntervals(seq,®ionTable, subreadIntervals); if (seq.length == 0 and subreadIntervals.size() > 0) { cout << "WARNING! A high quality interval region exists for a read of length 0." <<endl; cout << " The offending ZMW number is " << seq.zmwData.holeNumber << endl; seq.Free(); continue; } for (int intvIndex = 0; intvIndex < subreadIntervals.size(); intvIndex++) { SMRTSequence subreadSequence; int subreadStart = subreadIntervals[intvIndex].start > hqReadStart ? subreadIntervals[intvIndex].start : hqReadStart; int subreadEnd = subreadIntervals[intvIndex].end < hqReadEnd ? subreadIntervals[intvIndex].end : hqReadEnd; int subreadLength = subreadEnd - subreadStart; if (subreadLength < minSubreadLength) continue; subreadSequence.subreadStart = subreadStart; subreadSequence.subreadEnd = subreadEnd; subreadSequence.ReferenceSubstring(seq, subreadStart, subreadLength); stringstream titleStream; titleStream << seq.title << "/" << subreadIntervals[intvIndex].start << "_" << subreadIntervals[intvIndex].end; subreadSequence.CopyTitle(titleStream.str()); afgWriter.Write(subreadSequence); delete[] subreadSequence.title; } seq.Free(); } reader.Close(); hdfRegionReader.Close(); } }