void rmdup() { StringVector hitsFilenames; BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate); BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate); OverlapAlgorithm* pOverlapper = new OverlapAlgorithm(pBWT, pRBWT, opt::errorRate, 0, 0, false); Timer* pTimer = new Timer(PROGRAM_IDENT); if(opt::numThreads <= 1) { printf("[%s] starting serial-mode overlap computation\n", PROGRAM_IDENT); computeRmdupHitsSerial(opt::prefix, opt::readsFile, pOverlapper, hitsFilenames); } else { printf("[%s] starting parallel-mode overlap computation with %d threads\n", PROGRAM_IDENT, opt::numThreads); computeRmdupHitsParallel(opt::numThreads, opt::prefix, opt::readsFile, pOverlapper, hitsFilenames); } delete pOverlapper; delete pBWT; delete pRBWT; delete pTimer; std::string out_prefix = stripExtension(opt::outFile); std::string dupsFile = parseDupHits(hitsFilenames, out_prefix); // Rebuild the indices without the duplicated sequences if(opt::bReindex) { std::cout << "Rebuilding indices without duplicated reads\n"; removeReadsFromIndices(opt::prefix, dupsFile, out_prefix, BWT_EXT, SAI_EXT, false, opt::numThreads); removeReadsFromIndices(opt::prefix, dupsFile, out_prefix, RBWT_EXT, RSAI_EXT, true, opt::numThreads); } }
// // Main // int filterMain(int argc, char** argv) { parseFilterOptions(argc, argv); Timer* pTimer = new Timer(PROGRAM_IDENT); BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate); BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate); pBWT->printInfo(); std::ostream* pWriter = createWriter(opt::outFile); std::ostream* pDiscardWriter = createWriter(opt::discardFile); QCPostProcess* pPostProcessor = new QCPostProcess(pWriter, pDiscardWriter); // If performing duplicate check, create a bitvector to record // which reads are duplicates BitVector* pSharedBV = NULL; if(opt::dupCheck) pSharedBV = new BitVector(pBWT->getNumStrings()); // Set up QC parameters QCParameters params; params.pBWT = pBWT; params.pRevBWT = pRBWT; params.pSharedBV = pSharedBV; params.checkDuplicates = opt::dupCheck; params.substringOnly = opt::substringOnly; params.checkKmer = opt::kmerCheck; params.kmerBothStrand = opt::kmerBothStrand; params.checkHPRuns = opt::hpCheck; params.checkDegenerate = opt::lowComplexityCheck; params.verbose = opt::verbose; params.kmerLength = opt::kmerLength; params.kmerThreshold = opt::kmerThreshold; params.hpKmerLength = 51; params.hpHardAcceptCount = 10; params.hpMinProportion = 0.1f; params.hpMinLength = 6; if(opt::numThreads <= 1) { // Serial mode QCProcess processor(params); PROCESS_FILTER_SERIAL(opt::readsFile, &processor, pPostProcessor); } else { // Parallel mode std::vector<QCProcess*> processorVector; for(int i = 0; i < opt::numThreads; ++i) { QCProcess* pProcessor = new QCProcess(params); processorVector.push_back(pProcessor); } PROCESS_FILTER_PARALLEL(opt::readsFile, processorVector, pPostProcessor); for(int i = 0; i < opt::numThreads; ++i) delete processorVector[i]; } delete pPostProcessor; delete pWriter; delete pDiscardWriter; delete pBWT; delete pRBWT; if(pSharedBV != NULL) delete pSharedBV; // Rebuild the FM-index without the discarded reads std::string out_prefix = stripFilename(opt::outFile); removeReadsFromIndices(opt::prefix, opt::discardFile, out_prefix, BWT_EXT, SAI_EXT, false, opt::numThreads); removeReadsFromIndices(opt::prefix, opt::discardFile, out_prefix, RBWT_EXT, RSAI_EXT, true, opt::numThreads); // Cleanup delete pTimer; if(opt::numThreads > 1) pthread_exit(NULL); return 0; }