// Simplify the graph, then run one trim pass and one smoothing pass over it.
//
// The graph is re-simplified every time a visitP() call returns true
// (presumably meaning the visitor modified the graph -- confirm against
// StringGraph::visitP). When the smoothing pass returns true, a second trim
// pass is run afterwards.
//
// pGraph        - graph to process; dereferenced unconditionally, so it must
//                 not be null.
// trimLength    - passed to SGTrimVisitor as its second constructor argument.
// bIsGapPrecent - passed to SGSmoothingVisitor as its third constructor
//                 argument (spelling kept as-is in the signature).
void graphTrimAndSmooth (StringGraph* pGraph, size_t trimLength, bool bIsGapPrecent)
{
    pGraph->simplify();

    SGTrimVisitor trimmer("", trimLength);
    SGSmoothingVisitor smoother(opt::maxIndelLength, opt::pBWT, bIsGapPrecent);
    // Disabled:
    // SGSimpleBubbleVisitor sbVisit(opt::pBWT, opt::kmerLength, opt::maxBubbleGapDivergence,
    //                               opt::maxBubbleDivergence, opt::maxIndelLength);

    // Initial trim pass.
    const bool firstTrimHit = pGraph->visitP(trimmer);
    if (firstTrimHit)
    {
        pGraph->simplify();
    }

    // Smoothing pass; only when it returns true is the graph trimmed again.
    const bool smoothingHit = pGraph->visitP(smoother);
    if (smoothingHit)
    {
        pGraph->simplify();

        const bool secondTrimHit = pGraph->visitP(trimmer);
        if (secondTrimHit)
        {
            pGraph->simplify();
        }
    }

    // Disabled passes, kept for reference:
    // SGDuplicateVisitor sgdup;
    // if(pGraph->visit(sgdup))
    //     pGraph->simplify();
    // if (pGraph->visit(sbVisit))
    // {
    //     pGraph->simplify();
    //     if (pGraph->visitP(trimVisit))
    //         pGraph->simplify();
    // }
}
void assemble() { Timer t("sga assemble"); StringGraph* pGraph = SGUtil::loadASQG(opt::asqgFile, opt::minOverlap, true); if(opt::bExact) pGraph->setExactMode(true); pGraph->printMemSize(); // Visitor functors SGTransitiveReductionVisitor trVisit; SGGraphStatsVisitor statsVisit; SGRemodelVisitor remodelVisit; SGEdgeStatsVisitor edgeStatsVisit; SGTrimVisitor trimVisit(opt::trimLengthThreshold); SGBubbleVisitor bubbleVisit; SGBubbleEdgeVisitor bubbleEdgeVisit; SGContainRemoveVisitor containVisit; SGValidateStructureVisitor validationVisit; // Pre-assembly graph stats std::cout << "[Stats] Input graph:\n"; pGraph->visit(statsVisit); // Remove containments from the graph std::cout << "Removing contained vertices from graph\n"; while(pGraph->hasContainment()) pGraph->visit(containVisit); // Pre-assembly graph stats std::cout << "[Stats] After removing contained vertices:\n"; pGraph->visit(statsVisit); // Remove any extraneous transitive edges that may remain in the graph if(opt::bPerformTR) { std::cout << "Removing transitive edges\n"; pGraph->visit(trVisit); } // Compact together unbranched chains of vertices pGraph->simplify(); if(opt::bValidate) { std::cout << "Validating graph structure\n"; pGraph->visit(validationVisit); } // Remove dead-end branches from the graph if(opt::numTrimRounds > 0) { std::cout << "Trimming bad vertices\n"; int numTrims = opt::numTrimRounds; while(numTrims-- > 0) pGraph->visit(trimVisit); std::cout << "\n[Stats] Graph after trimming:\n"; pGraph->visit(statsVisit); } // Resolve small repeats if(opt::resolveSmallRepeatLen > 0) { SGSmallRepeatResolveVisitor smallRepeatVisit(opt::resolveSmallRepeatLen); std::cout << "Resolving small repeats\n"; int totalSmallRepeatRounds = 0; while(pGraph->visit(smallRepeatVisit)) std::cout << "Finished small repeat resolve round " << totalSmallRepeatRounds++ << "\n"; std::cout << "\n[Stats] After small repeat resolution:\n"; pGraph->visit(statsVisit); } // if(opt::coverageCutoff > 0) { std::cout << "Coverage 
visit\n"; SGCoverageVisitor coverageVisit(opt::coverageCutoff); pGraph->visit(coverageVisit); pGraph->visit(trimVisit); pGraph->visit(trimVisit); pGraph->visit(trimVisit); } // Peform another round of simplification pGraph->simplify(); if(opt::numBubbleRounds > 0) { std::cout << "\nPerforming variation smoothing\n"; SGSmoothingVisitor smoothingVisit(opt::outVariantsFile, opt::maxBubbleGapDivergence, opt::maxBubbleDivergence, opt::maxIndelLength); int numSmooth = opt::numBubbleRounds; while(numSmooth-- > 0) pGraph->visit(smoothingVisit); pGraph->simplify(); } pGraph->renameVertices("contig-"); std::cout << "\n[Stats] Final graph:\n"; pGraph->visit(statsVisit); // Rename the vertices to have contig IDs instead of read IDs //pGraph->renameVertices("contig-"); // Write the results SGFastaVisitor av(opt::outContigsFile); pGraph->visit(av); pGraph->writeASQG(opt::outGraphFile); delete pGraph; }