Ejemplo n.º 1
0
//
// Main
//
int filterMain(int argc, char** argv)
{
    parseFilterOptions(argc, argv);
    Timer* pTimer = new Timer(PROGRAM_IDENT);


    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
    //pBWT->printInfo();

    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = createWriter(opt::discardFile);
    QCPostProcess* pPostProcessor = new QCPostProcess(pWriter, pDiscardWriter);

    // If performing duplicate check, create a bitvector to record
    // which reads are duplicates
    BitVector* pSharedBV = NULL;
    if(opt::dupCheck)
        pSharedBV = new BitVector(pBWT->getNumStrings());

    // Set up QC parameters
    QCParameters params;
    params.pBWT = pBWT;
    params.pRevBWT = pRBWT;
    params.pSharedBV = pSharedBV;

    params.checkDuplicates = opt::dupCheck;
    params.substringOnly = opt::substringOnly;
    params.checkKmer = opt::kmerCheck;
    params.checkHPRuns = opt::hpCheck;
    params.checkDegenerate = opt::lowComplexityCheck;

    params.verbose = opt::verbose;

    params.kmerLength = opt::kmerLength;
    params.kmerThreshold = opt::kmerThreshold;

    params.hpKmerLength = 51;
    params.hpHardAcceptCount = 10;
    params.hpMinProportion = 0.1f;
    params.hpMinLength = 6;

    if(opt::numThreads <= 1)
    {
        // Serial mode
        QCProcess processor(params);
        PROCESS_FILTER_SERIAL(opt::readsFile, &processor, pPostProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<QCProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            QCProcess* pProcessor = new QCProcess(params);
            processorVector.push_back(pProcessor);
        }

        PROCESS_FILTER_PARALLEL(opt::readsFile, processorVector, pPostProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
            delete processorVector[i];
    }

    delete pPostProcessor;
    delete pWriter;
    delete pDiscardWriter;

    delete pBWT;
    delete pRBWT;

    if(pSharedBV != NULL)
        delete pSharedBV;

    std::cout << "RE-building index for " << opt::outFile << " in memory using ropebwt2\n";
    std::string prefix=stripFilename(opt::outFile);
        //BWT *pBWT, *pRBWT;
		#pragma omp parallel
		{
			#pragma omp single nowait
			{	
			    std::string bwt_filename = prefix + BWT_EXT;
				BWTCA::runRopebwt2(opt::outFile, bwt_filename, opt::numThreads, false);
				std::cout << "\t done bwt construction, generating .sai file\n";
				pBWT = new BWT(bwt_filename);
			}
			#pragma omp single nowait
			{	
				std::string rbwt_filename = prefix + RBWT_EXT;
				BWTCA::runRopebwt2(opt::outFile, rbwt_filename, opt::numThreads, true);
				std::cout << "\t done rbwt construction, generating .rsai file\n";
				pRBWT = new BWT(rbwt_filename);
			}
		}
        std::string sai_filename = prefix + SAI_EXT;
		SampledSuffixArray ssa;
        ssa.buildLexicoIndex(pBWT, opt::numThreads);
        ssa.writeLexicoIndex(sai_filename);
        delete pBWT;

        std::string rsai_filename = prefix + RSAI_EXT;
        SampledSuffixArray rssa;
        rssa.buildLexicoIndex(pRBWT, opt::numThreads);
        rssa.writeLexicoIndex(rsai_filename);
        delete pRBWT;

    // Cleanup
    delete pTimer;

    return 0;
}
Ejemplo n.º 2
0
//
// Main
//
int filterMain(int argc, char** argv)
{
    parseFilterOptions(argc, argv);
    Timer* pTimer = new Timer(PROGRAM_IDENT);


    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
    pBWT->printInfo();

    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = createWriter(opt::discardFile);
    QCPostProcess* pPostProcessor = new QCPostProcess(pWriter, pDiscardWriter);

    // If performing duplicate check, create a bitvector to record
    // which reads are duplicates
    BitVector* pSharedBV = NULL;
    if(opt::dupCheck)
        pSharedBV = new BitVector(pBWT->getNumStrings());

    // Set up QC parameters
    QCParameters params;
    params.pBWT = pBWT;
    params.pRevBWT = pRBWT;
    params.pSharedBV = pSharedBV;

    params.checkDuplicates = opt::dupCheck;
    params.substringOnly = opt::substringOnly;
    params.checkKmer = opt::kmerCheck;
    params.kmerBothStrand = opt::kmerBothStrand;
    params.checkHPRuns = opt::hpCheck;
    params.checkDegenerate = opt::lowComplexityCheck;

    params.verbose = opt::verbose;

    params.kmerLength = opt::kmerLength;
    params.kmerThreshold = opt::kmerThreshold;

    params.hpKmerLength = 51;
    params.hpHardAcceptCount = 10;
    params.hpMinProportion = 0.1f;
    params.hpMinLength = 6;

    if(opt::numThreads <= 1)
    {
        // Serial mode
        QCProcess processor(params);
        PROCESS_FILTER_SERIAL(opt::readsFile, &processor, pPostProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<QCProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            QCProcess* pProcessor = new QCProcess(params);
            processorVector.push_back(pProcessor);
        }

        PROCESS_FILTER_PARALLEL(opt::readsFile, processorVector, pPostProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
            delete processorVector[i];
    }

    delete pPostProcessor;
    delete pWriter;
    delete pDiscardWriter;

    delete pBWT;
    delete pRBWT;

    if(pSharedBV != NULL)
        delete pSharedBV;

    // Rebuild the FM-index without the discarded reads
    std::string out_prefix = stripFilename(opt::outFile);
    removeReadsFromIndices(opt::prefix, opt::discardFile, out_prefix, BWT_EXT, SAI_EXT, false, opt::numThreads);
    removeReadsFromIndices(opt::prefix, opt::discardFile, out_prefix, RBWT_EXT, RSAI_EXT, true, opt::numThreads);

    // Cleanup
    delete pTimer;
    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}