Example #1
0
//
// Main
//
int correctMain(int argc, char** argv)
{
    parseCorrectOptions(argc, argv);

    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = NULL;

    // If the correction mode is k-mer only, then do not load the reverse
    // BWT as it is not needed
    if(opt::algorithm != ECA_KMER)
        pRBWT = new BWT(opt::prefix + RBWT_EXT, opt::sampleRate);
    
    BWTIntervalCache intervalCache(opt::intervalCacheLength, pBWT);

    OverlapAlgorithm* pOverlapper = new OverlapAlgorithm(pBWT, NULL, 
                                                         opt::errorRate, opt::seedLength, 
                                                         opt::seedStride, false, opt::branchCutoff);
    

    // Learn the parameters of the kmer corrector
    if(opt::bLearnKmerParams)
    {
        int threshold = learnKmerParameters(pBWT);
        if(threshold != -1)
            CorrectionThresholds::Instance().setBaseMinSupport(threshold);
    }


    // Open outfiles and start a timer
    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = (!opt::discardFile.empty() ? createWriter(opt::discardFile) : NULL);
    Timer* pTimer = new Timer(PROGRAM_IDENT);
    pBWT->printInfo();

    // Set the error correction parameters
    ErrorCorrectParameters ecParams;
    ecParams.pOverlapper = pOverlapper;
    ecParams.pIntervalCache = &intervalCache;
    ecParams.algorithm = opt::algorithm;

    ecParams.minOverlap = opt::minOverlap;
    ecParams.numOverlapRounds = opt::numOverlapRounds;
    ecParams.conflictCutoff = opt::conflictCutoff;

    ecParams.numKmerRounds = opt::numKmerRounds;
    ecParams.kmerLength = opt::kmerLength;
    ecParams.printOverlaps = opt::verbose > 1;

    // Setup post-processor
    bool bCollectMetrics = !opt::metricsFile.empty();
    ErrorCorrectPostProcess postProcessor(pWriter, pDiscardWriter, bCollectMetrics);

    if(opt::numThreads <= 1)
    {
        // Serial mode
        ErrorCorrectProcess processor(ecParams); 
        SequenceProcessFramework::processSequencesSerial<SequenceWorkItem,
                                                         ErrorCorrectResult, 
                                                         ErrorCorrectProcess, 
                                                         ErrorCorrectPostProcess>(opt::readsFile, &processor, &postProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<ErrorCorrectProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            ErrorCorrectProcess* pProcessor = new ErrorCorrectProcess(ecParams);
            processorVector.push_back(pProcessor);
        }
        
        SequenceProcessFramework::processSequencesParallel<SequenceWorkItem,
                                                           ErrorCorrectResult, 
                                                           ErrorCorrectProcess, 
                                                           ErrorCorrectPostProcess>(opt::readsFile, processorVector, &postProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
        {
            delete processorVector[i];
        }
    }

    if(bCollectMetrics)
    {
        std::ostream* pMetricsWriter = createWriter(opt::metricsFile);
        postProcessor.writeMetrics(pMetricsWriter);
        delete pMetricsWriter;
    }

    delete pBWT;
    if(pRBWT != NULL)
        delete pRBWT;

    delete pOverlapper;
    delete pTimer;
    
    delete pWriter;
    if(pDiscardWriter != NULL)
        delete pDiscardWriter;

    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}
Example #2
0
//
// Main
//
int correctMain(int argc, char** argv)
{
    parseCorrectOptions(argc, argv);

    std::cout << "Correcting sequencing errors for " << opt::readsFile << "\n";

    // Load indices
    BWT* pBWT = new BWT(opt::prefix + BWT_EXT, opt::sampleRate);
    BWT* pRBWT = NULL;
    SampledSuffixArray* pSSA = NULL;

    if(opt::algorithm == ECA_OVERLAP)
        pSSA = new SampledSuffixArray(opt::prefix + SAI_EXT, SSA_FT_SAI);

    BWTIntervalCache* pIntervalCache = new BWTIntervalCache(opt::intervalCacheLength, pBWT);

    BWTIndexSet indexSet;
    indexSet.pBWT = pBWT;
    indexSet.pRBWT = pRBWT;
    indexSet.pSSA = pSSA;
    indexSet.pCache = pIntervalCache;

    // Learn the parameters of the kmer corrector
    if(opt::bLearnKmerParams)
    {
        int threshold = learnKmerParameters(pBWT);
        if(threshold != -1)
            CorrectionThresholds::Instance().setBaseMinSupport(threshold);
    }

    // Open outfiles and start a timer
    std::ostream* pWriter = createWriter(opt::outFile);
    std::ostream* pDiscardWriter = (!opt::discardFile.empty() ? createWriter(opt::discardFile) : NULL);
    Timer* pTimer = new Timer(PROGRAM_IDENT);
    pBWT->printInfo();

    // Set the error correction parameters
    ErrorCorrectParameters ecParams;
    ecParams.pOverlapper = NULL;
    ecParams.indices = indexSet;
    ecParams.algorithm = opt::algorithm;

    ecParams.minOverlap = opt::minOverlap;
    ecParams.numOverlapRounds = opt::numOverlapRounds;
    ecParams.minIdentity = 1.0f - opt::errorRate;
    ecParams.conflictCutoff = opt::conflictCutoff;

    ecParams.numKmerRounds = opt::numKmerRounds;
    ecParams.kmerLength = opt::kmerLength;
    ecParams.printOverlaps = opt::verbose > 0;

    // Setup post-processor
    bool bCollectMetrics = !opt::metricsFile.empty();
    ErrorCorrectPostProcess postProcessor(pWriter, pDiscardWriter, bCollectMetrics);

    if(opt::numThreads <= 1)
    {
        // Serial mode
        ErrorCorrectProcess processor(ecParams);
        SequenceProcessFramework::processSequencesSerial<SequenceWorkItem,
                                 ErrorCorrectResult,
                                 ErrorCorrectProcess,
                                 ErrorCorrectPostProcess>(opt::readsFile, &processor, &postProcessor);
    }
    else
    {
        // Parallel mode
        std::vector<ErrorCorrectProcess*> processorVector;
        for(int i = 0; i < opt::numThreads; ++i)
        {
            ErrorCorrectProcess* pProcessor = new ErrorCorrectProcess(ecParams);
            processorVector.push_back(pProcessor);
        }

        SequenceProcessFramework::processSequencesParallel<SequenceWorkItem,
                                 ErrorCorrectResult,
                                 ErrorCorrectProcess,
                                 ErrorCorrectPostProcess>(opt::readsFile, processorVector, &postProcessor);

        for(int i = 0; i < opt::numThreads; ++i)
        {
            delete processorVector[i];
        }
    }

    if(bCollectMetrics)
    {
        std::ostream* pMetricsWriter = createWriter(opt::metricsFile);
        postProcessor.writeMetrics(pMetricsWriter);
        delete pMetricsWriter;
    }

    delete pBWT;
    delete pIntervalCache;
    if(pRBWT != NULL)
        delete pRBWT;

    if(pSSA != NULL)
        delete pSSA;

    delete pTimer;

    delete pWriter;
    if(pDiscardWriter != NULL)
        delete pDiscardWriter;

    if(opt::numThreads > 1)
        pthread_exit(NULL);

    return 0;
}