示例#1
0
//
// Main
//
// Entry point for the filterBAM subprogram.
//
// Reads alignment pairs from opt::bamFile and writes the pairs that survive
// all enabled filters to opt::outFile. A pair is dropped when:
//   - either record is unmapped,
//   - either record's error rate (getErrorRate) exceeds opt::maxError,
//   - either record's mapping quality is below opt::minQuality,
//   - (only if an FM-index prefix was given, opt::maxKmerDepth > 0 and the
//     two records have different RefIDs) either read's max k-mer depth
//     exceeds opt::maxKmerDepth,
//   - (only if an ASQG file was given) filterByGraph rejects the pair.
// Per-filter counts are printed to stdout once the input is exhausted.
//
// argc/argv are forwarded to parseFilterBAMOptions.
// Always returns 0.
int filterBAMMain(int argc, char** argv)
{
    parseFilterBAMOptions(argc, argv);

    // Read the graph if distance-filtering mode is enabled
    StringGraph* pGraph = NULL;
    if(!opt::asqgFile.empty())
        pGraph = SGUtil::loadASQG(opt::asqgFile, 0, false);

    // Read the BWTs if depth-filtering mode is enabled
    BWT* pBWT = NULL;
    BWT* pRBWT = NULL;
    if(!opt::fmIndexPrefix.empty())
    {
        pBWT = new BWT(opt::fmIndexPrefix + BWT_EXT, opt::sampleRate);
        pRBWT = new BWT(opt::fmIndexPrefix + RBWT_EXT, opt::sampleRate);
    }
    const bool bDepthFilterEnabled = (pBWT != NULL && pRBWT != NULL && opt::maxKmerDepth > 0);

    Timer* pTimer = new Timer(PROGRAM_IDENT);

    // Statistics reported at the end of the run. A pair failing both the
    // error-rate and the quality filter is counted under both.
    int numPairsTotal = 0;
    int numPairsFilteredByDistance = 0;
    int numPairsFilteredByER = 0;
    int numPairsFilteredByQuality = 0;
    int numPairsFilteredByDepth = 0;
    int numPairsUnmapped = 0;
    int numPairsWrote = 0;

    // Open the bam files for reading/writing
    // NOTE(review): the results of both Open() calls are not checked here;
    // confirm how the BamTools version in use reports a failed open.
    BamTools::BamReader* pBamReader = new BamTools::BamReader;
    pBamReader->Open(opt::bamFile);
    const BamTools::RefVector& referenceVector = pBamReader->GetReferenceData();

    BamTools::BamWriter* pBamWriter = new BamTools::BamWriter;
    pBamWriter->Open(opt::outFile, pBamReader->GetHeaderText(), referenceVector);

    BamTools::BamAlignment record1;
    BamTools::BamAlignment record2;

    // Count a pair only after it has actually been read, so the final
    // (failed) read does not inflate the total.
    while(readAlignmentPair(pBamReader, record1, record2))
    {
        numPairsTotal += 1;
        if(numPairsTotal % 200000 == 0)
            printf("[sga filterBAM] Processed %d pairs\n", numPairsTotal);

        if(!record1.IsMapped() || !record2.IsMapped())
        {
            numPairsUnmapped += 1;
            continue;
        }

        // Ensure the pairing is correct
        if(record1.Name != record2.Name)
        {
            std::cout << "NAME FAIL: " << record1.Name << " " << record2.Name << "\n";
        }
        assert(record1.Name == record2.Name);
        bool bPassedFilters = true;

        // Check if the error rate is below the max
        double er1 = getErrorRate(record1);
        double er2 = getErrorRate(record2);

        if(er1 > opt::maxError || er2 > opt::maxError)
        {
            bPassedFilters = false;
            numPairsFilteredByER += 1;
        }

        // Check that both mapping qualities reach the minimum
        if(record1.MapQuality < opt::minQuality || record2.MapQuality < opt::minQuality)
        {
            bPassedFilters = false;
            numPairsFilteredByQuality += 1;
        }

        // Perform depth check for pairs aligning to different contigs
        if(bPassedFilters && bDepthFilterEnabled && (record1.RefID != record2.RefID))
        {
            // Each read of the pair is checked against its own sequence
            int maxDepth1 = getMaxKmerDepth(record1.QueryBases, pBWT, pRBWT);
            int maxDepth2 = getMaxKmerDepth(record2.QueryBases, pBWT, pRBWT);
            if(maxDepth1 > opt::maxKmerDepth || maxDepth2 > opt::maxKmerDepth)
            {
                bPassedFilters = false;
                numPairsFilteredByDepth += 1;
            }
        }

        // Perform short-insert pair check. The graph filter only runs on
        // pairs that passed every earlier filter, and only pairs it rejects
        // are counted as filtered by distance.
        if(bPassedFilters && pGraph != NULL && !filterByGraph(pGraph, referenceVector, record1, record2))
        {
            bPassedFilters = false;
            numPairsFilteredByDistance += 1;
        }

        if(bPassedFilters)
        {
            pBamWriter->SaveAlignment(record1);
            pBamWriter->SaveAlignment(record2);
            numPairsWrote += 1;
        }
    }

    std::cout << "Total pairs: " << numPairsTotal << "\n";
    std::cout << "Total pairs output: " << numPairsWrote << "\n";
    std::cout << "Total filtered because one pair is unmapped: " << numPairsUnmapped << "\n";
    std::cout << "Total filtered by distance: " << numPairsFilteredByDistance << "\n";
    std::cout << "Total filtered by error rate: " << numPairsFilteredByER << "\n";
    std::cout << "Total filtered by quality: " << numPairsFilteredByQuality << "\n";
    std::cout << "Total filtered by depth: " << numPairsFilteredByDepth << "\n";

    // delete on a null pointer is a no-op, so no guards are needed
    delete pGraph;
    delete pBWT;
    delete pRBWT;

    pBamWriter->Close();
    pBamReader->Close();

    delete pTimer;
    delete pBamReader;
    delete pBamWriter;
    return 0;
}