Beispiel #1
0
// locate (& load) index files for BAM readers that don't already have one loaded
bool BamMultiReaderPrivate::LocateIndexes(const BamIndex::IndexType& preferredType)
{

    bool errorsEncountered = false;
    m_errorString.clear();

    // iterate over readers
    std::vector<MergeItem>::iterator readerIter = m_readers.begin();
    std::vector<MergeItem>::iterator readerEnd = m_readers.end();
    for (; readerIter != readerEnd; ++readerIter) {
        MergeItem& item = (*readerIter);
        BamReader* reader = item.Reader;
        if (reader == 0) continue;

        // if reader has no index, try to locate one
        if (!reader->HasIndex()) {
            if (!reader->LocateIndex(preferredType)) {
                m_errorString.append(1, '\t');
                m_errorString.append(reader->GetErrorString());
                m_errorString.append(1, '\n');
                errorsEncountered = true;
            }
        }
    }

    // check for errors encountered before returning success/fail
    if (errorsEncountered) {
        const std::string currentError = m_errorString;
        const std::string message =
            std::string("error while locating index files: \n") + currentError;
        SetErrorString("BamMultiReader::LocatingIndexes", message);
        return false;
    } else
        return true;
}
Beispiel #2
0
bool BamParse::loadIndex(BamReader& reader)
{
	bool bsignal = reader.OpenIndex(this->filename_index);
	if(bsignal==false) 
	{
		cerr << "Index file not found, now create it!!!"<<endl;
		bool bci=reader.CreateIndex();
		if(bci==false)
		{
			cerr << "Index file cannot be created!!!"<<endl;
			return false;
		}
		else
		{
			bool blct=reader.LocateIndex();
			if(blct==false)
			{
				cerr << "Index file cannot be located!!!"<<endl;
				return false;
			}
		}

	}
	return true;
}
Beispiel #3
0
int main (int argc, char** argv)
{

    // Print Commandline
    string ss(argv[0]);   // convert Char to String
    string commandline = "##Print Command line " + ss;

    int c;

    FastaReference* reference = NULL;
    int minbaseQ        = 10;   //default
    int windowlen       = 40;  //by default
    string regionstr;
    string RegionFile;
    string bamfile;
    bool STdin          = false;
    bool has_region     = false;
    bool has_regionFile = false;
    bool has_bamfile    = false;
    bool has_ref        = false;
    int ploidy         = 2;
    bool SetLowComplexityRegionSWGapExt = false;
    bool SetLowComplexityRegion = false;
   

    if (argc < 2)
    {
        printSummary(argv);
        exit(1);
    }

    while (true)
    {
        static struct option long_options[] =
        {
            {"help", no_argument, 0, 'h'},
            {"ploidy", required_argument, 0, 'p'},
            {"window-size", required_argument, 0, 'w'},
            {"reference", required_argument, 0, 'f'},
            {"min-base-quality", required_argument, 0,'q'},
            {"Region", required_argument, 0, 'R'},
            {"STdin", no_argument, 0, 's'},
            {"bam", required_argument, 0, 'b'},
            {"Repeat-Extgap", no_argument, 0, 'E'},
            {"LowCompex", no_argument, 0, 'l'},
            {0, 0, 0, 0}
        };

        int option_index = 0;

        c = getopt_long (argc, argv, "hslEf:q:w:s:r:R:p:b:", long_options, &option_index);

        /* Detect the end of the options. */
        if (c == -1)
            break;

        switch (c)
        {
            case 'f':
                reference = new FastaReference(optarg); // will exit on open failure
                commandline = commandline + " -f " + optarg;
                has_ref = true;
                break;

            case 'b':
                has_bamfile = true;
                bamfile = optarg;
                commandline = commandline + " -b " + optarg;
                break;

            case 'r':
                regionstr = optarg;
                has_region = true;
                commandline = commandline + " -r " + optarg;
                break;

             case 'R':
                RegionFile = optarg;
                has_regionFile = true;
                commandline = commandline + " -R " + optarg;
                break;

            case 's':
                STdin = true;
                commandline = commandline + " -s ";
                break;
                
            case 'q':
                minbaseQ = atoi(optarg);
                commandline = commandline + " -q " + optarg;
                break;

            case 'w':
                windowlen = atoi(optarg);
                commandline = commandline + " -w " + optarg;
                break;

            case 'p':
                ploidy = atoi(optarg);
                commandline = commandline + " -p " + optarg;
                break;

            case 'E':
                SetLowComplexityRegionSWGapExt = true;
                commandline = commandline + " -E ";
                break;

            case 'l':
                SetLowComplexityRegion = true;
                commandline = commandline + " -l ";
                break;

            case 'h':
                printSummary(argv);
                commandline = commandline + " -h ";
                exit(0);
                break;

            case '?':
                printSummary(argv);
                exit(1);
                break;

              default:
                abort();
                break;
        }
    }

    //// Open Error log files
    ofstream cerrlog("bonsaiReport.txt");
    streambuf *cerrsave = std::cerr.rdbuf();

    // Redirect stream buffers
    if (cerrlog.is_open())
        cerr.rdbuf(cerrlog.rdbuf());

    cerr << commandline << endl;
    

    //Check for Reference Fasta sequence
    if (!has_ref)
    {
        cerr << "no FASTA reference provided, cannot realign" << endl;
        exit(1);
    }

    ////Check for reader
    BamReader reader;
    if (STdin == true)
    {
        if (!reader.Open("stdin"))
        {
            cerr << "could not open stdin bam for reading" << endl;
            cerr << reader.GetErrorString() << endl;
            reader.Close();
            printSummary(argv);
        }
    }
    else
    {
        if (has_bamfile == true)
        {
            if (!reader.Open(bamfile))
            {
                cerr << "ERROR: could not open bam files from stdin ... Aborting" << endl;
                cerr << reader.GetErrorString() << endl;
                reader.Close();
                printSummary(argv);
            }

            if ( !reader.LocateIndex() )
                reader.CreateIndex();
        }
        else
        {
            cerr << "--bam flag is set but no bamfile is provided... Aborting" << endl;
            reader.Close();
            printSummary(argv);
        }
    }

    //// Check Region Tags
    if ( (has_regionFile == true) && (has_region == true) )
    {
        cerr << "ERROR: You provide both region and has provide a Set Region List... Aborting" << endl;
        exit(1);
    }

    //// store the names of all the reference sequences in the BAM file
    vector<RefData> referencedata = reader.GetReferenceData();
   
    //// Store Region LIST
    vector<BamRegion> regionlist;
    if (has_region == true)
    {
        BamRegion region;
        ParseRegionString(regionstr, reader, region);
        regionlist.push_back(region);
    }
    else if (has_regionFile == true)
    {
        ifstream RG(RegionFile.c_str(), ios_base::in);
        string line;
        while(getline(RG,line))
        {
            BamRegion region;
            ParseRegionString(line, reader, region);
            regionlist.push_back(region);
        }
        RG.close();
    }
    else if ( (has_regionFile == false) && (has_region == false) )
    {
        for (int i= 0; i < (int)referencedata.size(); i++)
        {
            string regionstr = referencedata.at(i).RefName;
            BamRegion region;
            ParseRegionString(regionstr, reader, region);
            if (!reader.SetRegion(region)) // Bam region will get [0,101) = 0 to 100 => [closed, half-opened)
            {
                cerr << "ERROR: set region " << regionstr << " failed. Check that REGION describes a valid range... Aborting" << endl;
                reader.Close();
                exit(1);
            }
            else
                regionlist.push_back(region);
        }
    }

    //// 
    BamWriter writer;
    if (!writer.Open("stdout", reader.GetHeaderText(), reader.GetReferenceData()))
    {
        cerr << "could not open stdout for writing" << endl;
        exit(1);
    }

    //// Smallest start position and Largest end position for Req Seq
    vector<RefData>::iterator refdataIter = referencedata.begin();
    vector<BamRegion>::iterator regionListIter = regionlist.begin();
   

    // CLASS
    RealignFunctionsClass RealignFunction;

    map<int, string> RefIDRedName;
    vector<SalRealignInfo> AlGroups;
    multimap<int, BamAlignment> SortRealignedAlignmentsMultimap;

    int refid               = 0;
    BamAlignment alignment;
    bool IsNextAlignment = reader.GetNextAlignment(alignment);
    //cerr << "   " << alignment.Name << " Chr  " << alignment.RefID << " Startpos: " << alignment.Position << " Endpos: " << alignment.GetEndPosition() << " Length: " << alignment.Length << endl;

    int windowrealigned     = 0;
    int TotalWindowDetected = 0;
    int TotalReadsAligned   = 0;
    int TotalWindow         = 0;
    int TotalReads          = 0;

    while (refdataIter != referencedata.end() )
    {
        string refname = refdataIter->RefName;
        RefIDRedName[refid] = refname;
        int reflength = refdataIter->RefLength;
        int winstartpos, winendpos;
        int AllowableBasesInWindow = 1;
        bool nextChrName = false;

        cerr << "##HeaderINFO: RefID = " << refdataIter->RefName << "\t" << "RefLen = " << reflength << endl;
        
        while (nextChrName == false )
        {
            vector<int> minmaxRefSeqPos;
            bool IsPassDetectorNoRealignment = false;
            minmaxRefSeqPos.push_back(-1);
            minmaxRefSeqPos.push_back(0);
            //cerr << " region: " << (*regionListIter).LeftRefID << " : " << (*regionListIter).LeftPosition << " .. " << (*regionListIter).RightPosition << endl;
            if ((refid == (int)referencedata.size() - 1) && ((*regionListIter).LeftRefID == refid) && ((has_region==true) || (has_regionFile==true)) )
            {
                ////
                if ( (has_region == true) || (has_regionFile == true) )
                {                    
                    winstartpos = (*regionListIter).LeftPosition;
                    winendpos   = winstartpos + windowlen - 1;
                    reflength = (*regionListIter).RightPosition;
                    if (reflength < winendpos)
                        reflength = winendpos;
                                       
                    // Get Next Alignment First
                    if ( (refid == alignment.RefID) && (winstartpos == (*regionListIter).LeftPosition) && (IsNextAlignment == false) )
                        IsNextAlignment = reader.GetNextAlignment(alignment);
                }
                else if (has_region == false)
                {
                    winstartpos = 0;
                    winendpos   = winstartpos + windowlen - 1;

                    // Get Next Alignment First
                    if ( (refid == alignment.RefID) && (winstartpos == 0) && (IsNextAlignment == false) )
                        IsNextAlignment = reader.GetNextAlignment(alignment);
                }
                //cerr << " winstart: " << winstartpos << " ; winend: " << winendpos;
                //cerr << "   " << alignment.Name << " Chr  " << alignment.RefID << " Startpos: " << alignment.Position << " Endpos: " << alignment.GetEndPosition() << " Length: " << alignment.Length << endl;

                ////
                while ((winstartpos < reflength))
                {
                    //// Check window end position
                    if (winendpos > reflength)
                        winendpos = reflength;

                    // Reinitialized
                    unsigned int NewReadMappedcount = 0;
                
                    //// Save and Erase alignments that are outside of window (Deque?)
                    if (!AlGroups.empty())
                    {
                        minmaxRefSeqPos.at(0) = -1;
                        minmaxRefSeqPos.at(1) = 0;

                        //cerr << "#Start: Keep alignments with start position exceed the right end of the window/Region " << endl;
                        vector<SalRealignInfo>::iterator Iter = AlGroups.begin();

                        while (Iter != AlGroups.end())
                        {
                            // Erase alignment s
                            if ((*Iter).al.GetEndPosition() < winstartpos)
                            {
                                //cerr << "  ToWrite: " << (*Iter).second.size() << " ; " << (*Iter).al.Name << " ; " << (*Iter).al.Position << " < " << winstartpos << " : " << (*Iter).al.GetEndPosition() << endl;
                                SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > ((*Iter).al.Position, (*Iter).al));
                                AlGroups.erase(Iter);

                                //cerr << "  ToWrite: DONE " << endl;
                            } 
                            else
                            {
                                string referenceSequence = reference->getSubSequence(RefIDRedName[(*Iter).al.RefID], (*Iter).al.Position, 2*(*Iter).al.Length);
                            
                                if ((*Iter).HasRealign == true )
                                {
                                    (*Iter).currentReadPosition = 0;
                                    (*Iter).currentGenomeSeqPosition = 0;
                                    (*Iter).currentAlPosition = (*Iter).al.Position;
                                    (*Iter).cigarindex = 0;
                                }

                                (*Iter).CigarSoftclippingLength = 0;
                                SalRealignInfo talr = (*Iter);
                                //cerr << "  ToKEEP: " << (*Iter).al.Name << " ; " << (*Iter).al.Position << " < " << winstartpos << " : " << (*Iter).al.GetEndPosition() << endl;
                                RealignFunction.ParseAlignmentsAndExtractVariantsByBaseQualv(AlGroups, talr, Iter, (*Iter).al, referenceSequence, minmaxRefSeqPos, winstartpos, winendpos, (float) minbaseQ, false);
                           
                                ++Iter; //Increment iterator
                            }
                        }
                    }
                

                    // Write Sorted Alignments that are outside of window
                    //cerr << "SortRealignedAlignmentsMultimap: " << SortRealignedAlignmentsMultimap.size() << " minmaxRefSeqPos.at(0)= " << minmaxRefSeqPos.at(0) << endl;
                    if (!SortRealignedAlignmentsMultimap.empty()) // && (winWrite < winstartpos ) )
                    {
                        //cerr << "#Start: Write alignments and delete alignments with start position exceed the right end of the window/Region " << endl;
                        multimap<int, BamAlignment>::iterator sraIter = SortRealignedAlignmentsMultimap.begin();

                        while (sraIter != SortRealignedAlignmentsMultimap.end()) 
                        {
                            //cerr << " (*sraIter).first= " <<  (*sraIter).first << " minmaxRefSeqPos.at(0)= " << minmaxRefSeqPos.at(0) << " winstartpos - ((windowlen - 1)*0.9)= " << winstartpos - ((windowlen - 1)*0.9) << endl;
                            if (((float) (*sraIter).first < floor((float) (winstartpos - ((windowlen - 1)*0.9)))) && ((minmaxRefSeqPos.at(0) > 0) && ((*sraIter).first < minmaxRefSeqPos.at(0)))) {
                                //writer.SaveAlignment((*sraIter).second);  // Why sometimes, it doesn't work ?????
                                if (!writer.SaveAlignment((*sraIter).second))
                                    cerr << writer.GetErrorString() << endl;

                                SortRealignedAlignmentsMultimap.erase(sraIter++);
                            } else {
                                ++sraIter;
                            }
                    }
                    //cerr << "#Done: Write alignments and delete alignments with start position exceed the right end of the window/Region " << endl;
                    }

                    //cerr << " winstart: " << winstartpos << " ; winend: " << winendpos;
                    //cerr << "   " << alignment.Name << " Chr  " << alignment.RefID << " Startpos: " << alignment.Position << " Endpos: " << alignment.GetEndPosition() << " Length: " << alignment.Length << endl;
                    //cerr <<  ": " << alignment.RefID << " :" << RefIDRedName[alignment.RefID] << " : " << RefIDRedName[alignment.RefID] << endl;

                    //cerr << "Start: Gather aligmenets that lie (fully or partially) within the window frame and group INDELs if there are ... " << endl;
                    // Gather Reads within a window frame
                  
                    while ((IsNextAlignment) && (refid == alignment.RefID)) // Neeed more conditions
                    {
                        if (SetLowComplexityRegion == true) 
                        {
                            string sequenceInWindow = reference->getSubSequence(RefIDRedName[alignment.RefID], winstartpos, (winendpos-winstartpos+1) );

                            if (IsWindowInRepeatRegion(sequenceInWindow) == true)
                            {
                                if ((IsWithinWindow(alignment, winstartpos, winendpos, AllowableBasesInWindow)) == 0)
                                {
                                    TotalReads++;
                                    if (alignment.IsMapped())
                                    {
                                        string referenceSequence = reference->getSubSequence(RefIDRedName[alignment.RefID], alignment.Position, 2*alignment.Length);
 
                                        vector<SalRealignInfo>::iterator tIter;
                                        SalRealignInfo alr;
                                        alr.al = alignment;
                                        alr.currentReadPosition = 0;
                                        alr.currentGenomeSeqPosition = 0;
                                        alr.currentAlPosition = alignment.Position;
                                        alr.cigarindex = 0;
                                        alr.HasRealign = false;
                                        alr.CigarSoftclippingLength = 0;

                                        string str = "ZZZZZZZZZZZZZZZZZ";
                                        if (alignment.Name.find(str) != string::npos) {
                                            stringstream cigar;
                                            for (vector<CigarOp>::const_iterator cigarIter = alignment.CigarData.begin(); cigarIter != alignment.CigarData.end(); ++cigarIter)
                                                cigar << cigarIter->Length << cigarIter->Type;

                                            string cigarstr = cigar.str();
                                            cerr << "   TRACKING: " << alignment.RefID << " " << alignment.Name << " pos: " << alignment.Position << " cigar: " << cigarstr << endl;
                                        }

                                        RealignFunction.ParseAlignmentsAndExtractVariantsByBaseQualv(AlGroups, alr, tIter, alignment, referenceSequence, minmaxRefSeqPos, winstartpos, winendpos, (float) minbaseQ, true);
                                        NewReadMappedcount++;
                                    } 
                                    else
                                    {
                                        SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > (alignment.Position, alignment));
                                        cerr << "UNmapped : " << alignment.Name << endl;
                                    }
                                } 
                                else if ((IsWithinWindow(alignment, winstartpos, winendpos, AllowableBasesInWindow)) == 1)
                                {
                                    SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > (alignment.Position, alignment));
                                }
                                else
                                    break;
                            } else {
                                if ((IsWithinWindow(alignment, winstartpos, winendpos, AllowableBasesInWindow)) < 2)
                                    SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > (alignment.Position, alignment));
                                else
                                    break;
                            }
                        }
                        else // (SetLowComplexityRegion == false)
                        {
                            if ((IsWithinWindow(alignment, winstartpos, winendpos, AllowableBasesInWindow)) == 0)
                            {
                                TotalReads++;
                                if (alignment.IsMapped())
                                {
                                    string referenceSequence = reference->getSubSequence(RefIDRedName[alignment.RefID], alignment.Position, 2 * alignment.Length);

                                    vector<SalRealignInfo>::iterator tIter;
                                    SalRealignInfo alr;
                                    alr.al = alignment;
                                    alr.currentReadPosition = 0;
                                    alr.currentGenomeSeqPosition = 0;
                                    alr.currentAlPosition = alignment.Position;
                                    alr.cigarindex = 0;
                                    alr.HasRealign = false;
                                    alr.CigarSoftclippingLength = 0;

                                    string str = "ZZZZZZZZZZZZZZZZZ";
                                    if (alignment.Name.find(str) != string::npos)
                                    {
                                        stringstream cigar;
                                        for (vector<CigarOp>::const_iterator cigarIter = alignment.CigarData.begin(); cigarIter != alignment.CigarData.end(); ++cigarIter)
                                            cigar << cigarIter->Length << cigarIter->Type;

                                        string cigarstr = cigar.str();
                                        cerr << "   TRACKING: " << alignment.RefID << " " << alignment.Name << " pos: " << alignment.Position << " cigar: " << cigarstr << endl;
                                    }

                                    RealignFunction.ParseAlignmentsAndExtractVariantsByBaseQualv(AlGroups, alr, tIter, alignment, referenceSequence, minmaxRefSeqPos, winstartpos, winendpos, (float) minbaseQ, true);

                                    //cerr << " winstart: " << winstartpos << " ; winend: " << winendpos;
                                    //cerr << "   INDEL: " << alignment.RefID << " " << alignment.Name << " pos: " << alignment.Position << " Length: " << alignment.Length << " CIGARstr: " << cigarstr << endl;
                                    NewReadMappedcount++;
                                } 
                                else
                                {
                                    SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > (alignment.Position, alignment));
                                    cerr << "UNmapped : " << alignment.Name << endl;
                                }
                            }
                            else if ((IsWithinWindow(alignment, winstartpos, winendpos, AllowableBasesInWindow)) == 1) {
                                SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > (alignment.Position, alignment));
                            }
                            else
                                break;
                        }

                        ////Get next alignment
                        IsNextAlignment = reader.GetNextAlignment(alignment);
                    }

                   //cerr << "Done: Gather aligmenets that lie (fully or partially) within the window frame and group INDELs if there are ... " << endl;

                    //// Detector Corner
                    bool ToRealign = MeetIndelDetectorThresholdv(AlGroups);
                    cerr << "MeetIndelDetectorThresholdv(AlGroups).size()= " << AlGroups.size() << endl;
                    
                    // **************
                    if (ToRealign)
                    {
                        //cerr << "  ToRealign: " << refdataIter->RefName << "\t" << reflength << "\t" << winstartpos << "\t" << winendpos << "\t" << AlGroups.size() << endl;
                        //cerr << "             minmaxRefSeqPos.at(1)= " << minmaxRefSeqPos.at(1) << " minmaxRefSeqPos.at(0)= " << minmaxRefSeqPos.at(0) << endl;

                        ////// Perform Realign routines
                        int TotalAlR = 0; // Total number of alignments to be realigned
                        int NumAlR = 0; // Now many alignments are aligned
                        TotalWindowDetected++;

                        cerr << "#Start: Meet Threshold, Realigning ... " << endl;

                        if (minmaxRefSeqPos.at(1) < winendpos)
                            minmaxRefSeqPos.at(1) = winendpos;

                        if (minmaxRefSeqPos.at(0) > winstartpos)
                            minmaxRefSeqPos.at(0) = winstartpos;

                        bool IsToRealign = RealignFunction.PruningByNaiveSelectionProcedureAndConstructHaplotypes2(winstartpos, winendpos, refid, refname, minmaxRefSeqPos, reference);

                        if (IsToRealign == true)
                        {
                            RealignFunction.SelectHaplotypeCandidates_SmithWatermanBSv(AlGroups, minmaxRefSeqPos, SetLowComplexityRegionSWGapExt);

                            minmaxRefSeqPos.at(0) = -1;
                            minmaxRefSeqPos.at(1) = 0;

                            int nextwinstartpos = winendpos + 1;
                            int nextwinendpos = winstartpos + windowlen - 1;
                            if (nextwinendpos > reflength)
                                nextwinendpos = reflength;

                            //cerr <<  "   Before Realign : " << SortRealignedAlignmentsMultimap.size() << endl;
                            RealignFunction.AdjustCigarsWRTChosenMultipleHaplotypesAndPrepareAlignmentsTobeWrittenOut(AlGroups, SortRealignedAlignmentsMultimap, reference, RefIDRedName, minmaxRefSeqPos, nextwinstartpos, nextwinendpos, minbaseQ, TotalAlR, NumAlR, ploidy);
                            IsPassDetectorNoRealignment = false; // Set flag to false to deactivate write functions

                            //cerr <<  "   After Realign : " << SortRealignedAlignmentsMultimap.size() << endl;

                            TotalReadsAligned += NumAlR;

                            if (NumAlR > 0) // Realignment done
                                windowrealigned++;
                        } else


                        cerr << "#Done: Meet Threshold, Realigning ... " << endl;
                    }


                    if (NewReadMappedcount > 0)
                        TotalWindow++;

                    RealignFunction.Clear();

                    //// Move the window frame
                    winstartpos = winendpos + 1;
                    winendpos = winstartpos + windowlen - 1;
                }

                //// Save and Erase remaining alignments that are outside of window (Deque?)
                if ((!AlGroups.empty())) {
                    cerr << "#Start: Write Remaining alignments and delete all alignments" << endl;

                    for (vector<SalRealignInfo>::iterator Iter = AlGroups.begin(); Iter != AlGroups.end(); ++Iter) {
                        //cerr << "    Remain alignment start: " << (*Iter).al.Name << " " << Iter->al.Position  << " < " << winstartpos << "  " << winendpos << endl;
                        SortRealignedAlignmentsMultimap.insert(pair<int, BamAlignment > ((*Iter).al.Position, (*Iter).al));
                    }

                    cerr << "#Done: Write Remaining alignments and delete all alignments" << endl;
                }

                AlGroups.clear();


                // Write Sorted remaining Alignments that are outside of window
                if (!SortRealignedAlignmentsMultimap.empty())
                {
                    for (multimap<int, BamAlignment>::iterator sraIter = SortRealignedAlignmentsMultimap.begin(); sraIter != SortRealignedAlignmentsMultimap.end(); ++sraIter)
                    {
                        //writer.SaveAlignment((*sraIter).second);
                        if (!writer.SaveAlignment((*sraIter).second))
                            cerr << writer.GetErrorString() << endl;
                    }
                    SortRealignedAlignmentsMultimap.clear();
                }

            }

            ++regionListIter;
            if ((*regionListIter).LeftRefID > refid)
                nextChrName = true;
        }

        //// If End of the chromosome position
        //// increament iterator
        ++refdataIter;
        ++refid;
    }


    reader.Close();
    writer.Close();

    cerr << "##-Completed- " << endl;
    cerr << " Total Reads processed =  " << TotalReads << endl;
    cerr << " Total Reads Aligned =    " << TotalReadsAligned << endl;
    cerr << " Total Window processed = " << TotalWindow << endl;
    cerr << " Total Window Detected =  " << TotalWindowDetected << endl;
    cerr << " Total Windows Aligned =  " << windowrealigned << endl;


    // Restore cerr's stream buffer before terminating
    if (cerrlog.is_open())
        cerr.rdbuf(cerrsave);

    commandline.clear();
    return 0;
}
Beispiel #4
0
int main(const int argc, char* const argv[]) {
    int c, min_mapQ=0, seed=chrono::system_clock::now().time_since_epoch().count();
    unsigned int flag_on=0, flag_off=0;
    string fn_tgt, fn_in, fn_out="", out_format="b";

    while ((c = getopt(argc, argv, "SbBcCt:h1Ho:q:f:F:ul:r:?T:R:L:s:@:m:x:U:")) >= 0) {
        switch (c) {
            case 's': seed = atoi(optarg); break;
            case 'm': break;
            case 'c': break;
            case 'S': break;
            case 'b': break;
            case 'C': break;
            case 'h': break;
            case 'H': break;
            case 'o': fn_out = optarg; break;
            case 'U': break;
            case 'f': flag_on |= strtol(optarg, 0, 0); break;
            case 'F': flag_off |= strtol(optarg, 0, 0); break;
            case 'q': min_mapQ = atoi(optarg); break;
            case 'u': out_format = "u"; break;
            case '1': break;
            case 'l': break;
            case 'r': break;
            case 't': fn_tgt = optarg; break;
            case 'R': break;
            case '?': return usage();
            case 'T': break;
            case 'B': break;
            case '@': break;
            case 'x': break;
            default: return usage();
        }
    }
    if (fn_tgt.compare("") == 0) return usage();
    if (argc == optind) return usage();
    fn_in = argv[optind];

    BamReader reader;
    if (!reader.Open(fn_in)) {
        cerr << "ERROR: cannot open [" << fn_in << "] for reading\n";
        return 1;
    }
    if (!reader.LocateIndex()) {
        cerr << "ERROR: cannot find BAM index for [" << fn_in << "]\n";
        return 1;
    }

    const SamHeader header = reader.GetHeader();
    if (header.SortOrder.compare("coordinate") != 0) {
        cerr << "ERROR: [" << fn_in << "] not sorted by coordinate\n";
        return 1;
    }
    const RefVector refseq = reader.GetReferenceData();

    vector<BamRegion> regions;
    vector<unsigned int> src_depths, tgt_depths;
    if (read_region_depth(fn_tgt.c_str(), reader, regions, src_depths, tgt_depths) != 0) return 1;

    BamWriter writer;
    if (!writer.Open(fn_out, header, refseq)) {
        cerr << "ERROR: cannot open [" << fn_out << "] for writing\n";
        return 1;
    }

    BamAlignment aln;
    vector<BamAlignment> reads;
    vector<string> paired, unpaired;
    unordered_map<int, int> kept;
    unordered_map<string, unsigned int> seen, sampled;
    unordered_map<string, vector<int> > pool;
    for (size_t i=0; i<regions.size(); ++i) {
        reads.clear();
        paired.clear();
        unpaired.clear();
        kept.clear();
        pool.clear();

        char region_string[256];
        sprintf(region_string, "%s:%d-%d", refseq[regions[i].LeftRefID].RefName.c_str(), regions[i].LeftPosition, regions[i].RightPosition);
        if (!reader.SetRegion(regions[i])) {
            cerr << "WARNING: failed to locate [" << region_string << "]\n";
            //cerr << "WARNING: failed to locate [" << refseq[regions[i].LeftRefID].RefName << ':' << regions[i].LeftPosition << '-' << regions[i].RightPosition << "]\n";
            continue;
        }
        while (reader.GetNextAlignment(aln)) {
            if ((aln.AlignmentFlag & flag_on) == flag_on && !(aln.AlignmentFlag & flag_off) && aln.MapQuality >= min_mapQ)
                reads.push_back(aln);
        }
        if (reads.size() == 0) continue;

        unsigned int depth = 0;
        for (size_t k=0; k<reads.size(); ++k) {
            aln = reads[k];
            string rn = aln.Name;
            if (seen.find(rn) != seen.end()) { // if seen in previous regions
                if (sampled.find(rn) != sampled.end()) { // if self or mate sampled before, sample it
                    if (sampled[rn] != aln.AlignmentFlag) kept[k] = 1; // if mate sampled before, keep it
                    depth += get_overlap(aln, regions[i]);
                }
                if (seen[rn] != aln.AlignmentFlag) seen[rn] = aln.AlignmentFlag;
            }
            else { // if not seen in previous regions
                pool[rn].push_back(k);
            }
            if (depth > tgt_depths[i]) break;
        }
        if (depth < tgt_depths[i]) {
            for (auto it=pool.begin(); it!=pool.end(); ++it) {
                if (it->second.size()>1)
                    paired.push_back(it->first);
                else
                    unpaired.push_back(it->first);
            }
            shuffle(paired.begin(), paired.end(), default_random_engine(seed));
            shuffle(unpaired.begin(), unpaired.end(), default_random_engine(seed));
            int n1=paired.size(), n2=unpaired.size(), k1, k2, k3;
            while (depth < tgt_depths[i] && n1+n2 > 0) {
                if (n1>0) {
                    k1 = pool[paired[--n1]][0];
                    k2 = pool[paired[n1]][1];
                    depth += get_overlap(reads[k1], regions[i]);
                    depth += get_overlap(reads[k2], regions[i]);
                    kept[k1] = 1; kept[k2] = 1;
                    continue;
                }
                if (n2>0) {
                    k3 = pool[unpaired[--n2]][0];
                    depth += get_overlap(reads[k3], regions[i]);
                    kept[k3] = 1;
                    continue;
                }
            }
        }
        for (auto it=pool.begin(); it!=pool.end(); ++it) {
            string rn = it->first;
            seen[rn] = reads[pool[rn].back()].AlignmentFlag;
        }
        for (auto it=kept.begin(); it!=kept.end(); ++it) {
            int k = it->first;
            string rn = reads[k].Name;
            sampled[rn] = reads[k].AlignmentFlag;
            writer.SaveAlignment(reads[k]);
        }
        cerr << "INFO: target=[" << tgt_depths[i] << "], actual=[" << depth << "], N(reads)=[" << reads.size() << "], N(kept)=[" << kept.size() << "] at [" << region_string << "]\n";
    }

    reader.Close();

    return 0;
}