Esempio n. 1
0
//{{{ SV_Pair:: SV_Pair(const BamAlignment &bam_a,
// if both reads are on the same chrome, then read_l must map before read_r
// if the reads are on different strands then read_l must be on the lexo
// lesser chrom (using the string.compare() method)
SV_Pair::
SV_Pair(const BamAlignment &bam_a,
        const BamAlignment &bam_b,
        const RefVector &refs,
        int _weight,
        int _ev_id,
        SV_PairReader *_reader)
{
    reader = _reader;

    if ( bam_a.MapQuality < bam_b.MapQuality )
        min_mapping_quality = bam_a.MapQuality;
    else
        min_mapping_quality = bam_b.MapQuality;

    struct interval tmp_a, tmp_b;
    tmp_a.start = bam_a.Position;
    tmp_a.end = bam_a.GetEndPosition(false, false) - 1;
    tmp_a.chr = refs.at(bam_a.RefID).RefName;

    if ( bam_a.IsReverseStrand() == true )
        tmp_a.strand = '-';
    else
        tmp_a.strand = '+';

    tmp_b.start = bam_b.Position;
    tmp_b.end = bam_b.GetEndPosition(false, false) - 1;
    tmp_b.chr = refs.at(bam_b.RefID).RefName;

    if ( bam_b.IsReverseStrand() == true )
        tmp_b.strand = '-';
    else
        tmp_b.strand = '+';

    //if ( tmp_a.chr.compare(tmp_b.chr) > 0 ) {
    if ( bam_a.RefID < bam_b.RefID ) {
        read_l = tmp_a;
        read_r = tmp_b;
        //} else if ( tmp_a.chr.compare(tmp_b.chr) < 0 ) {
    } else if ( bam_a.RefID > bam_b.RefID) {
        read_l = tmp_b;
        read_r = tmp_a;
    } else { // ==
        if (tmp_a.start > tmp_b.start) {
            read_l = tmp_b;
            read_r = tmp_a;
        } else {
            read_l = tmp_a;
            read_r = tmp_b;
        }
    }

    weight = _weight;
    ev_id = _ev_id;
}
Esempio n. 2
0
void BedCoverage::CollectCoverageBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedCovFileIntoMap();

    // open the BAM file
    BamReader reader;
    reader.Open(bamFile);

    // get header & reference information
    string header = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // convert each aligned BAM entry to BED
    // and compute coverage on B
    BamAlignment bam;
    while (reader.GetNextAlignment(bam)) {
        if (bam.IsMapped()) {
            // treat the BAM alignment as a single "block"
            if (_obeySplits == false) {
                // construct a new BED entry from the current BAM alignment.
                BED a;
                a.chrom  = refs.at(bam.RefID).RefName;
                a.start  = bam.Position;
                a.end    = bam.GetEndPosition(false, false);
                a.strand = "+";
                if (bam.IsReverseStrand()) a.strand = "-";

                _bedB->countHits(a, _sameStrand, _diffStrand, _countsOnly);
            }
            // split the BAM alignment into discrete blocks and
            // look for overlaps only within each block.
            else {
                // vec to store the discrete BED "blocks" from a
                bedVector bedBlocks;
                // since we are counting coverage, we do want to split blocks when a
                // deletion (D) CIGAR op is encountered (hence the true for the last parm)
                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, false, true);
                // use countSplitHits to avoid over-counting each split chunk
                // as distinct read coverage.
                _bedB->countSplitHits(bedBlocks, _sameStrand, _diffStrand, _countsOnly);
            }
        }
    }
    // report the coverage (summary or histogram) for BED B.
    if (_countsOnly == true)
        ReportCounts();
    else 
        ReportCoverage();
    // close the BAM file
    reader.Close();
}
Esempio n. 3
0
//{{{ void process_pair(const BamAlignment &curr,
void
SV_Pair::
process_pair(const BamAlignment &curr,
             const RefVector refs,
             map<string, BamAlignment> &mapped_pairs,
             UCSCBins<SV_BreakPoint*> &r_bin,
             int weight,
             int ev_id,
             SV_PairReader *reader)
{
    if (mapped_pairs.find(curr.Name) == mapped_pairs.end())
        mapped_pairs[curr.Name] = curr;
    else {
        SV_Pair *new_pair = new SV_Pair(mapped_pairs[curr.Name],
                                        curr,
                                        refs,
                                        weight,
                                        ev_id,
                                        reader);
        //cerr << count_clipped(curr.CigarData) << "\t" <<
                //count_clipped(mapped_pairs[curr.Name].CigarData) << endl;
                
        if ( new_pair->is_sane() &&  
             new_pair->is_aberrant() &&
             (count_clipped(curr.CigarData) > 0) &&
             (count_clipped(mapped_pairs[curr.Name].CigarData) > 0) ) {
            SV_BreakPoint *new_bp = new_pair->get_bp();

#ifdef TRACE

            cerr << "READ\t" << 
                    refs.at(mapped_pairs[curr.Name].RefID).RefName << "," <<
                    mapped_pairs[curr.Name].Position << "," <<
                    (mapped_pairs[curr.Name].GetEndPosition(false, false) - 1)
                        << "\t" <<
                    refs.at(curr.RefID).RefName << "," <<
                    curr.Position << "," <<
                    (curr.GetEndPosition(false, false) - 1)
                        <<
                    endl;

            cerr << "\tPE\t" << *new_bp << endl;
#endif
            new_bp->cluster(r_bin);
        } else {
            delete(new_pair);
        }

        mapped_pairs.erase(curr.Name);
    }
}
Esempio n. 4
0
    void getBamBlocks(const BamAlignment &bam, const RefVector &refs,
                      vector<BED> &blocks, bool breakOnDeletionOps) {

        CHRPOS currPosition = bam.Position;
        CHRPOS blockStart   = bam.Position;
        string chrom        = refs.at(bam.RefID).RefName;
        string name         = bam.Name;
        string strand       = "+";
        string score        = ToString(bam.MapQuality);
        char  prevOp        = '\0';
        if (bam.IsReverseStrand()) strand = "-";
        bool blocksFound = false;

        vector<CigarOp>::const_iterator cigItr = bam.CigarData.begin();
        vector<CigarOp>::const_iterator cigEnd = bam.CigarData.end();
        for ( ; cigItr != cigEnd; ++cigItr ) {
            if (cigItr->Type == 'M') {
                currPosition += cigItr->Length;
                // we only want to create a new block if the current M op
                // was preceded by an N op or a D op (and we are breaking on D ops)
                if ((prevOp == 'D' && breakOnDeletionOps == true) || (prevOp == 'N')) {
                    blocks.push_back( BED(chrom, blockStart, currPosition, name, score, strand) );
                    blockStart = currPosition;
                }
            }
            else if (cigItr->Type == 'D') {
                if (breakOnDeletionOps == false)
                    currPosition += cigItr->Length;
                else {
                    blocksFound = true;
                    currPosition += cigItr->Length;
                    blockStart    = currPosition;
                }
            }
            else if (cigItr->Type == 'N') {
                blocks.push_back( BED(chrom, blockStart, currPosition, name, score, strand) );
                blocksFound = true;
                currPosition += cigItr->Length;
                blockStart    = currPosition;
            }
            else if (cigItr->Type == 'S' || cigItr->Type == 'H' || cigItr->Type == 'P' || cigItr->Type == 'I') {
                // do nothing
            }
            else {
                cerr << "Input error: invalid CIGAR type (" << cigItr->Type
                    << ") for: " << bam.Name << endl;
                exit(1);
            }
            prevOp = cigItr->Type;
        }
        // if there were no splits, we just create a block representing the contiguous alignment.
        if (blocksFound == false) {
            blocks.push_back( BED(chrom, bam.Position, currPosition, name, score, strand) );
        }
    }
Esempio n. 5
0
 void getBamBlocks(const BamAlignment &bam, const RefVector &refs, 
                   BedVec &blocks, bool breakOnDeletionOps) {
 
 	CHRPOS currPosition = bam.Position;
     CHRPOS blockStart   = bam.Position;
     string chrom        = refs.at(bam.RefID).RefName;
     string name         = bam.Name;
     string strand       = "+";
     float  score        = bam.MapQuality;
     if (bam.IsReverseStrand()) strand = "-"; 
 	
 	vector<CigarOp>::const_iterator cigItr = bam.CigarData.begin();
 	vector<CigarOp>::const_iterator cigEnd = bam.CigarData.end();
     for ( ; cigItr != cigEnd; ++cigItr ) {
         if (cigItr->Type == 'M') {
             currPosition += cigItr->Length;
 			blocks.push_back( Bed(chrom, blockStart, currPosition, name, score, strand) );
 			blockStart    = currPosition;
         }
         else if (cigItr->Type == 'D') {
             if (breakOnDeletionOps == false)
                 currPosition += cigItr->Length;
             else {
                 currPosition += cigItr->Length;
                 blockStart    = currPosition;
             }
         }
         else if (cigItr->Type == 'N') {
             currPosition += cigItr->Length;
             blockStart    = currPosition;            }
         else if (cigItr->Type == 'S' || cigItr->Type == 'H' || cigItr->Type == 'P' || cigItr->Type == 'I') {
             // do nothing
         }
         else {
             cerr << "Input error: invalid CIGAR type (" << cigItr->Type
 				<< ") for: " << bam.Name << endl;
 			exit(1);
         }
 	}
 }
Esempio n. 6
0
bool RandomTool::RandomToolPrivate::Run(void) {

    // set to default stdin if no input files provided
    if ( !m_settings->HasInput && !m_settings->HasInputFilelist )
        m_settings->InputFiles.push_back(Options::StandardIn());

    // add files in the filelist to the input file list
    if ( m_settings->HasInputFilelist ) {

        ifstream filelist(m_settings->InputFilelist.c_str(), ios::in);
        if ( !filelist.is_open() ) {
            cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl;
            return false;
        }

        string line;
        while ( getline(filelist, line) )
            m_settings->InputFiles.push_back(line);
    }

    // open our reader
    BamMultiReader reader;
    if ( !reader.Open(m_settings->InputFiles) ) {
        cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl;
        return false;
    }

    // look up index files for all BAM files
    reader.LocateIndexes();

    // make sure index data is available
    if ( !reader.HasIndexes() ) {
        cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl;
        reader.Close();
        return false;
    }

    // get BamReader metadata
    const string headerText = reader.GetHeaderText();
    const RefVector references = reader.GetReferenceData();
    if ( references.empty() ) {
        cerr << "bamtools random ERROR: no reference data available... Aborting." << endl;
        reader.Close();
        return false;
    }

    // determine compression mode for BamWriter
    bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() &&
                              !m_settings->IsForceCompression );
    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
    if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed;

    // open BamWriter
    BamWriter writer;
    writer.SetCompressionMode(compressionMode);
    if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) {
        cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename
             << " for writing... Aborting." << endl;
        reader.Close();
        return false;
    }

    // if user specified a REGION constraint, attempt to parse REGION string
    BamRegion region;
    if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) {
        cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl;
        cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid"
             << endl;
        reader.Close();
        writer.Close();
        return false;
    }

    // seed our random number generator
    srand( time(NULL) );

    // grab random alignments
    BamAlignment al;
    unsigned int i = 0;
    while ( i < m_settings->AlignmentCount ) {

        int randomRefId    = 0;
        int randomPosition = 0;

        // use REGION constraints to select random refId & position
        if ( m_settings->HasRegion ) {

            // select a random refId
            randomRefId = getRandomInt(region.LeftRefID, region.RightRefID);

            // select a random position based on randomRefId
            const int lowerBoundPosition = ( (randomRefId == region.LeftRefID)
                                             ? region.LeftPosition
                                             : 0 );
            const int upperBoundPosition = ( (randomRefId == region.RightRefID)
                                             ? region.RightPosition
                                             : (references.at(randomRefId).RefLength - 1) );
            randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
        }

        // otherwise select from all possible random refId & position
        else {

            // select random refId
            randomRefId = getRandomInt(0, (int)references.size() - 1);

            // select random position based on randomRefId
            const int lowerBoundPosition = 0;
            const int upperBoundPosition = references.at(randomRefId).RefLength - 1;
            randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition);
        }

        // if jump & read successful, save first alignment that overlaps random refId & position
        if ( reader.Jump(randomRefId, randomPosition) ) {
            while ( reader.GetNextAlignmentCore(al) ) {
                if ( al.RefID == randomRefId && al.Position >= randomPosition ) {
                    writer.SaveAlignment(al);
                    ++i;
                    break;
                }
            }
        }
    }

    // cleanup & exit
    reader.Close();
    writer.Close();
    return true;
}
Esempio n. 7
0
//{{{ SV_SplitRead:: SV_SplitRead(vector< BamAlignment > &block,
SV_SplitRead::
SV_SplitRead(const BamAlignment &bam_a,
             const BamAlignment &bam_b,
             const RefVector &refs,
             int _weight,
             int _id,
             int _sample_id,
             SV_SplitReadReader *_reader)
{
    reader = _reader;
    sample_id = _sample_id;

    if ( bam_a.MapQuality < bam_b.MapQuality )
        min_mapping_quality = bam_a.MapQuality;
    else
        min_mapping_quality = bam_b.MapQuality;

    struct cigar_query query_a =
        calc_query_pos_from_cigar(bam_a.CigarData,
                                  bam_a.IsReverseStrand() );
    struct cigar_query query_b =
        calc_query_pos_from_cigar(bam_b.CigarData,
                                  bam_b.IsReverseStrand() );

    struct interval tmp_a, tmp_b;

    tmp_a.strand = '+';
    if (bam_a.IsReverseStrand())
        tmp_a.strand = '-';
    tmp_a.chr = refs.at(bam_a.RefID).RefName;
    tmp_a.start = bam_a.Position;
    tmp_a.end = bam_a.GetEndPosition();

    tmp_b.strand = '+';
    if (bam_b.IsReverseStrand())
        tmp_b.strand = '-';
    tmp_b.chr = refs.at(bam_b.RefID).RefName;
    tmp_b.start = bam_b.Position;
    tmp_b.end = bam_b.GetEndPosition();


    //if ( ( tmp_a.chr.compare(tmp_b.chr) > 0 ) ||
    //( ( tmp_a.chr.compare(tmp_b.chr) == 0 ) &&
    //( tmp_a.start > tmp_b.start ) ) ) {

    if ( (bam_a.RefID > bam_b.RefID) ||
            ( (bam_a.RefID == bam_b.RefID) &&
              (tmp_a.start > tmp_b.start ) ) ) {
        side_r = tmp_a;
        side_l = tmp_b;
        query_r = query_a;
        query_l = query_b;
    } else {
        side_l = tmp_a;
        side_r = tmp_b;
        query_l = query_a;
        query_r = query_b;
    }

    if (side_l.strand != side_r.strand)
        type = SV_BreakPoint::INVERSION;
    else if ( (	( side_l.strand == '+' ) &&
                ( side_r.strand == '+' ) &&
                ( query_l.qs_pos < query_r.qs_pos ) ) ||
              (	( side_l.strand == '-' ) &&
                  ( side_r.strand == '-' ) &&
                  ( query_l.qs_pos > query_r.qs_pos) ) )
        type = SV_BreakPoint::DELETION;
    else if ( ( ( side_l.strand == '+' ) &&
                ( side_r.strand == '+' ) &&
                ( query_l.qs_pos > query_r.qs_pos ) ) ||
              ( ( side_l.strand == '-' ) &&
                ( side_r.strand == '-' ) &&
                ( query_l.qs_pos < query_r.qs_pos) ) )
        type = SV_BreakPoint::DUPLICATION;
    else {
        cerr << "ERROR IN BAM FILE.  " <<
             "TYPE not detected (DELETION,DUPLICATION,INVERSION)" <<
             endl;
        cerr << "\t" << query_l.qs_pos << "," << side_l.strand << "\t" <<
             query_r.qs_pos << "," << side_r.strand << "\t" <<
             tmp_a.chr << "," << tmp_a.start << "," << tmp_a.end << "\t" <<
             tmp_b.chr << "," << tmp_b.start << "," << tmp_b.end << "\t" <<
             endl;

        throw(1);
    }

    weight = _weight;
    id = _id;
}
Esempio n. 8
0
// Same as ParseRegionString() above, but accepts a BamMultiReader
bool ParseRegionString(const string& regionString,
                                  const BamReader& reader,
                                  BamRegion& region)
{
    // -------------------------------
    // parse region string

    // check first for empty string
    if ( regionString.empty() )
        return false;

    //cerr << "ParseRegionString Input: " << regionString << endl;

    // non-empty string, look for a colom
    size_t foundFirstColon = regionString.find(':');

    // store chrom strings, and numeric positions
    string startChrom;
    string stopChrom;
    int startPos;
    int stopPos;

    // no colon found
    // going to use entire contents of requested chromosome
    // just store entire region string as startChrom name
    // use BamReader methods to check if its valid for current BAM file
    if ( foundFirstColon == string::npos ) {
        startChrom = regionString;
        startPos   = 0;
        stopChrom  = regionString;
        stopPos    = -1;
    }

    // colon found, so we at least have some sort of startPos requested
    else {
        // store start chrom from beginning to first colon
        startChrom = regionString.substr(0,foundFirstColon);

        // look for ".." after the colon
        size_t foundRangeDots = regionString.find("..", foundFirstColon+1);

        // no dots found
        // so we have a startPos but no range
        // store contents before colon as startChrom, after as startPos
        if ( foundRangeDots == string::npos )
        {
            startPos   = atoi( regionString.substr(foundFirstColon+1).c_str() );
            stopChrom  = startChrom;
            stopPos    = -1;
        }

        // ".." found, so we have some sort of range selected
        else {

            // store startPos between first colon and range dots ".."
            startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );

            // look for second colon
            size_t foundSecondColon = regionString.find(':', foundRangeDots+1);

            // no second colon found
            // so we have a "standard" chrom:start..stop input format (on single chrom)
            if ( foundSecondColon == string::npos ) {
                stopChrom  = startChrom;
                stopPos    = atoi( regionString.substr(foundRangeDots+2).c_str() );
            }

            // second colon found
            // so we have a range requested across 2 chrom's
            else {
                stopChrom  = regionString.substr(foundRangeDots+2, foundSecondColon-(foundRangeDots+2));
                stopPos    = atoi( regionString.substr(foundSecondColon+1).c_str() );
            }
        }
    }
    
    // -------------------------------
    // validate reference IDs & genomic positions
    const RefVector references = reader.GetReferenceData();

    // if startRefID not found, return false
    int startRefID = reader.GetReferenceID(startChrom);
    if ( startRefID == -1 ) return false;
    // startPos cannot be greater than or equal to reference length
    const RefData& startReference = references.at(startRefID);
    if ( startPos >= startReference.RefLength ) return false;

    // if stopRefID not found, return false
    int stopRefID = reader.GetReferenceID(stopChrom);
    if ( stopRefID == -1 ) return false;

    // stopPosition cannot be larger than reference length
    const RefData& stopReference = references.at(stopRefID);
    if ( stopPos > stopReference.RefLength ) return false;

    // if no stopPosition specified, set to reference end
    if ( stopPos == -1 ) stopPos = stopReference.RefLength;

    // -------------------------------
    // set up Region struct & return

    region.LeftRefID     = startRefID;
    region.LeftPosition  = startPos;
    region.RightRefID    = stopRefID;;
    region.RightPosition = stopPos;

    //cerr << "ParseRegionString " << region.LeftRefID <<  " " << region.LeftPosition << " " << region.RightPosition << endl;
    return true;
}
Esempio n. 9
0
void BedIntersect::IntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB = new BedFile(_bedBFile);
    _bedB->loadBedFileIntoMap();

    // create a dummy BED A file for printing purposes if not
    // using BAM output.
    if (_bamOutput == false) {
        _bedA = new BedFile(_bedAFile);
        _bedA->bedType = 12;
    }
    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);
    // get header & reference information
    string bamHeader  = reader.GetHeaderText();
    RefVector refs    = reader.GetReferenceData();
    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        // set compression mode
        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
        writer.SetCompressionMode(compressionMode);
        // open our BAM writer
        writer.Open("stdout", bamHeader, refs);
    }
    vector<BED> hits;
    // reserve some space
    hits.reserve(100);
    BamAlignment bam;    
    // get each set of alignments for each pair.
    while (reader.GetNextAlignment(bam)) {

        // save an unaligned read if -v
        if (!bam.IsMapped()) {
            if (_noHit == true)
                writer.SaveAlignment(bam);
            continue;
        }   
        // break alignment into discrete blocks,
        bedVector bed_blocks;
        string chrom = refs.at(bam.RefID).RefName;
        GetBamBlocks(bam, chrom, bed_blocks, false, true);
        // create a basic BED entry from the BAM alignment
        BED bed;
        MakeBedFromBam(bam, chrom, bed_blocks, bed);
        bool overlapsFound = false;
        if ((_bamOutput == true) && (_obeySplits == false))
        {
            overlapsFound = _bedB->anyHits(bed.chrom, bed.start, bed.end, 
                                           bed.strand, _sameStrand, _diffStrand,
                                           _overlapFraction, _reciprocal);
        }
        else if ( ((_bamOutput == true)  && (_obeySplits == true)) ||
                  ((_bamOutput == false) && (_obeySplits == true)) )
        {
            // find the hits that overlap with the full span of the blocked BED
            _bedB->allHits(bed.chrom, bed.start, bed.end, bed.strand,
                           hits, _sameStrand, _diffStrand,
                           _overlapFraction, _reciprocal);
            // find the overlaps between the block in A and B
            overlapsFound = FindBlockedOverlaps(bed, bed_blocks, hits, _bamOutput);
        }
        else if ((_bamOutput == false) && (_obeySplits == false))
        {
            FindOverlaps(bed, hits);
        }
        // save the BAM alignment if overlap reqs. were met
        if (_bamOutput == true) {
            if ((overlapsFound == true) && (_noHit == false))
                writer.SaveAlignment(bam);
            else if ((overlapsFound == false) && (_noHit == true))
                writer.SaveAlignment(bam);
        }
        hits.clear();
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}
Esempio n. 10
0
void TagBam::Tag() {

    // open the annotations files for processing;
    OpenAnnoFiles();

    // open the BAM file
    BamReader reader;
    BamWriter writer;
	if (!reader.Open(_bamFile)) {
        cerr << "Failed to open BAM file " << _bamFile << endl;
        exit(1);
    }
    
    // get header & reference information
    string bamHeader  = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // set compression mode
    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
//    if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
    writer.SetCompressionMode(compressionMode);
    // open our BAM writer
    writer.Open("stdout", bamHeader, refs);

    // rip through the BAM file and test for overlaps with each annotation file.
    BamAlignment al;
    vector<BED> hits;

    while (reader.GetNextAlignment(al)) {
        if (al.IsMapped() == true) {
            BED a;
            a.chrom = refs.at(al.RefID).RefName;
            a.start = al.Position;
            a.end   = al.GetEndPosition(false, false);
            a.strand = "+";
            if (al.IsReverseStrand()) a.strand = "-";
            
            ostringstream annotations;
            // annotate the BAM file based on overlaps with the annotation files.
            for (size_t i = 0; i < _annoFiles.size(); ++i) 
            {
                // grab the current annotation file.
                BedFile *anno = _annoFiles[i];
                
                if (!_useNames && !_useScores && !_useIntervals) {
                    // add the label for this annotation file to tag if there is overlap
                    if (anno->anyHits(a.chrom, a.start, a.end, a.strand, 
                                      _sameStrand, _diffStrand, _overlapFraction, false))
                    {
                        annotations << _annoLabels[i] << ";";
                    }
                }
                // use the score field
                else if (!_useNames && _useScores && !_useIntervals) {
                    anno->allHits(a.chrom, a.start, a.end, a.strand, 
                                  hits, _sameStrand, _diffStrand, 0.0, false);
                    for (size_t i = 0; i < hits.size(); ++i) {
                        annotations << hits[i].score;
                        if (i < hits.size() - 1) annotations << ",";
                    }
                    if (hits.size() > 0) annotations << ";";
                    hits.clear();
                }
                // use the name field from the annotation files to populate tag
                else if (_useNames && !_useScores && !_useIntervals) {
                    anno->allHits(a.chrom, a.start, a.end, a.strand, 
                                  hits, _sameStrand, _diffStrand, 0.0, false);
                    for (size_t j = 0; j < hits.size(); ++j) {
                        annotations << hits[j].name;
                        if (j < hits.size() - 1) annotations << ",";
                    }
                    if (hits.size() > 0) annotations << ";";
                    hits.clear();
                }
                // use the full interval information annotation files to populate tag
                else if (!_useNames && !_useScores && _useIntervals) {
                    anno->allHits(a.chrom, a.start, a.end, a.strand, 
                                  hits, _sameStrand, _diffStrand,  0.0, false);
                    for (size_t j = 0; j < hits.size(); ++j) {
                        annotations << _annoLabels[i]  << ":" << 
                                        hits[j].chrom  << ":" <<
                                        hits[j].start  << "-" <<
                                        hits[j].end    << "," <<
                                        hits[j].name   << "," <<
                                        hits[j].score  << "," <<
                                        hits[j].strand;
                        if (j < hits.size() - 1) annotations << ",";
                    }
                    if (hits.size() > 0) annotations << ";";
                    hits.clear();
                }
            }
            // were there any overlaps with which to make a tag?
            if (annotations.str().size() > 0) {
                al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
            }
        }
        writer.SaveAlignment(al);
    }
    reader.Close();
    writer.Close();
    // close the annotations files;
    CloseAnnoFiles();
}
Esempio n. 11
0
// this has been copied from bamtools utilities, since it isn't in the API. Original file is bamtools_utilities.cpp.
// Like the rest of Bamtools, it is under the BSD license.
bool Filter::ParseRegionString(const string& regionString, BamRegion& region)
{
    // -------------------------------
    // parse region string
    
    // check first for empty string
    if ( regionString.empty() ) 
        return false;   
    
    // non-empty string, look for a colom
    size_t foundFirstColon = regionString.find(':');
    
    // store chrom strings, and numeric positions
    string chrom;
    int startPos;
    int stopPos;
    
    // no colon found
    // going to use entire contents of requested chromosome 
    // just store entire region string as startChrom name
    // use BamReader methods to check if its valid for current BAM file
    if ( foundFirstColon == string::npos ) {
        chrom = regionString;
        startPos   = 0;
        stopPos    = -1;
    }
    
    // colon found, so we at least have some sort of startPos requested
    else {
        
        // store start chrom from beginning to first colon
        chrom = regionString.substr(0,foundFirstColon);
        
        // look for ".." after the colon
        size_t foundRangeDots = regionString.find("..", foundFirstColon+1);
        
        // no dots found
        // so we have a startPos but no range
        // store contents before colon as startChrom, after as startPos
        if ( foundRangeDots == string::npos ) {
            startPos   = atoi( regionString.substr(foundFirstColon+1).c_str() ); 
            stopPos    = -1;
        } 
        
        // ".." found, so we have some sort of range selected
        else {
            
            // store startPos between first colon and range dots ".."
            startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() );
            
            // look for second colon
            size_t foundSecondColon = regionString.find(':', foundRangeDots+1);
            
            // no second colon found
            // so we have a "standard" chrom:start..stop input format (on single chrom)
            if ( foundSecondColon == string::npos ) {
                stopPos    = atoi( regionString.substr(foundRangeDots+2).c_str() );
            } else {
                return false;
            }
        }
    }
    
    // -------------------------------
    // validate reference IDs & genomic positions
    
    const RefVector references = getReferences();
    
    int RefID = -1;
    for(int i = 0; i < references.size(); i++) {
        if(references[i].RefName == chrom)
            RefID = i;
    }
    
    // if startRefID not found, return false
    if ( RefID == -1 ) {
        cerr << "Can't find chromosome'" << chrom << "'" << endl;
        return false;
    }
    
    // startPos cannot be greater than or equal to reference length
    const RefData& startReference = references.at(RefID);
    if ( startPos >= startReference.RefLength ) {
        cerr << "Start position (" << startPos << ") after end of the reference sequence (" << startReference.RefLength << ")" << endl;
        return false;
    }
    
    // stopPosition cannot be larger than reference length
    const RefData& stopReference = references.at(RefID);
    if ( stopPos > stopReference.RefLength ) {
        cerr << "Start position (" << stopPos << ") after end of the reference sequence (" << stopReference.RefLength << ")" << endl;
        return false;
    }

    // if no stopPosition specified, set to reference end
    if ( stopPos == -1 ) stopPos = stopReference.RefLength;
    
    // -------------------------------
    // set up Region struct & return
    
    region.LeftRefID     = RefID;
    region.LeftPosition  = startPos;
    region.RightRefID    = RefID;;
    region.RightPosition = stopPos;
    return true;
}
Esempio n. 12
0
 bool check(const PropertyFilter& filter, const BamAlignment& al) {
   
     bool keepAlignment = true;
     const PropertyMap& properties = filter.Properties;
     PropertyMap::const_iterator propertyIter = properties.begin();
     PropertyMap::const_iterator propertyEnd  = properties.end();
     for ( ; propertyIter != propertyEnd; ++propertyIter ) {
       
         // check alignment data field depending on propertyName
         const string& propertyName = (*propertyIter).first;
         const PropertyFilterValue& valueFilter = (*propertyIter).second;
         
         if      ( propertyName == ALIGNMENTFLAG_PROPERTY )  keepAlignment &= valueFilter.check(al.AlignmentFlag);
         else if ( propertyName == CIGAR_PROPERTY ) {
             stringstream cigarSs;
             const vector<CigarOp>& cigarData = al.CigarData;
             if ( !cigarData.empty() ) {
                 vector<CigarOp>::const_iterator cigarBegin = cigarData.begin();
                 vector<CigarOp>::const_iterator cigarIter = cigarBegin;
                 vector<CigarOp>::const_iterator cigarEnd  = cigarData.end();
                 for ( ; cigarIter != cigarEnd; ++cigarIter ) {
                     const CigarOp& op = (*cigarIter);
                     cigarSs << op.Length << op.Type;
                 }
                 keepAlignment &= valueFilter.check(cigarSs.str());
             }
         }
         else if ( propertyName == INSERTSIZE_PROPERTY )           keepAlignment &= valueFilter.check(al.InsertSize);
         else if ( propertyName == ISDUPLICATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsDuplicate());
         else if ( propertyName == ISFAILEDQC_PROPERTY )           keepAlignment &= valueFilter.check(al.IsFailedQC());
         else if ( propertyName == ISFIRSTMATE_PROPERTY )          keepAlignment &= valueFilter.check(al.IsFirstMate());
         else if ( propertyName == ISMAPPED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsMapped());
         else if ( propertyName == ISMATEMAPPED_PROPERTY )         keepAlignment &= valueFilter.check(al.IsMateMapped());
         else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY )  keepAlignment &= valueFilter.check(al.IsMateReverseStrand());
         else if ( propertyName == ISPAIRED_PROPERTY )             keepAlignment &= valueFilter.check(al.IsPaired());
         else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY )   keepAlignment &= valueFilter.check(al.IsPrimaryAlignment());
         else if ( propertyName == ISPROPERPAIR_PROPERTY )         keepAlignment &= valueFilter.check(al.IsProperPair());
         else if ( propertyName == ISREVERSESTRAND_PROPERTY )      keepAlignment &= valueFilter.check(al.IsReverseStrand());
         else if ( propertyName == ISSECONDMATE_PROPERTY )         keepAlignment &= valueFilter.check(al.IsSecondMate());
         else if ( propertyName == ISSINGLETON_PROPERTY ) {
             const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped();
             keepAlignment &= valueFilter.check(isSingleton);
         }
         else if ( propertyName == MAPQUALITY_PROPERTY )           keepAlignment &= valueFilter.check(al.MapQuality);
         else if ( propertyName == MATEPOSITION_PROPERTY )         keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) );
         else if ( propertyName == MATEREFERENCE_PROPERTY ) {
             if ( !al.IsPaired() || !al.IsMateMapped() ) return false;
             BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID");
             const string& refName = filterToolReferences.at(al.MateRefID).RefName;
             keepAlignment &= valueFilter.check(refName);
         }
         else if ( propertyName == NAME_PROPERTY )                 keepAlignment &= valueFilter.check(al.Name);
         else if ( propertyName == POSITION_PROPERTY )             keepAlignment &= valueFilter.check(al.Position);
         else if ( propertyName == QUERYBASES_PROPERTY )           keepAlignment &= valueFilter.check(al.QueryBases);
         else if ( propertyName == REFERENCE_PROPERTY ) {
             BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID");
             const string& refName = filterToolReferences.at(al.RefID).RefName;
             keepAlignment &= valueFilter.check(refName);
         }
         else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al);
         else BAMTOOLS_ASSERT_UNREACHABLE;
         
         // if alignment fails at ANY point, just quit and return false
         if ( !keepAlignment ) return false;
     }
   
     BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... keepAlignment should be true here");
     return keepAlignment;
 }
Esempio n. 13
0
void BedWindow::WindowIntersectBam(string bamFile) {

    // load the "B" bed file into a map so
    // that we can easily compare "A" to it for overlaps
    _bedB->loadBedFileIntoMap();

    // open the BAM file
    BamReader reader;
    BamWriter writer;
    reader.Open(bamFile);

    // get header & reference information
    string bamHeader  = reader.GetHeaderText();
    RefVector refs    = reader.GetReferenceData();

    // open a BAM output to stdout if we are writing BAM
    if (_bamOutput == true) {
        // set compression mode
        BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
        if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
        writer.SetCompressionMode(compressionMode);
        // open our BAM writer
        writer.Open("stdout", bamHeader, refs);
    }

    vector<BED> hits;                   // vector of potential hits
    // reserve some space
    hits.reserve(100);

    _bedA->bedType = 6;
    BamAlignment bam;
    bool overlapsFound;
    // get each set of alignments for each pair.
    while (reader.GetNextAlignment(bam)) {

        if (bam.IsMapped()) {
            BED a;
            a.chrom = refs.at(bam.RefID).RefName;
            a.start = bam.Position;
            a.end   = bam.GetEndPosition(false, false);

            // build the name field from the BAM alignment.
            a.name = bam.Name;
            if (bam.IsFirstMate()) a.name += "/1";
            if (bam.IsSecondMate()) a.name += "/2";

            a.score  = ToString(bam.MapQuality);
            a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-";

            if (_bamOutput == true) {
                overlapsFound = FindOneOrMoreWindowOverlaps(a);
                if (overlapsFound == true) {
                    if (_noHit == false)
                        writer.SaveAlignment(bam);
                }
                else {
                    if (_noHit == true)
                        writer.SaveAlignment(bam);
                }
            }
            else {
                FindWindowOverlaps(a, hits);
                hits.clear();
            }
        }
        // BAM IsMapped() is false
        else if (_noHit == true) {
            writer.SaveAlignment(bam);
        }
    }

    // close the relevant BAM files.
    reader.Close();
    if (_bamOutput == true) {
        writer.Close();
    }
}
Esempio n. 14
0
void BedGenomeCoverage::CoverageBam(string bamFile) {

    ResetChromCoverage();

    // open the BAM file
    BamReader reader;
    if (!reader.Open(bamFile)) {
        cerr << "Failed to open BAM file " << bamFile << endl;
        exit(1);
    }

    // get header & reference information
    string header = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // load the BAM header references into a BEDTools "genome file"
    _genome = new GenomeFile(refs);
    // convert each aligned BAM entry to BED
    // and compute coverage on B
    BamAlignment bam;
    while (reader.GetNextAlignment(bam)) {
        // skip if the read is unaligned
        if (bam.IsMapped() == false)
            continue;

        bool _isReverseStrand = bam.IsReverseStrand();

        //changing second mate's strand to opposite
        if( _dUTP && bam.IsPaired() && bam.IsMateMapped() && bam.IsSecondMate())
            _isReverseStrand = !bam.IsReverseStrand();

        // skip if we care about strands and the strand isn't what
        // the user wanted
        if ( (_filterByStrand == true) &&
             ((_requestedStrand == "-") != _isReverseStrand) )
            continue;

        // extract the chrom, start and end from the BAM alignment
        string chrom(refs.at(bam.RefID).RefName);
        CHRPOS start = bam.Position;
        CHRPOS end = bam.GetEndPosition(false, false) - 1;

        // are we on a new chromosome?
        if ( chrom != _currChromName )
            StartNewChrom(chrom);
        if(_pair_chip_) {
            // Skip if not a proper pair
            if (bam.IsPaired() && (!bam.IsProperPair() or !bam.IsMateMapped()) )
                continue;
            // Skip if wrong coordinates
            if( ( (bam.Position<bam.MatePosition) && bam.IsReverseStrand() ) ||
                ( (bam.MatePosition < bam.Position) && bam.IsMateReverseStrand() ) ) {
                    //chemically designed: left on positive strand, right on reverse one
                    continue;
            }

            /*if(_haveSize) {
                if (bam.IsFirstMate() && bam.IsReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
                    int mid = bam.MatePosition+abs(bam.InsertSize)/2;
                    if(mid<_fragmentSize/2)
                        AddCoverage(0, mid+_fragmentSize/2);
                    else
                        AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
                }
                else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //put fragmentSize in to the middle of pair end_fragment
                    int mid = start+abs(bam.InsertSize)/2;
                    if(mid<_fragmentSize/2)
                        AddCoverage(0, mid+_fragmentSize/2);
                    else
                        AddCoverage(mid-_fragmentSize/2, mid+_fragmentSize/2);
                }
            } else */

            if (bam.IsFirstMate() && bam.IsReverseStrand()) { //prolong to the mate to the left
                AddCoverage(bam.MatePosition, end);
            }
            else if (bam.IsFirstMate() && bam.IsMateReverseStrand()) { //prolong to the mate to the right
                AddCoverage(start, start + abs(bam.InsertSize) - 1);
            }
        } else if (_haveSize) {
            if(bam.IsReverseStrand()) {
                if(end<_fragmentSize) { //sometimes fragmentSize is bigger :(
                    AddCoverage(0, end);
                } else {
                    AddCoverage(end + 1 - _fragmentSize, end );
                }
            } else {
                AddCoverage(start,start+_fragmentSize - 1);
            }
        } else
        // add coverage accordingly.
        if (!_only_5p_end && !_only_3p_end) {
            bedVector bedBlocks;
            // we always want to split blocks when a D CIGAR op is found.
            // if the user invokes -split, we want to also split on N ops.
            if (_obeySplits) { // "D" true, "N" true
                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, true);
            }
            else { // "D" true, "N" false
                GetBamBlocks(bam, refs.at(bam.RefID).RefName, bedBlocks, true, false);
            }
            AddBlockedCoverage(bedBlocks);
        }
        else if (_only_5p_end) {
            CHRPOS pos = ( !bam.IsReverseStrand() ) ? start : end;
            AddCoverage(pos,pos);
        }
        else if (_only_3p_end) {
            CHRPOS pos = ( bam.IsReverseStrand() ) ? start : end;
            AddCoverage(pos,pos);
        }
    }
    // close the BAM
    reader.Close();

    // process the results of the last chromosome.
    ReportChromCoverage(_currChromCoverage, _currChromSize,
            _currChromName, _currChromDepthHist);

    // report all empty chromsomes
    PrintEmptyChromosomes();

    // report the overall coverage if asked.
    PrintFinalCoverage();
}
Esempio n. 15
0
void BedIntersect::IntersectBam(string bamFile) {

	// load the "B" bed file into a map so
	// that we can easily compare "A" to it for overlaps
	_bedB->loadBedFileIntoMap();
	
	// open the BAM file
	BamReader reader;
	BamWriter writer;
	reader.Open(bamFile);

	// get header & reference information
	string header  = reader.GetHeaderText();
	RefVector refs = reader.GetReferenceData();

	// open a BAM output to stdout if we are writing BAM
	if (_bamOutput == true) {
		// open our BAM writer
        writer.Open("stdout", header, refs, _isUncompressedBam);
	}

	vector<BED> hits;
	// reserve some space
	hits.reserve(100);
	
	_bedA->bedType = 6;
	BamAlignment bam;	
	// get each set of alignments for each pair.
	while (reader.GetNextAlignment(bam)) {
		
		if (bam.IsMapped()) {	
			BED a;
			a.chrom = refs.at(bam.RefID).RefName;
			a.start = bam.Position;
			a.end   = bam.GetEndPosition(false);

			// build the name field from the BAM alignment.
			a.name = bam.Name;
			if (bam.IsFirstMate()) a.name += "/1";
			if (bam.IsSecondMate()) a.name += "/2";

			a.score  = ToString(bam.MapQuality);
			
			a.strand = "+"; 
			if (bam.IsReverseStrand()) a.strand = "-"; 
	
			if (_bamOutput == true) {
			    bool overlapsFound = false;
			    // treat the BAM alignment as a single "block"
			    if (_obeySplits == false) {
				    overlapsFound = FindOneOrMoreOverlap(a);
				}
				// split the BAM alignment into discrete blocks and
				// look for overlaps only within each block.
				else {
                    bool overlapFoundForBlock;
				    bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
				    // we don't want to split on "D" ops, hence the "false"
                    getBamBlocks(bam, refs, bedBlocks, false);
                    
                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
                	vector<BED>::const_iterator bedEnd  = bedBlocks.end();
                	for (; bedItr != bedEnd; ++bedItr) {
            	        overlapFoundForBlock = FindOneOrMoreOverlap(a);
            	        if (overlapFoundForBlock == true)
                            overlapsFound = true;
            	    }
				}
				if (overlapsFound == true) {
					if (_noHit == false)
						writer.SaveAlignment(bam);
				}
				else {
					if (_noHit == true) {
						writer.SaveAlignment(bam);
					}	
				}
			}
			else {
			    // treat the BAM alignment as a single BED "block"
			    if (_obeySplits == false) {
				    FindOverlaps(a, hits);
				    hits.clear();
			    }
			    // split the BAM alignment into discrete BED blocks and
				// look for overlaps only within each block.
			    else {
			        bedVector bedBlocks;  // vec to store the discrete BED "blocks" from a
                    getBamBlocks(bam, refs, bedBlocks, false);

                    vector<BED>::const_iterator bedItr  = bedBlocks.begin();
                	vector<BED>::const_iterator bedEnd  = bedBlocks.end();
                	for (; bedItr != bedEnd; ++bedItr) {
            	        FindOverlaps(*bedItr, hits);
                        hits.clear();
            	    }
			    }
			}
		}
	}
	
	// close the relevant BAM files.
	reader.Close();
	if (_bamOutput == true) {
		writer.Close();
	}
}
Esempio n. 16
0
int main ( int argc, char *argv[] ) { 

  struct parameters *param = 0;
  param = interface(param, argc, argv);

  //region file input (the region file should be sorted as the same way as the bam file)
  ifstream region_f;
  region_f.open(param->region_f, ios_base::in);  // the region file is opened

  //bam input and generate index if not yet 
  //-------------------------------------------------------------------------------------------------------+
  // BAM input (file or filenames?)                                                                        |
  //-------------------------------------------------------------------------------------------------------+
  char *fof = param->mapping_f;
  FILE *IN=NULL;
  char linefof[5000];
  int filecount=0;
  vector <string> fnames;

  if (strchr(fof,' ')!=NULL) {
    char *ptr;
    ptr=strtok(fof," ");
    while (ptr!=NULL) {
      fnames.push_back(ptr);
      filecount++;
      ptr=strtok(NULL," ");
    }
  } else {
    IN=fopen(fof,"rt");
    if (IN!=NULL) {
      long linecount=0;
      while (fgets(linefof,5000-1,IN)!=NULL) {
        linecount++;
        if (linefof[0]!='#' && linefof[0]!='\n') {
          char *ptr=strchr(linefof,'\n');
          if (ptr!=NULL && ptr[0]=='\n') {
            ptr[0]='\0';
          }
          FILE *dummy=NULL;
          dummy=fopen(linefof,"rt");
          if (dummy!=NULL) {     // seems to be a file of filenames...
            fclose(dummy);
            fnames.push_back(linefof);
            filecount++;
          } else if (filecount==0 || linecount>=1000-1) {  // seems to be a single file
            fnames.push_back(fof);
            filecount++;
            break;
          }
        }
      }
      fclose(IN);
    }
  }  //file or file name decided and stored in vector "fnames"

  cerr << "the input mapping files are:" << endl;
  vector <string>::iterator fit = fnames.begin();
  for(; fit != fnames.end(); fit++) {
    cerr << *fit << endl;
  }

  //-------------------------------------------------------------------------------------------------------+
  // end of file or filenames                                                                              |
  //-------------------------------------------------------------------------------------------------------+

  // open the BAM file(s)
  BamMultiReader reader;
  reader.Open(fnames);

  // get header & reference information
  string header = reader.GetHeaderText();
  RefVector refs = reader.GetReferenceData();

  if ( ! reader.LocateIndexes() )     // opens any existing index files that match our BAM files
    reader.CreateIndexes();         // creates index files for BAM files that still lack one


  // locus bias
  struct lb empty_profile = {0,0,0,0};
  vector <struct lb> locus_b(1000, empty_profile);
  // output locus bias file
  string locus_bias_set = param->lbias;
  ofstream locus_bias;
  if ( locus_bias_set != "" ) {
    locus_bias.open(param->lbias);
    if ( !locus_bias ) {
      cerr << "can not open locus_bias file.\n";
      exit(0);
    }
  }

  //should decide which chromosome
  string line;
  string old_chr = "SRP";
  string type = param->type;

  //whether do some position-level pile-up stuff
  bool posc = false;
  ofstream posc_f;
  ofstream chrmap_f;
  string poscset = param->posc;
  if ( poscset != "" ) {
    posc = true;
    posc_f.open(param->posc);
    chrmap_f.open(param->chrmap);
  }

  bool noChr;
  if ( param->nochr == 1 ){
    noChr = true;
  } else {
    noChr = false;
  }

  //regions for the input of region file
  deque <struct region> regions;

  getline(region_f, line); //get the first line
  eatline(line,regions,noChr);
  
  deque <struct region>::iterator it = regions.begin();

  while ( it->chr != old_chr ) {

    old_chr = it->chr;  // set the current chr as old chr

    int chr_id  = reader.GetReferenceID(it->chr);

    if ( chr_id == -1 ) {  //reference not found

      for (; it != regions.end() && it->chr == old_chr; ) {
        gene_processing(*it,locus_b);           // print the old region info
        it = regions.erase(it);         // erase the current region
      }
  
      while ( regions.empty() ) {    
        getline(region_f, line);
        if ( region_f.eof() ){
          cerr << "finished: end of region file, zone 0" << endl;
          break;
        }
        eatline(line, regions,noChr);
        it = regions.begin();
        if (it->chr == old_chr){  
          gene_processing(*it,locus_b);      
          regions.clear();
          continue;
        }
      }
      continue;
    }

    int chr_len = refs.at(chr_id).RefLength;

    if ( !reader.SetRegion(chr_id, 1, chr_id, chr_len) ) // here set region
      {
        cerr << "bamtools count ERROR: Jump region failed " << it->chr << endl;
        reader.Close();
        exit(1);
      }

    //pile-up pos stats
    set <string> fragment;
    map <string, unsigned int> pileup;
    bool isposPileup = false;
    unsigned int old_start   = 0;
    unsigned int total_tags  = 0;
    unsigned int total_pos   = 0;
    unsigned int pileup_pos  = 0;


    BamAlignment bam;
    while (reader.GetNextAlignment(bam)) {

      if ( bam.IsMapped() == false ) continue;   // skip unaligned reads

      unsigned int unique;
      bam.GetTag("NH", unique);
      if (param->unique == 1) {
        if (unique != 1) {                       // skipe uniquelly mapped reads
          continue;
        }
      }

      if (read_length == 0){
        read_length = bam.Length;
      }

      //cout << bam.Name << endl;
      string chrom = refs.at(bam.RefID).RefName;
      string strand = "+";
      if (bam.IsReverseStrand()) strand = "-";

      unsigned int alignmentStart =  bam.Position+1;
      unsigned int mateStart;
      if (type == "p") mateStart = bam.MatePosition+1;
      unsigned int alignmentEnd = bam.GetEndPosition();
      unsigned int cigarEnd;
      vector <int> blockLengths;
      vector <int> blockStarts;
      blockStarts.push_back(0);
      ParseCigar(bam.CigarData, blockStarts, blockLengths, cigarEnd);


      // position check for unique mapped reads (because is paired-end reads, shoule base on fragment level for paired end reads)
      if (posc == true && unique == 1) {

        if (type == "p" && fragment.count(bam.Name) > 0) 
          fragment.erase(bam.Name);

        else {

          total_tags++;
          if (type == "p"){
            fragment.insert(bam.Name);
          }
          string alignSum;
          if (type == "p") {
             alignSum = int2str(alignmentStart) + "\t" + int2str(mateStart) + "\t.\t" + strand;
          } else {
             alignSum = int2str(alignmentStart) + "\t" + int2str(alignmentEnd) + "\t.\t" + strand;
          }

          if ( alignmentStart != old_start ) {
            isposPileup = false;
            map <string, unsigned int>::iterator pit = pileup.begin();            
            for (; pit != pileup.end(); pit++) {
              posc_f << chrom << "\truping\tpileup\t" << pit->first << "\t.\t" << "Pileup=" << pit->second << endl;     //print pileup
            }
            pileup.clear();           //clear pileup set
            pileup.insert( pair <string, unsigned int> (alignSum, 1) );  //insert the new read
            total_pos++;
          }

          else if ( alignmentStart == old_start ) { // same starts
            if ( pileup.count(alignSum) > 0 ) {  // pileup
              if ( pileup[alignSum] == 1 && isposPileup == false ) { 
                pileup_pos++; isposPileup = true;
              }
              pileup[alignSum]++;
            }
            else {
              pileup.insert( pair <string, unsigned int> (alignSum, 1) );
            }
          } //same starts

        }   //new fragment

        old_start = alignmentStart;
      } // do pos check



      float incre = 1.;
      if (blockStarts.size() > 1) incre = 0.5;     // incre half for junction reads
      incre /= static_cast < float >(unique);        // for multi aligned reads

      deque <struct region>::iterator iter = regions.begin();

      if ( iter->start > alignmentEnd ) continue;  // skip reads not overlapping with the first region

      while ( iter->chr == old_chr && iter->start <= alignmentEnd && iter != regions.end() ) {

        if (iter->end < alignmentStart) {            // the region end is beyond the alignmentStart

          gene_processing(*iter,locus_b);            // processing
          iter = regions.erase(iter);                // this region should be removed
          if ( regions.empty() ) { 
            getline(region_f, line);                        // get a line of region file
            if ( ! region_f.eof() ) {
              eatline(line, regions, noChr);                         // eat a line and put it into the duque
              iter = regions.begin();
            }
            else {  // it's reaching the end of the region file
              cerr << "finished: end of region file, zone 3" << endl;
              break;
            }
          }
          continue;
        }

        if (iter->end >= alignmentStart && iter->start <= alignmentEnd) {  //overlapping, should take action

          vector <int>::iterator cigit = blockStarts.begin();
          for (; cigit != blockStarts.end(); cigit++) {
            unsigned int current_start = *cigit + alignmentStart;
            int current_pos = current_start - (iter->start);
            //cout << iter->chr << "\t" << iter->start << "\t" << iter->end << "\t" << current_start << endl;
            if ( (iter->tags).count(current_pos) > 0 ) {
              (iter->tags)[current_pos] += incre;
            }
            else
              (iter->tags).insert( pair<int, float>(current_pos, incre) );  
          }

        }  // overlapping take action!

        if ( (iter+1) != regions.end() )
          iter++;                                           // if this region is not the last element in the deque
        else {                                              // the last element
          getline(region_f, line);                          // get a line of region file
          if ( ! region_f.eof() ){
            eatline(line, regions, noChr);                         // eat a line and put it into the duque
            iter = regions.end();
            iter--;
          }
          else {  //it's reaching the end of the region file
            cerr << "finished: end of region file, zone 4" << endl;
            break;
          }
        }

      } //while

    }  // read a bam


    // print chr map
    if (posc == true) {
      chrmap_f << old_chr << "\t" << total_tags << "\t" << total_pos << "\t" << pileup_pos << endl;
    } 
 
    //somehow to loop back
    it = regions.begin();                   //reset to begin
    for (; it != regions.end() && it->chr == old_chr; ) {
      gene_processing(*it,locus_b);              // print the old region info
      it = regions.erase(it);             // erase the current region
    }
  
    while ( regions.empty() ) {    

      getline(region_f, line);
      if ( region_f.eof() ){
        cerr << "finished: end of region file, zone 5" << endl;
        //print locus bias
        for (unsigned int l = 0; l < 1000; l++){
	  locus_bias << l << "\t" << locus_b[l].ps << "\t" << locus_b[l].hs << "\t" << locus_b[l].pe << "\t" << locus_b[l].he << endl;
	}
        exit(0);
      }
      eatline(line, regions, noChr);
      it = regions.begin();
      if (it->chr == old_chr){
        gene_processing(*it, locus_b);      
        regions.clear();
        continue;
      }
    }

  } // region chr != old chr
      
  regions.clear();
  reader.Close();
  region_f.close();
  return 0;

} //main