Ejemplo n.º 1
0
void GenomeRegionSeqStats::CalcClusters(String &bamFile, int minMapQuality)
{
  SamFile sam;
  SamRecord samRecord;
  SamFileHeader samHeader;

  if(!sam.OpenForRead(bamFile.c_str()))
    error("Open BAM file %s failed!\n", bamFile.c_str());

  if(!sam.ReadHeader(samHeader)) {
      error("Read BAM file header %s failed!\n", bamFile.c_str());
  }
  
  if(depth.size()==0) depth.resize(referencegenome.sequenceLength());
  
  String contigLabel;
  uint32_t start;
  uint32_t gstart;
  Reset();
  while(sam.ReadRecord(samHeader, samRecord))
    {
      nReads++;
      if(samRecord.getFlag() & SamFlag::UNMAPPED) nUnMapped++;

      if(samRecord.getMapQuality() < minMapQuality) continue;

      CigarRoller cigar(samRecord.getCigar());

      int nonClipSequence = 0;

      if(cigar.size()!=0 && cigar[0].operation==Cigar::softClip)
          nonClipSequence = cigar[0].count;

      contigLabel = samRecord.getReferenceName();
      start = nonClipSequence + samRecord.get0BasedPosition();  // start is 0-based

      gstart = referencegenome.getGenomePosition(contigLabel.c_str(), start);

      if(IsInRegions(contigLabel, start, start+samRecord.getReadLength())) continue;

      for(uint32_t i=gstart; i<gstart+samRecord.getReadLength(); i++)
       if(depth[i]<MAXDP)
        depth[i]++;
      nMappedOutTargets++;
    }
}
Ejemplo n.º 2
0
// Finds the total base quality of a read
int Dedup_LowMem::getBaseQuality(SamRecord & record) {
    const char* baseQualities = record.getQuality();
    int readLength = record.getReadLength();
    int quality = 0.;
    if(strcmp(baseQualities, "*") == 0)
    {
        return(0);
    }
    for(int i=0; i < readLength; ++i) {
        int q = static_cast<int>(baseQualities[i])-33;
        if ( q >= myMinQual ) quality += q;
    }
    return quality;
}
Ejemplo n.º 3
0
void GenomeRegionSeqStats::CalcRegionStats(String &bamFile)
{
  SamFile sam;
  SamRecord samRecord;
  SamFileHeader samHeader;

  if(!sam.OpenForRead(bamFile.c_str()))
    error("Open BAM file %s failed!\n", bamFile.c_str());

  if(!sam.ReadHeader(samHeader)) {
      error("Read BAM file header %s failed!\n", bamFile.c_str());
  }
  
  String contigLabel;
  int start, end;
  Reset();
  while(sam.ReadRecord(samHeader, samRecord))
    {
      nReads++;
      if(samRecord.getFlag() & SamFlag::UNMAPPED) nUnMapped++;

      if(contigFinishedCnt>=contigs.size()) continue;

      CigarRoller cigar(samRecord.getCigar());

      int nonClipSequence = 0;

      if(cigar.size()!=0 && cigar[0].operation==Cigar::softClip)
          nonClipSequence = cigar[0].count;

      contigLabel = samRecord.getReferenceName();
      start = nonClipSequence + samRecord.get0BasedPosition();  // start is 0-based
      end = start + samRecord.getReadLength() - 1;
      if(UpdateRegionStats(contigLabel, start, end)) nMapped2Targets++;
    }
    CalcRegionReadCountInGCBins();
    CalcGroupReadCountInGCBins();
    std::cout << "Total reads : " << nReads << std::endl;
}
Ejemplo n.º 4
0
int Likelihood::getFlankingCount( string & chr, int pos )
{
	bool status = bam.SetReadSection( chr.c_str(), pos - avr_read_length * 2, pos + avr_read_length/2 );
	if (!status)
		return 0;

	int n = 0;
	SamRecord rec;
	while(bam.ReadRecord(bam_header, rec)) {
		if ( !(rec.getFlag() & 0x2) )
			continue;
		if (IsSupplementary(rec.getFlag()))
			continue;
		if (rec.getReadLength() < avr_read_length / 3)
			continue;
		if (rec.getMapQuality() < MIN_QUALITY)
			continue;
		if (rec.get1BasedPosition() + MIN_CLIP/2 <= pos && rec.get1BasedAlignmentEnd() - MIN_CLIP/2 >= pos )
			n++;
	}

	return n;
}
Ejemplo n.º 5
0
bool Recab::processReadApplyTable(SamRecord& samRecord)
{
    static BaseData data;
    static std::string readGroup;
    static std::string aligTypes;

    int seqLen = samRecord.getReadLength();

    uint16_t  flag = samRecord.getFlag();

    // Check if the flag contains an exclude.
    if((flag & myIntApplyExcludeFlags) != 0)
    {
        // Do not apply the recalibration table to this read.
        ++myNumApplySkipped;
        return(false);
    }
    ++myNumApplyReads;
   
    readGroup = samRecord.getString("RG").c_str();

    // Look for the read group in the map.
    // TODO - extra string constructor??
    RgInsertReturn insertRet = 
        myRg2Id.insert(std::pair<std::string, uint16_t>(readGroup, 0));
    if(insertRet.second == true)
    {
        // New element inserted.
        insertRet.first->second = myId2Rg.size();
        myId2Rg.push_back(readGroup);
    }

    data.rgid = insertRet.first->second;

    if(!myQField.IsEmpty())
    {
        // Check if there is an old quality.
        const String* oldQPtr =
            samRecord.getStringTag(myQField.c_str());
        if((oldQPtr != NULL) && (oldQPtr->Length() == seqLen))
        {
            // There is an old quality, so use that.
            myQualityStrings.oldq = oldQPtr->c_str();
        }
        else
        {
            myQualityStrings.oldq = samRecord.getQuality();
        }
    }
    else
    {
        myQualityStrings.oldq = samRecord.getQuality();
    }

    if(myQualityStrings.oldq.length() != (unsigned int)seqLen)
    {
        Logger::gLogger->warning("Quality is not the correct length, so skipping recalibration on that record.");
        return(false);
    }

    myQualityStrings.newq.resize(seqLen);

    ////////////////
    ////// iterate sequence
    ////////////////
    int32_t seqPos = 0;
    int seqIncr = 1;

    bool reverse;
    if(SamFlag::isReverse(flag))
    {
        reverse = true;
        seqPos = seqLen - 1;
        seqIncr = -1;
    }
    else
        reverse = false;

    // Check which read - this will be the same for all positions, so 
    // do this outside of the smaller loop.
    if(!SamFlag::isPaired(flag) || SamFlag::isFirstFragment(flag))
        // Mark as first if it is not paired or if it is the
        // first in the pair.
        data.read = 0;
    else
        data.read = 1;

    // Set unsetbase for curBase.
    // This will be used for the prebase of cycle 0.
    data.curBase = 'K';

    for (data.cycle = 0; data.cycle < seqLen; data.cycle++, seqPos += seqIncr)
    {
        // Set the preBase to the previous cycle's current base.
        // For cycle 0, curBase was set to a default value.
        data.preBase = data.curBase;

        // Get the current base.
        data.curBase = samRecord.getSequence(seqPos);

        if(reverse)
        {
            // Complement the current base.
            data.curBase =
                BaseAsciiMap::base2complement[(unsigned int)(data.curBase)];
        }

        // Get quality
        data.qual = 
            BaseUtilities::getPhredBaseQuality(myQualityStrings.oldq[seqPos]);

        // skip bases with quality below the minimum set.
        if(data.qual < myMinBaseQual)
        {
            myQualityStrings.newq[seqPos] = myQualityStrings.oldq[seqPos];
            continue;
        }

        // Update quality score
        uint8_t qemp = hasherrormodel.getQemp(data);
        qemp = mySqueeze.getQualCharFromQemp(qemp);
        if(qemp > myMaxBaseQualChar)
        {
            qemp = myMaxBaseQualChar;
        }
        myQualityStrings.newq[seqPos] = qemp;
    }

    if(!myStoreQualTag.IsEmpty())
    {
        samRecord.addTag(myStoreQualTag, 'Z', myQualityStrings.oldq.c_str());
    }
    samRecord.setQuality(myQualityStrings.newq.c_str());

    return true;
}
Ejemplo n.º 6
0
bool Recab::processReadBuildTable(SamRecord& samRecord)
{
    static BaseData data;
    static std::string chromosomeName;
    static std::string readGroup;
    static std::string aligTypes;

    int seqLen = samRecord.getReadLength();
    
    // Check if the parameters have been processed.
    if(!myParamsSetup)
    {
        // This throws an exception if the reference cannot be setup.
        processParams();
    }

    uint16_t  flag = samRecord.getFlag();

    if(!SamFlag::isMapped(flag))
    {
        // Unmapped, skip processing
        ++myUnMappedCount;
    }
    else
    {
        // This read is mapped.
        ++myMappedCount;
    }

    if(SamFlag::isSecondary(flag))
    {
        // Secondary read
        ++mySecondaryCount;
    }
    if(SamFlag::isDuplicate(flag))
    {
        ++myDupCount;
    }
    if(SamFlag::isQCFailure(flag))
    {
        ++myQCFailCount;
    }

    // Check if the flag contains an exclude.
    if((flag & myIntBuildExcludeFlags) != 0)
    {
        // Do not use this read for building the recalibration table.
        ++myNumBuildSkipped;
        return(false);
    }

    if(samRecord.getMapQuality() == 0)
    {
        // 0 mapping quality, so skip processing.
        ++myMapQual0Count;
        ++myNumBuildSkipped;
        return(false);
    }
    if(samRecord.getMapQuality() == 255)
    {
        // 255 mapping quality, so skip processing.
        ++myMapQual255Count;
        ++myNumBuildSkipped;
        return(false);
    }
    
    chromosomeName = samRecord.getReferenceName();
    readGroup = samRecord.getString("RG").c_str();

    // Look for the read group in the map.
    // TODO - extra string constructor??
    RgInsertReturn insertRet = 
        myRg2Id.insert(std::pair<std::string, uint16_t>(readGroup, 0));
    if(insertRet.second == true)
    {
        // New element inserted.
        insertRet.first->second = myId2Rg.size();
        myId2Rg.push_back(readGroup);
    }

    data.rgid = insertRet.first->second;


    //reverse
    bool reverse;
    if(SamFlag::isReverse(flag))
        reverse = true;
    else
        reverse = false;

    if(myReferenceGenome == NULL)
    {
        throw std::runtime_error("Failed to setup Reference File.\n");
    }

    genomeIndex_t mapPos = 
        myReferenceGenome->getGenomePosition(chromosomeName.c_str(), 
                                             samRecord.get1BasedPosition());

    if(mapPos==INVALID_GENOME_INDEX)
    {
    	Logger::gLogger->warning("INVALID_GENOME_INDEX (chrom:pos %s:%ld) and record skipped... Reference in BAM is different from the ref used here!", chromosomeName.c_str(), samRecord.get1BasedPosition());

        ++myNumBuildSkipped;
        return false;
    }

    if(!myQField.IsEmpty())
    {
        // Check if there is an old quality.
        const String* oldQPtr = 
            samRecord.getStringTag(myQField.c_str());
        if((oldQPtr != NULL) && (oldQPtr->Length() == seqLen))
        {
            // There is an old quality, so use that.
            myQualityStrings.oldq = oldQPtr->c_str();
        }
        else
        {
            // Tag was not found, so use the current quality.
            ++myNumQualTagErrors;
            if(myNumQualTagErrors == 1)
            {
                Logger::gLogger->warning("Recab: %s tag was not found/invalid, so using the quality field in records without the tag", myQField.c_str());
            }
            myQualityStrings.oldq = samRecord.getQuality();
        }
        //printf("%s\n",samRecord.getQuality());
        //printf("%s:%s\n",myQField.c_str(),temp.c_str());
    }
    else
    {
        myQualityStrings.oldq = samRecord.getQuality();
    }

    if(myQualityStrings.oldq.length() != (unsigned int)seqLen)
    {
        Logger::gLogger->warning("Quality is not the correct length, so skipping recalibration on that record.");
        ++myNumBuildSkipped;
        return(false);
    }

    aligTypes = "";
    Cigar* cigarPtr = samRecord.getCigarInfo();

    if(cigarPtr == NULL)
    {
        Logger::gLogger->warning("Failed to get the cigar");
        ++myNumBuildSkipped;
        return(false);
    }

    // This read will be used for building the recab table.
    ++myNumBuildReads;

    ////////////////
    ////// iterate sequence
    ////////////////
    genomeIndex_t refPos = 0;
    int32_t refOffset = 0;
    int32_t prevRefOffset = Cigar::INDEX_NA;
    int32_t seqPos = 0;
    int seqIncr = 1;
    if(reverse)
    {
        seqPos = seqLen - 1;
        seqIncr = -1;
    }

    // read
    if(!SamFlag::isPaired(flag) || SamFlag::isFirstFragment(flag))
        // Mark as first if it is not paired or if it is the
        // first in the pair.
        data.read = 0;
    else
        data.read = 1;

    // Set unsetbase for curBase.
    // This will be used for the prebase of cycle 0.
    data.curBase = 'K';

    for (data.cycle = 0; data.cycle < seqLen; data.cycle++, seqPos += seqIncr)
    {
        // Store the previous current base in preBase.
        data.preBase = data.curBase;

        // Get the current base before checking if we are going to
        // process this position so it will be set for the next position.
        data.curBase = samRecord.getSequence(seqPos);
        if(reverse)
        {
            // Complement the current base.
            // The prebase is already complemented.
            data.curBase = 
                BaseAsciiMap::base2complement[(unsigned int)(data.curBase)];
        }
        
        // Get the reference offset.
        refOffset = cigarPtr->getRefOffset(seqPos);
        if(refOffset == Cigar::INDEX_NA)
        {
            // Not a match/mismatch, so continue to the next one which will
            // not have a previous match/mismatch.
            // Set previous ref offset to a negative so
            // the next one won't be kept.
            prevRefOffset = -2;
            continue;
        }

        // This one is a match.
        refPos = mapPos + refOffset;

        // Check to see if we should process this position.
        // Do not process if it is cycle 0 and:
        //   1) current base is in dbsnp
        if(data.cycle == 0)
        {
            if(!(myDbsnpFile.IsEmpty()) && myDbSNP[refPos])
            {
                // Save the previous reference offset.
                ++myNumDBSnpSkips;
                prevRefOffset = refOffset;
                continue;
            }
        }
        else
        {
            // Do not process if it is not cycle 0 and:
            //   1) previous reference position not adjacent 
            //      (not a match/mismatch)
            //   2) previous base is in dbsnp
            //   3) current base is in dbsnp
            if((!myKeepPrevNonAdjacent && (refOffset != (prevRefOffset + seqIncr))) ||
               (data.preBase == 'K'))
            {
                // Save the previous reference offset.
                prevRefOffset = refOffset;
                continue;
            }
            if(!(myDbsnpFile.IsEmpty()) && 
               (myDbSNP[refPos] ||
                (!myKeepPrevDbsnp && myDbSNP[refPos - seqIncr])))
            {
                ++myNumDBSnpSkips;
                // Save the previous reference offset.
                prevRefOffset = refOffset;
                continue;
            }
       }
        
        // Save the previous reference offset.
        prevRefOffset = refOffset;

        // Set the reference & read bases in the Covariates
        char refBase = (*myReferenceGenome)[refPos];

        if(BaseUtilities::isAmbiguous(refBase))
        {
            // N reference, so skip it when building the table.
            ++myAmbiguous;
            continue;
        }

        if(reverse)
        {
            refBase = BaseAsciiMap::base2complement[(unsigned int)(refBase)];
        }

        // Get quality char
        data.qual = 
            BaseUtilities::getPhredBaseQuality(myQualityStrings.oldq[seqPos]);

        // skip bases with quality below the minimum set.
        if(data.qual < myMinBaseQual)
        {
            ++mySubMinQual;
            continue;
        }

        if(BaseUtilities::areEqual(refBase, data.curBase)
           && (BaseAsciiMap::base2int[(unsigned int)(data.curBase)] < 4))
            myBMatchCount++;
        else
            myBMismatchCount++;

        hasherrormodel.setCell(data, refBase);
        myBasecounts++;
    }
    return true;
}
Ejemplo n.º 7
0
bool BamProcessor::processRecord ()
{
    trclog << "\nProcessing record " << read_cnt_ << " - " << rec_.getReadName () << ", " << rec_.get0BasedUnclippedEnd () << "->" << rec_.getReadLength () << ", ref " << rec_.getReferenceName () << std::endl;
    const char* seq = rec_.getSequence ();
    unsigned position = rec_.get0BasedPosition ();
    unsigned new_position = position;
    bool reverse_match = (rec_.getFlag () & 0x10);

    Cigar* cigar_p = rec_.getCigarInfo ();
    if (!cigar_p->size ())  // can not recreate reference is cigar is missing. Keep record unaligned.
    {                       // TODO: allow to specify and load external reference
        ++ unaligned_cnt_;
        return true;
    }

    myassert (cigar_p);

    const String *mdval = rec_.getStringTag ("MD");
    if (!mdval) // can not recreate reference is MD tag is missing. Keep record as is.
    {
        warn << "No MD Tag for record " << proc_cnt_ << ". Skipping record." << std::endl;
        ++nomd_cnt_;
        return true; // record will be kept as-is.
    }
    std::string md_tag = mdval->c_str ();

    // find the non-clipped region
    uint32_t clean_len;
    EndClips clips;
    const char* clean_read = clip_seq (seq, *cigar_p, clean_len, clips);

    // find length needed for the reference
    // this reserves space enough for entire refference, including softclipped ends.
    unsigned ref_len = cigar_p->getExpectedReferenceBaseCount ();
    if (ref_buffer_sz_ < ref_len)
    {
        ref_buffer_sz_ = (1 + ref_len / REF_BUF_INCR) * REF_BUF_INCR;
        ref_buffer_.reset (ref_buffer_sz_);
    }
    if (clean_len > MAX_SEQ_LEN || ref_len > MAX_SEQ_LEN)
    {
        ++ toolongs_;
        return true;
    }

    // recreate reference by Query, Cigar, and MD tag. Do not include softclipped ends in the recreated sequence (use default last parameter)
    recreate_ref (seq, rec_.getReadLength (), cigar_p, md_tag.c_str (), ref_buffer_, ref_buffer_sz_);

    unsigned qry_ins; // extra bases in query     == width_left
    unsigned ref_ins; // extra bases in reference == width_right
    band_width (*cigar_p, qry_ins, ref_ins);

    if (log_matr_ || log_base_)
    {
        logfile_ << "Record " << read_cnt_ << ": " << rec_.getReadName () << "\n"
                 << "   sequence (" << rec_.getReadLength () << " bases)\n";
    }

    CigarRoller roller;
    int ref_shift = 0;  // shift of the new alignment position on refereance relative the original
    unsigned qry_off, ref_off; // offsets on the query and reference of the first non-clipped aligned bases
    double new_score = 0;

    switch (p_->algo ())
    {
        case ContalignParams::TEMPL:
        {
            // call aligner
            new_score = taligner_.eval (clean_read, clean_len, ref_buffer_, ref_len, 0, band_width_);
            // read traceback
            // TODO: convert directly to cigar
            genstr::Alignment* al = taligner_.trace ();
            // convert alignment to cigar
            ref_shift = roll_cigar (roller, *al, clean_len, clips, qry_off, ref_off);
        }
        break;
        case ContalignParams::PLAIN:
        {
            new_score = aligner_.align_band (
                clean_read,                     // xseq
                clean_len,                      // xlen
                ref_buffer_,                    // yseq
                ref_len,                        // ylen
                0,                              // xpos
                0,                              // ypos
                std::max (clean_len, ref_len),  // segment length
                qry_ins + band_width_,          // width_left
                false,                          // unpack
                ref_ins + band_width_,          // width_right - forces to width_left
                true,                           // to_beg
                true                            // to_end
                );
            unsigned bno = aligner_.backtrace (
                    batches_,      // BATCH buffer
                    max_batch_no_, // size of BATCH buffer
                    false,         // fill the BATCH array in reverse direction
                    ref_ins + band_width_ // width
                                    );
            // convert alignment to cigar
            ref_shift = roll_cigar (roller, batches_, bno, clean_len, clips, qry_off, ref_off);
        }
        break;
        case ContalignParams::POLY:
        {
            new_score = contalign_.align_band (
                clean_read,                     // xseq
                clean_len,                      // xlen
                ref_buffer_,                    // yseq
                ref_len,                        // ylen
                0,                              // xpos
                0,                              // ypos
                std::max (clean_len, ref_len),  // segment length
                qry_ins + band_width_,          // width_left
                false,                          // unpack
                ref_ins + band_width_,          // width_right - forces to width_left
                true,                           // to_beg
                true                            // to_end
                );
            unsigned bno = contalign_.backtrace (
                    batches_,      // BATCH buffer
                    max_batch_no_, // size of BATCH buffer
                    false,         // fill the BATCH array in reverse direction
                    ref_ins + band_width_ // width
                                    );
            // convert alignment to cigar
            ref_shift = roll_cigar (roller, batches_, bno, clean_len, clips, qry_off, ref_off);
        }
        break;
        default:
        break;
    }
    ++realigned_cnt_;
    // compare original and new cigar (and location)
    if (ref_shift || !(*cigar_p == roller))
    {
        // save original cigar and position for reporting
        std::string orig_cigar_str;
        rec_.getCigarInfo ()->getCigarString (orig_cigar_str);
        int32_t prior_pos = rec_.get0BasedPosition ();

        // replace cigar
        rec_.setCigar (roller);
        ++ modified_cnt_;
        // update pos_adjusted_cnt if position changed
        if (ref_shift != 0)
        {
            myassert (prior_pos + ref_shift >= 0);
            rec_.set0BasedPosition (prior_pos + ref_shift);
            ++ pos_adjusted_cnt_;
        }
        if (log_diff_)
        {
            const unsigned MAX_BATCH_PRINTED = 100;
            BATCH batches [MAX_BATCH_PRINTED];
            std::string new_cigar_str;
            unsigned bno;
            int swscore;

            rec_.getCigarInfo ()->getCigarString (new_cigar_str);
            if (!log_base_ && !log_matr_)
                logfile_ << "Record " << read_cnt_ << ": " << rec_.getReadName () << " (" << rec_.getReadLength () << " bases)\n";

            logfile_ << "   ORIG ALIGNMENT:" << std::right << std::setw (9) << prior_pos+1 << "->" <<  orig_cigar_str << "\n";
            bno = cigar_to_batches (orig_cigar_str, batches, MAX_BATCH_PRINTED);
            swscore = align_score (batches, bno, clean_read, ref_buffer_, p_->gip (), p_->gep (), p_->mat (), p_->mis ());
            print_batches (clean_read, clean_len, false, ref_buffer_, ref_len, false, batches, bno, logfile_, false, prior_pos + clips.soft_beg_, clips.soft_beg_, 0, 160);
            logfile_ << "\n     'classic' SW score is " << swscore << "\n";

            logfile_ << "   NEW ALIGNMENT:" << std::right << std::setw (9) << rec_.get1BasedPosition () << "->" <<  new_cigar_str << std::endl;
            bno = cigar_to_batches (new_cigar_str, batches, MAX_BATCH_PRINTED);
            swscore = align_score (batches, bno, clean_read + qry_off, ref_buffer_ + ref_off, p_->gip (), p_->gep (), p_->mat (), p_->mis ());
            print_batches (clean_read + qry_off, clean_len - qry_off, false, ref_buffer_ + ref_off, ref_len - ref_off, false, batches, bno, logfile_, false, prior_pos + clips.soft_beg_ + ref_off, clips.soft_beg_ + qry_off, 0, 160);
            logfile_ << "\n      'classic' SW score is " << swscore;
            logfile_ << "\n      alternate (context-aware) score is " << new_score << ", used bandwidth left: " << qry_ins + band_width_ << ", right: " << ref_ins + band_width_ << "\n" << std::endl;
        }
        else if (log_base_)
        {
            logfile_ << "Recomputed alignment differs from original:\n";
            logfile_ << "   ORIG ALIGNMENT:" << std::right << std::setw (9) << prior_pos+1 << "->" <<  orig_cigar_str << "\n";
            std::string new_cigar_str;
            rec_.getCigarInfo ()->getCigarString (new_cigar_str);
            logfile_ << "    NEW ALIGNMENT:" << std::right << std::setw (9) << rec_.get1BasedPosition () << "->" <<  new_cigar_str << "\n" << std::endl;
        }
    }
    else
    {
        if (log_base_)
        {
            logfile_ << "Recomputed alignment matches the original:\n";
            std::string orig_cigar_str;
            rec_.getCigarInfo ()->getCigarString (orig_cigar_str);
            int32_t prior_pos = rec_.get0BasedPosition ();
            logfile_ << "   " << std::right << std::setw (9) << prior_pos+1 << "->" <<  orig_cigar_str << "\n" << std::endl;
        }
    }
    return true;
}
Ejemplo n.º 8
0
void validateRead1ModQuality(SamRecord& samRecord)
{
    //////////////////////////////////////////
    // Validate Record 1
    // Create record structure for validating.
    int expectedBlockSize = 89;
    const char* expectedReferenceName = "1";
    const char* expectedMateReferenceName = "1";
    const char* expectedMateReferenceNameOrEqual = "=";

    bamRecordStruct* expectedRecordPtr =
        (bamRecordStruct *) malloc(expectedBlockSize + sizeof(int));

    char tag[3];
    char type;
    void* value;
    bamRecordStruct* bufferPtr;
    unsigned char* varPtr;

    expectedRecordPtr->myBlockSize = expectedBlockSize;
    expectedRecordPtr->myReferenceID = 0;
    expectedRecordPtr->myPosition = 1010;
    expectedRecordPtr->myReadNameLength = 23;
    expectedRecordPtr->myMapQuality = 0;
    expectedRecordPtr->myBin = 4681;
    expectedRecordPtr->myCigarLength = 2;
    expectedRecordPtr->myFlag = 73;
    expectedRecordPtr->myReadLength = 5;
    expectedRecordPtr->myMateReferenceID = 0;
    expectedRecordPtr->myMatePosition = 1010;
    expectedRecordPtr->myInsertSize = 0;
   
    // Check the alignment end
    assert(samRecord.get0BasedAlignmentEnd() == 1016);
    assert(samRecord.get1BasedAlignmentEnd() == 1017);
    assert(samRecord.getAlignmentLength() == 7);
    assert(samRecord.get0BasedUnclippedStart() == 1010);
    assert(samRecord.get1BasedUnclippedStart() == 1011);
    assert(samRecord.get0BasedUnclippedEnd() == 1016);
    assert(samRecord.get1BasedUnclippedEnd() == 1017);

    // Check the accessors.
    assert(samRecord.getBlockSize() == expectedRecordPtr->myBlockSize);
    assert(samRecord.getReferenceID() == expectedRecordPtr->myReferenceID);
    assert(strcmp(samRecord.getReferenceName(), expectedReferenceName) == 0);
    assert(samRecord.get1BasedPosition() == expectedRecordPtr->myPosition + 1);
    assert(samRecord.get0BasedPosition() == expectedRecordPtr->myPosition);
    assert(samRecord.getReadNameLength() == 
           expectedRecordPtr->myReadNameLength);
    assert(samRecord.getMapQuality() == expectedRecordPtr->myMapQuality);
    assert(samRecord.getBin() == expectedRecordPtr->myBin);
    assert(samRecord.getCigarLength() == expectedRecordPtr->myCigarLength);
    assert(samRecord.getFlag() == expectedRecordPtr->myFlag);
    assert(samRecord.getReadLength() == expectedRecordPtr->myReadLength);
    assert(samRecord.getMateReferenceID() ==
           expectedRecordPtr->myMateReferenceID);
    assert(strcmp(samRecord.getMateReferenceName(), 
                  expectedMateReferenceName) == 0);
    assert(strcmp(samRecord.getMateReferenceNameOrEqual(), 
                  expectedMateReferenceNameOrEqual) == 0);
    assert(samRecord.get1BasedMatePosition() == 
           expectedRecordPtr->myMatePosition + 1);
    assert(samRecord.get0BasedMatePosition() ==
           expectedRecordPtr->myMatePosition);
    assert(samRecord.getInsertSize() == expectedRecordPtr->myInsertSize);
    assert(strcmp(samRecord.getReadName(), "1:1011:F:255+17M15D20M") == 0);
    assert(strcmp(samRecord.getCigar(), "5M2D") == 0);
    assert(strcmp(samRecord.getSequence(), "CCGAA") == 0);
    assert(strcmp(samRecord.getQuality(), "ABCDE") == 0);
    assert(samRecord.getNumOverlaps(1010, 1017) == 5);
    assert(samRecord.getNumOverlaps(1010, 1016) == 5);
    assert(samRecord.getNumOverlaps(1012, 1017) == 3);
    assert(samRecord.getNumOverlaps(1015, 1017) == 0);
    assert(samRecord.getNumOverlaps(1017, 1010) == 0);
    assert(samRecord.getNumOverlaps(1013, 1011) == 0);
    assert(samRecord.getNumOverlaps(-1, 1017) == 5);

    // Reset the tag iter, since the tags have already been read.
    samRecord.resetTagIter();

    // Check the tags.
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'A');
    assert(tag[1] == 'M');
    assert(type == 'i');
    assert(*(char*)value == 0);
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'M');
    assert(tag[1] == 'D');
    assert(type == 'Z');
    assert(*(String*)value == "37");
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'N');
    assert(tag[1] == 'M');
    assert(type == 'i');
    assert(*(char*)value == 0);
    assert(samRecord.getNextSamTag(tag, type, &value) == true);
    assert(tag[0] == 'X');
    assert(tag[1] == 'T');
    assert(type == 'A');
    assert(*(char*)value == 'R');
    // No more tags, should return false.
    assert(samRecord.getNextSamTag(tag, type, &value) == false);
    assert(samRecord.getNextSamTag(tag, type, &value) == false);

    // Get the record ptr.   
    bufferPtr = (bamRecordStruct*)samRecord.getRecordBuffer();
    // Validate the buffers match.
    assert(bufferPtr->myBlockSize == expectedRecordPtr->myBlockSize);
    assert(bufferPtr->myReferenceID == expectedRecordPtr->myReferenceID);
    assert(bufferPtr->myPosition == expectedRecordPtr->myPosition);
    assert(bufferPtr->myReadNameLength == expectedRecordPtr->myReadNameLength);
    assert(bufferPtr->myMapQuality == expectedRecordPtr->myMapQuality);
    assert(bufferPtr->myBin == expectedRecordPtr->myBin);
    assert(bufferPtr->myCigarLength == expectedRecordPtr->myCigarLength);
    assert(bufferPtr->myFlag == expectedRecordPtr->myFlag);
    assert(bufferPtr->myReadLength == expectedRecordPtr->myReadLength);
    assert(bufferPtr->myMateReferenceID ==
           expectedRecordPtr->myMateReferenceID);
    assert(bufferPtr->myMatePosition == expectedRecordPtr->myMatePosition);
    assert(bufferPtr->myInsertSize == expectedRecordPtr->myInsertSize);

    // Validate the variable length fields in the buffer.
    // Set the pointer to the start of the variable fields.
    varPtr = (unsigned char*)(&(bufferPtr->myData[0]));

    // Validate the readname.
    for(int i = 0; i < expectedRecordPtr->myReadNameLength; i++)
    {
        assert(*varPtr == samRecord.getReadName()[i]);
        varPtr++;
    }

    // Validate the cigar.
    // The First cigar is 5M which is 5 << 4 | 0 = 80
    assert(*(unsigned int*)varPtr == 80);
    // Increment the varptr the size of an int.
    varPtr += 4;
    // The 2nd cigar is 2D which is 2 << 4 | 2 = 34
    assert(*(unsigned int*)varPtr == 34);
    // Increment the varptr the size of an int.
    varPtr += 4;
   
    // Validate the sequence.
    // CC = 0x22
    assert(*varPtr == 0x22);
    varPtr++;
    // GA = 0x41
    assert(*varPtr == 0x41);
    varPtr++;
    // A  = 0x10
    assert(*varPtr == 0x10);
    varPtr++;
  
    // Validate the Quality
    for(int i = 0; i < expectedRecordPtr->myReadLength; i++)
    {
        assert(*varPtr == samRecord.getQuality()[i] - 33);
        varPtr++;
    }

    // Validate the tags.  
    assert(*varPtr == 'A');
    varPtr++;
    assert(*varPtr == 'M');
    varPtr++;
    assert(*varPtr == 'C');
    varPtr++;
    assert(*varPtr == 0);
    varPtr++;
    assert(*varPtr == 'M');
    varPtr++;
    assert(*varPtr == 'D');
    varPtr++;
    assert(*varPtr == 'Z');
    varPtr++;
    assert(*varPtr == '3');
    varPtr++;
    assert(*varPtr == '7');
    varPtr++;
    assert(*varPtr == 0);
    varPtr++;
    assert(*varPtr == 'N');
    varPtr++;
    assert(*varPtr == 'M');
    varPtr++;
    assert(*varPtr == 'C');
    varPtr++;
    assert(*varPtr == 0);
    varPtr++;
    assert(*varPtr == 'X');
    varPtr++;
    assert(*varPtr == 'T');
    varPtr++;
    assert(*varPtr == 'A');
    varPtr++;
    assert(*varPtr == 'R');
    varPtr++;
}
Ejemplo n.º 9
0
SamFilter::FilterStatus SamFilter::clipOnMismatchThreshold(SamRecord& record, 
                                                           GenomeSequence& refSequence,
                                                           double mismatchThreshold)
{
    // Read & clip from the left & right.    
    SamQuerySeqWithRefIter iterFromFront(record, refSequence, true);
    SamQuerySeqWithRefIter iterFromBack(record, refSequence, false);

    SamSingleBaseMatchInfo baseMatchInfo;

    int32_t readLength = record.getReadLength();
    // Init last front clip to be prior to the lastFront index (0).
    const int32_t initialLastFrontClipPos = -1;
    int32_t lastFrontClipPos = initialLastFrontClipPos;
    // Init first back clip to be past the last index (readLength).
    int32_t firstBackClipPos = readLength;

    bool fromFrontComplete = false;
    bool fromBackComplete = false;
    int32_t numBasesFromFront = 0;
    int32_t numBasesFromBack = 0;
    int32_t numMismatchFromFront = 0;
    int32_t numMismatchFromBack = 0;

    //////////////////////////////////////////////////////////
    // Determining the clip positions.
    while(!fromFrontComplete || !fromBackComplete)
    {
        // Read from the front (left to right) of the read until
        // more have been read from that direction than the opposite direction.
        while(!fromFrontComplete && 
              ((numBasesFromFront <= numBasesFromBack) ||
               (fromBackComplete)))
        {
            if(iterFromFront.getNextMatchMismatch(baseMatchInfo) == false)
            {
                // Nothing more to read in this direction.
                fromFrontComplete = true;
                break;
            }
            // Got a read.  Check to see if it is to or past the last clip.
            if(baseMatchInfo.getQueryIndex() >= firstBackClipPos)
            {
                // This base is past where we are clipping, so we
                // are done reading in this direction.
                fromFrontComplete = true;
                break;
            }
            // This is an actual base read from the left to the
            // right, so up the counter and determine if it was a mismatch.
            ++numBasesFromFront;

            if(baseMatchInfo.getType() == SamSingleBaseMatchInfo::MISMATCH)
            {
                // Mismatch
                ++numMismatchFromFront;
                // Check to see if it is over the threshold.
                double mismatchPercent = 
                    (double)numMismatchFromFront / numBasesFromFront;
                if(mismatchPercent > mismatchThreshold)
                {
                    // Need to clip.
                    lastFrontClipPos = baseMatchInfo.getQueryIndex();
                    // Reset the counters.
                    numBasesFromFront = 0;
                    numMismatchFromFront = 0;
                }
            }
        }

        // Now, read from right to left until more have been read
        // from the back than from the front.
        while(!fromBackComplete && 
              ((numBasesFromBack <= numBasesFromFront) ||
               (fromFrontComplete)))
        {
            if(iterFromBack.getNextMatchMismatch(baseMatchInfo) == false)
            {
                // Nothing more to read in this direction.
                fromBackComplete = true;
                break;
            }
            // Got a read.  Check to see if it is to or past the first clip.
            if(baseMatchInfo.getQueryIndex() <= lastFrontClipPos)
            {
                // This base is past where we are clipping, so we
                // are done reading in this direction.
                fromBackComplete = true;
                break;
            }
            // This is an actual base read from the right to the
            // left, so up the counter and determine if it was a mismatch.
            ++numBasesFromBack;

            if(baseMatchInfo.getType() == SamSingleBaseMatchInfo::MISMATCH)
            {
                // Mismatch
                ++numMismatchFromBack;
                // Check to see if it is over the threshold.
                double mismatchPercent = 
                    (double)numMismatchFromBack / numBasesFromBack;
                if(mismatchPercent > mismatchThreshold)
                {
                    // Need to clip.
                    firstBackClipPos = baseMatchInfo.getQueryIndex();
                    // Reset the counters.
                    numBasesFromBack = 0;
                    numMismatchFromBack = 0;
                }
            }
        }
    }

    //////////////////////////////////////////////////////////
    // Done determining the clip positions, so clip.
    // To determine the number of clips from the front, add 1 to the
    // lastFrontClipPos since the index starts at 0.
    // To determine the number of clips from the back, subtract the
    // firstBackClipPos from the readLength.
    // Example:
    // Pos:  012345
    // Read: AAAAAA
    // Read Length = 6.  If lastFrontClipPos = 2 and firstBackClipPos = 4, numFrontClips = 3 & numBack = 2.
    return(softClip(record, lastFrontClipPos + 1, readLength - firstBackClipPos));
}
Ejemplo n.º 10
0
// Soft Clip from the end of the read at the specified reference position.
int32_t CigarHelper::softClipEndByRefPos(SamRecord& record, 
                                         int32_t refPosition0Based,
                                         CigarRoller& newCigar)
{
    newCigar.clear();
    Cigar* cigar = record.getCigarInfo();
    if(cigar == NULL)
    {
        // Failed to get the cigar.
        ErrorHandler::handleError("Soft clipping, but failed to read the cigar");
        return(NO_CLIP);
    }

    // No cigar or position in the record, so return no clip.
    if((cigar->size() == 0) || (record.get0BasedPosition() == -1))
    {
        return(NO_CLIP);
    }

    // Check to see if the reference position occurs after the record ends,
    // if so, do no clipping.
    if(refPosition0Based > record.get0BasedAlignmentEnd())
    {
        // Not within this read, so nothing to clip.
        newCigar.Set(record.getCigar());
        return(NO_CLIP);
    }

    // The position falls before the read ends, so loop through until the
    // position is found.
    int32_t currentRefPosition = record.get0BasedPosition();
    int32_t readClipPosition = 0;
    for(int i = 0; i < cigar->size(); i++)
    {
        const Cigar::CigarOperator* op = &(cigar->getOperator(i));

        // If the operation is found in the reference, increase the
        // reference position.
        if(Cigar::foundInReference(*op))
        {
            // match, mismatch, deletion, skip
            // increment the current reference position to just past
            // this operation.
            currentRefPosition += op->count;
        }
         
        // Check to see if we have hit our clip position.
        if(refPosition0Based < currentRefPosition)
        {
            // If this read is also in the query (match/mismatch), 
            // write the partial op to the new cigar.
            int32_t numKeep = 0;
            if(Cigar::foundInQuery(*op))
            {
                numKeep = op->count - (currentRefPosition - refPosition0Based);
                if(numKeep > 0)
                {
                    newCigar.Add(op->operation, numKeep);
                    readClipPosition += numKeep;
                }
            }
            else if(Cigar::isClip(*op))
            {
                // This is a hard clip, so write it.
                newCigar.Add(op->operation, op->count);
            }
            else
            {

                // Not found in the query (skip/deletion),
                // so don't write any of the operation.
            }
            // Found the clip point, so break.
            break;
        }
        else if(refPosition0Based == currentRefPosition)
        {
            newCigar += *op;
            if(Cigar::foundInQuery(*op))
            {
                readClipPosition += op->count;
            }
        }
        else
        {
            // Not yet to the clip position, so add this operation/size to
            // the new cigar.
            newCigar += *op;
            if(Cigar::foundInQuery(*op))
            {
                // Found in the query, so update the read clip position.
                readClipPosition += op->count;
            }
        }
    } // End loop through cigar.

    // Before adding the softclip, read from the end of the cigar checking to
    // see if the operations are in the query, removing operations that are
    // not (pad/delete/skip) until a hardclip or an operation in the query is
    // found. We do not want a pad/delete/skip right before a softclip.
    for(int j = newCigar.size() - 1; j >= 0; j--)
    {
        const Cigar::CigarOperator* op = &(newCigar.getOperator(j));
        if(!Cigar::foundInQuery(*op) && !Cigar::isClip(*op))
        {
            // pad/delete/skip
            newCigar.Remove(j);
        }
        else if(Cigar::foundInQuery(*op) & Cigar::isClip(*op))
        {
            // Soft clip, so increment the clip position for the return value.
            // Remove the softclip since the readClipPosition is used to
            // calculate teh size of the soft clip added.
            readClipPosition -= op->count;
            newCigar.Remove(j);
        }
        else
        {
            // Found a cigar operation that should not be deleted, so stop deleting.
            break;
        }
    } 

    // Determine the number of soft clips.
    int32_t numSoftClips = record.getReadLength() - readClipPosition;
    // NOTE that if the previous operation is a softclip, the CigarRoller logic
    // will merge this with that one.
    newCigar.Add(Cigar::softClip, numSoftClips);

    // Check if an ending hard clip needs to be added.
    if(cigar->size() != 0)
    {
        const Cigar::CigarOperator* lastOp = 
            &(cigar->getOperator(cigar->size() - 1));
        if(lastOp->operation == Cigar::hardClip)
        {
            newCigar += *lastOp;
        }
    }

    return(readClipPosition);
}
Ejemplo n.º 11
0
// Add an entry to this pileup element.  
void PileupElementBaseQual::addEntry(SamRecord& record)
{
    // Call the base class:
    PileupElement::addEntry(record);

    if(myRefAllele.empty())
    {
    	genomeIndex_t markerIndex = (*myRefSeq).getGenomePosition(getChromosome(), static_cast<uint32_t>(getRefPosition()+1));
        myRefAllele = (*myRefSeq)[markerIndex];
    }

    // Increment the index
    ++myIndex;
    
    // if the index has gone beyond the allocated space, double the size.
    if(myIndex >= myAllocatedSize)
    {
        char* tempBuffer = (char*)realloc(myBases, myAllocatedSize * 2);
        if(tempBuffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            return;
        }
        myBases = tempBuffer;
        int8_t* tempInt8Buffer = (int8_t*)realloc(myMapQualities, myAllocatedSize * 2 * sizeof(int8_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            return;
        }
        myMapQualities = tempInt8Buffer; 
        tempInt8Buffer = (int8_t*)realloc(myQualities, myAllocatedSize * 2 * sizeof(int8_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            return;
        }
        myQualities = tempInt8Buffer;
        tempBuffer = (char*)realloc(myStrands, myAllocatedSize * 2);
        if(tempBuffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            return;
        }
        myStrands = tempBuffer;
        tempInt8Buffer = (int8_t*)realloc(myCycles, myAllocatedSize * 2 * sizeof(int8_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            return;
        }
        myCycles = tempInt8Buffer; 
        int16_t* tempInt16Buffer = (int16_t*)realloc(myGLScores, myAllocatedSize * 2 * sizeof(int16_t));
        if(tempInt8Buffer == NULL)
        {
            std::cerr << "Memory Allocation Failure\n";
            // TODO
            return;
        }
        myGLScores = tempInt16Buffer;
        myAllocatedSize = myAllocatedSize * 2;
    }

    Cigar* cigar = record.getCigarInfo();
    
    if(cigar == NULL)
    {
        throw std::runtime_error("Failed to retrieve cigar info from the record.");
    }

    int32_t readIndex = 
        cigar->getQueryIndex(getRefPosition(), record.get0BasedPosition());

    // If the readPosition is N/A, this is a deletion.
    if(readIndex != CigarRoller::INDEX_NA)
    {
        char base = record.getSequence(readIndex);
        int8_t mapQual = record.getMapQuality();
        //-33 to obtain the PHRED base quality
        char qual = record.getQuality(readIndex) - 33;
        if(qual == UNSET_QUAL)
        {
            qual = ' ';
        }
        char strand = (record.getFlag() & 0x0010) ? 'R' : 'F';
        int cycle = strand == 'F' ? readIndex + 1 : record.getReadLength() -  readIndex;
        myBases[myIndex] = base;
        myMapQualities[myIndex] = mapQual;
        myQualities[myIndex] = qual;
        myStrands[myIndex] = strand;
        myCycles[myIndex] = cycle;
    }
    else if(myAddDelAsBase)
    {
        int8_t mapQual = record.getMapQuality();
        char strand = (record.getFlag() & 0x0010) ? 'R' : 'F';
        myBases[myIndex] = '-';
        myMapQualities[myIndex] = mapQual;
        myQualities[myIndex] = -1;
        myStrands[myIndex] = strand;
        myCycles[myIndex] = -1;
    }
    else
    {
        // Do not add a deletion.
        // Did not add any entries, so decrement the index counter since the
        // index was not used.
        --myIndex;
    }
}
Ejemplo n.º 12
0
/*
	if a discordant read is mapped to MEI (overlap with MEI coord)
		add centor of ( anchor_end + 3*avr_ins_var )
	skip unmap & clip
	check 3 types at the same time
*/
void Sites::AddDiscoverSiteFromSingleBam( SingleSampleInfo & si )
{
/*for(int i=0;i<NMEI; i++) {
std::cout << "m=" << i << ": ";
for(map<string, map<int, bool> >::iterator t=meiCoord[m].begin(); t!=meiCoord[m].end(); t++)
std::cout << t->first << "->" << t->second.size() << " ";
std::cout << std::endl;
}*/

	avr_read_length = si.avr_read_length;
	avr_ins_size = si.avr_ins_size;
	min_read_length = avr_read_length / 3;
	current_depth = si.depth;
//	total_depth += current_depth;

	resetDepthAddFlag();
	
	SamFile bam;
	SamFileHeader bam_header;
	OpenBamAndBai( bam,bam_header, si.bam_name );
	for( int i=0; i<pchr_list->size(); i++ ) {
		string chr = (*pchr_list)[i];
//		if ( !single_chr.empty() && chr.compare(single_chr)!=0 )
//			continue;
		if ( siteList.find(chr) == siteList.end() )
			siteList[chr].clear();
//		map<string, map<int, SingleSite> >::iterator pchr = siteList[m].find(chr);
//		map<string, map<int, bool> >::iterator coord_chr_ptr = meiCoord[m].find(chr);
//		if (coord_chr_ptr == meiCoord[m].end())
//			continue;
		bool section_status;
		if (range_start<0) { // no range
			section_status = bam.SetReadSection( chr.c_str() );
			if (!section_status) {
				string str = "Cannot set read section at chr " + chr;
				morphWarning( str );
			}
		}
		else { // set range
			section_status = bam.SetReadSection( chr.c_str(), range_start, range_end );
			if (!section_status) {
				string str = "Cannot set read section at chr " + chr + " " + std::to_string(range_start) + "-" + std::to_string(range_end); 
				morphWarning( str );
			}			
		}
		
		// DO ADDING
//		if (siteList[chr].empty())
//			p_reach_last = 1;
//		else {
//			p_reach_last = 0;
		pnearest = siteList[chr].begin();
//		}
		SingleSite new_site; // temporary cluster. will be added to map later.
		new_site.depth = current_depth;
		bool start_new = 1; // check if need to add new_site to map and start new new_site
		SamRecord rec;
		int between = 0; // count #reads after new_site.end. If end changed, add it to rcount and reset to zero
		while( bam.ReadRecord( bam_header, rec ) ) {
			if (!start_new) {
				if (rec.get1BasedPosition() >= new_site.end)
					between++;
				else
					new_site.rcount++;
			}
			if (rec.getFlag() & 0x2)
				continue;
			if ( OneEndUnmap( rec.getFlag() ) )
				continue;
			if ( IsSupplementary(rec.getFlag()) )
				continue;
			if ( rec.getReadLength() < min_read_length )
				continue;
			if ( rec.getMapQuality() < MIN_QUALITY )
				continue;
			if (chr.compare(rec.getMateReferenceName())==0 && rec.getInsertSize() < abs(avr_ins_size*2))
				continue;
			bool is_mei = 0;
			vector<bool> is_in_coord;
			is_in_coord.resize(3, 0);
			for(int m=0; m<NMEI; m++) {
				map<string, map<int, bool> >::iterator coord_chr_ptr = meiCoord[m].find(rec.getMateReferenceName());
				if (coord_chr_ptr == meiCoord[m].end())
					is_in_coord[m] = 0;
				else
					is_in_coord[m] = isWithinCoord( rec.get1BasedMatePosition(), coord_chr_ptr->second ); // within MEI coord
				if (is_in_coord[m])
					is_mei = 1;
			}
			if (!is_mei)
				continue;
			if (start_new) {
				setNewCluster( is_in_coord, new_site,rec);
				start_new = 0;
				between = 0;
			}
			else { // add to existing cluster
				if ( rec.get1BasedPosition() > new_site.end + avr_ins_size ) { // start new coord
					addClusterToMap(new_site, siteList[chr]);
					setNewCluster( is_in_coord, new_site, rec);
					start_new = 0;
					between = 0;
				}
				else {
					addToCurrentCluster( is_in_coord, new_site, rec);
					new_site.rcount += between;
					between = 0;
				}
			}
		}
		// add last one
		if (!start_new)
			addClusterToMap(new_site, siteList[chr]);
	}
	bam.Close();
}