Beispiel #1
0
void
NDalign::realignForward(bool verbose, bool displayAlign) {
  Match_Node_t  match;

  match.Start  = abgn();    //  Begin position in a
  match.Offset = bbgn();    //  Begin position in b
  match.Len    = 0;
  match.Next   = 0;         //  Not used here

  int32  aLo=0, aHi=0;
  int32  bLo=0, bHi=0;

  if (displayAlign)
    fprintf(stderr, "NDalign::realignForward()--\n");

  pedOverlapType  olapType = _editDist->Extend_Alignment(&match,         //  Initial exact match, relative to start of string
                                                         _aStr, _aLen,
                                                         _bStr, _bLen,
                                                         aLo,   aHi,    //  Output: Regions which the match extends
                                                         bLo,   bHi);

  aHi++;  //  Add one to the end point because Extend_Alignment returns the base-based coordinate.
  bHi++;

  //  Is this a better overlap than what we have?

  if (((score() <  _editDist->score())) ||
      ((score() <= _editDist->score()) && (length() > ((aHi - aLo) + (bHi - bLo) + _editDist->Left_Delta_Len) / 2))) {
    if (displayAlign)
      fprintf(stderr, "NDalign::realignForward()-- Save better alignment - OLD length %u erate %f score %u (%d-%d %d-%d)\n",
              length(), erate(), score(), abgn(), aend(), bbgn(), bend());

    _bestResult.save(aLo, aHi, bLo, bHi, _editDist->score(), olapType, _editDist->Left_Delta_Len, _editDist->Left_Delta);

    display("NDalign::realignForward()-- ", false);

    _bestResult.setErate(1.0 - (double)(_matches + _gapmatches) / (length() - _freegaps));

    if (displayAlign)
      fprintf(stderr, "NDalign::realignForward()-- Save better alignment - NEW length %u erate %f score %u (%d-%d %d-%d)\n",
              length(), erate(), score(), abgn(), aend(), bbgn(), bend());

  }

  else if (displayAlign) {
    fprintf(stderr, "NDalign::realignForward()-- Alignment no better   - OLD length %u erate %f score %u (%d-%d %d-%d)\n",
            length(), erate(), score(), abgn(), aend(), bbgn(), bend());
    fprintf(stderr, "NDalign::realignForward()-- Alignment no better   - NEW length %u erate %f score %u (%d-%d %d-%d)\n",
            ((aHi - aLo) + (bHi - bLo) + _editDist->Left_Delta_Len) / 2, 0.0, _editDist->score(), aLo, aHi, bLo, bHi);
    //display("NDalign::realignForward(NB)--", aLo, aHi, bLo, bHi, _editDist->Left_Delta, _editDist->Left_Delta_Len, true, false);
  }
}
Beispiel #2
0
char *
ovOverlap::toString(char                  *str,
                    ovOverlapDisplayType   type,
                    bool                   newLine) {

  switch (type) {
    case ovOverlapAsHangs:
      sprintf(str, "%10"F_U32P" %10"F_U32P"  %c  %6"F_S32P" %6"F_U32P" %6"F_S32P"  %7.6f%s%s",
              a_iid, b_iid,
              flipped() ? 'I' : 'N',
              a_hang(), span(), b_hang(),
              erate(),
              (overlapIsDovetail()) ? "" : "  PARTIAL",
              (newLine) ? "\n" : "");
      break;

    case ovOverlapAsCoords:
      sprintf(str, "%10"F_U32P" %10"F_U32P"  %c  %6"F_U32P"  %6"F_U32P" %6"F_U32P"  %6"F_U32P" %6"F_U32P"  %7.6f%s",
              a_iid, b_iid,
              flipped() ? 'I' : 'N',
              span(),
              a_bgn(), a_end(),
              b_bgn(), b_end(),
              erate(),
              (newLine) ? "\n" : "");
      break;

    case ovOverlapAsRaw:
      sprintf(str, "%10"F_U32P" %10"F_U32P"  %c  %6"F_U32P"  %6"F_U64P" %6"F_U64P"  %6"F_U64P" %6"F_U64P"  %7.6f %s %s %s%s",
              a_iid, b_iid,
              flipped() ? 'I' : 'N',
              span(),
              dat.ovl.ahg5, dat.ovl.ahg3,
              dat.ovl.bhg5, dat.ovl.bhg3,
              erate(),
              dat.ovl.forOBT ? "OBT" : "   ",
              dat.ovl.forDUP ? "DUP" : "   ",
              dat.ovl.forUTG ? "UTG" : "   ",
              (newLine) ? "\n" : "");
      break;

    case ovOverlapAsCompat:
      sprintf(str, "%8"F_U32P" %8"F_U32P"  %c  %6d  %6d  %5.2f  %5.2f%s",
              a_iid,
              b_iid,
              dat.ovl.flipped ? 'I' : 'N',
              a_hang(), b_hang(),
              erate() * 100.0,
              erate() * 100.0,
              (newLine) ? "\n" : "");
      break;
    case ovOverlapAsPaf:
      // miniasm/map expects entries to be separated by tabs
      // no padding spaces on names we don't confuse read identifiers
      sprintf(str, "%"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%c\t%"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P"\t%6"F_U32P" %s",
              a_iid,
              (g->gkStore_getRead(a_iid)->gkRead_sequenceLength()), a_bgn(), a_end(),
              flipped() ? '-' : '+',
              b_iid,
              (g->gkStore_getRead(b_iid)->gkRead_sequenceLength()), flipped() ? b_end() : b_bgn(), flipped() ? b_bgn() : b_end(),
              (uint32)floor(span() == 0 ? (1-erate() * (a_end()-a_bgn())) : (1-erate()) * span()),
              span() == 0 ? a_end() - a_bgn() : span(),
              255,
              (newLine) ? "\n" : "");
      break;

  }

  return(str);
}
Beispiel #3
0
bool
NDalign::processHits(void) {

  //  If the first time here, set the hit iterator to zero, otherwise move to the next one.
  //  And then return if there are no more hits to iterate over.

  if (_hitr == UINT32_MAX)
    _hitr = 0;
  else
    _hitr++;

  if (_hitr >= _hits.size())
    return(false);

  //  While hits, process them.
  //
  //  If a good hit is found, return, leaving hitr as is.  The next time we enter this function,
  //  we'll increment hitr and process the next hit.  If no good hit is found, we iterate the loop
  //  until a good one is found, or we run out of hits.

  for (; _hitr < _hits.size(); _hitr++) {
    Match_Node_t  match;

    match.Start  = _hits[_hitr].aBgn;    //  Begin position in a
    match.Offset = _hits[_hitr].bBgn;    //  Begin position in b
    match.Len    = _hits[_hitr].tLen;    //  tLen can include mismatches if alternate scoring is used!
    match.Next   = 0;                 //  Not used here


#ifdef SEED_NON_OVERLAPPING
    match.Offset = _merSize;  //  Really should track this in the hits, oh well.
#endif

#ifdef DEBUG_ALGORITHM
    fprintf(stderr, "\n");
    fprintf(stderr, "NDalign::processHits()-- Extend_Alignment Astart %d Bstart %d length %d\n", match.Start, match.Offset, match.Len);
#endif

    int32  aLo=0, aHi=0;
    int32  bLo=0, bHi=0;

    pedOverlapType  olapType = _editDist->Extend_Alignment(&match,         //  Initial exact match, relative to start of string
                                                           _aStr, _aLen,
                                                           _bStr, _bLen,
                                                           aLo,   aHi,    //  Output: Regions which the match extends
                                                           bLo,   bHi);

    aHi++;  //  Add one to the end point because Extend_Alignment returns the base-based coordinate.
    bHi++;

    //  Is this a better overlap than what we have?  Save it and update statistics.

    if (((score() <  _editDist->score())) ||
        ((score() <= _editDist->score()) && (length() > ((aHi - aLo) + (bHi - bLo) + _editDist->Left_Delta_Len) / 2))) {

#ifdef DEBUG_ALGORITHM
      fprintf(stderr, "NDalign::processHits()-- Save better alignment - OLD length %u erate %f score %u (%d-%d %d-%d) ",
              length(), erate(), score(), abgn(), aend(), bbgn(), bend());
#endif

      _bestResult.save(aLo, aHi, bLo, bHi, _editDist->score(), olapType, _editDist->Left_Delta_Len, _editDist->Left_Delta);

      display("NDalign::processHits()-- ", false);

      _bestResult.setErate(1.0 - (double)(_matches + _gapmatches) / (length() - _freegaps));

#ifdef DEBUG_ALGORITHM
      fprintf(stderr, "NDalign::processHits()-- NEW length %u erate %f score %u (%d-%d %d-%d)\n",
              length(), erate(), score(), abgn(), aend(), bbgn(), bend());
#endif

    } else {
      olapType = pedBothBranch;

#ifdef DEBUG_ALGORITHM
      fprintf(stderr, "NDalign::processHits()-- DON'T save alignment - OLD length %u erate %f score %u (%d-%d %d-%d) ",
              length(), erate(), score(), abgn(), aend(), bbgn(), bend());
      fprintf(stderr, "NDalign::processHits()--  NEW length %u score %u coords %u-%u %u-%u\n",
              ((aHi - aLo) + (bHi - bLo) + _editDist->Left_Delta_Len) / 2,
              _editDist->score(),
              aLo, aHi, bLo, bHi);
#endif
    }

    //  If a dovetail, we're done.  Let the client figure out if the quality is good.

    if (olapType == pedDovetail)
      return(true);

  }  //  Over all seeds.

  //  No more seeds to align.  Did we save an alignment?

  return(score() > 0);
}