Пример #1
0
void
promoteToSingleton(UnitigVector &unitigs, bool enablePromoteToSingleton) {

  for (uint32 fi=1; fi<=FI->numFragments(); fi++) {
    if (Unitig::fragIn(fi) != 0)
      //  Placed already
      continue;

    if (FI->fragmentLength(fi) == 0)
      //  Deleted.
      continue;

    if (enablePromoteToSingleton == false) {
      writeLog("promoteToSingleton()--  Repeat fragment "F_U32" removed from assembly.\n", fi);
      FI->markAsIgnore(fi);
      continue;
    }

    Unitig *utg = unitigs.newUnitig(false);
    ufNode  frag;

    frag.ident             = fi;
    frag.contained         = 0;
    frag.parent            = 0;
    frag.ahang             = 0;
    frag.bhang             = 0;
    frag.position.bgn      = 0;
    frag.position.end      = FI->fragmentLength(fi);
    frag.containment_depth = 0;

    utg->addFrag(frag, 0, false);
  }
}
void
promoteToSingleton(UnitigVector &unitigs) {

  for (uint32 fi=1; fi<=FI->numFragments(); fi++) {
    if (Unitig::fragIn(fi) != 0)
      //  Placed already
      continue;

    if (FI->fragmentLength(fi) == 0)
      //  Deleted.
      continue;

    Unitig *utg = unitigs.newUnitig(false);
    ufNode  frag;

    frag.ident             = fi;
    frag.contained         = 0;
    frag.parent            = 0;
    frag.ahang             = 0;
    frag.bhang             = 0;
    frag.position.bgn      = 0;
    frag.position.end      = FI->fragmentLength(fi);

    utg->addFrag(frag, 0, false);
  }
}
Пример #3
0
static
void
makeNewUnitig(UnitigVector &unitigs,
              uint32        splitFragsLen,
              ufNode       *splitFrags) {
  Unitig *dangler = unitigs.newUnitig(false);

  if (logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS))
    writeLog("splitDiscontinuous()--   new tig "F_U32" with "F_U32" fragments (starting at frag "F_U32").\n",
            dangler->id(), splitFragsLen, splitFrags[0].ident);

  int splitOffset = -MIN(splitFrags[0].position.bgn, splitFrags[0].position.end);

  //  This should already be true, but we force it still
  splitFrags[0].contained = 0;

  for (uint32 i=0; i<splitFragsLen; i++)
    dangler->addFrag(splitFrags[i], splitOffset, false);  //logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS));
}
Пример #4
0
static
void
joinUnitigs_append(UnitigVector &unitigs, joinEntry *join) {
  uint32    frId = Unitig::fragIn(join->frFragID);
  uint32    toId = Unitig::fragIn(join->toFragID);

  Unitig   *fr   = unitigs[frId];
  Unitig   *to   = unitigs[toId];

  uint32    frIdx = Unitig::pathPosition(join->frFragID);
  uint32    toIdx = Unitig::pathPosition(join->toFragID);

  //  The 'fr' unitig is assumed to be forward, and assumed to be the one we join to.

  //  Compute the offset for our append.  We just need to compute where the join fragment would
  //  appear in the unitig.  The join fragment MUST be the first thing in the frUnitig.

  //int32 offset = MIN(frF.position.bgn, frF.position.end);

  //  Over all fragments in the frUnitig, add them to either the joinUnitig or the discUnitig.

  Unitig *joinUnitig = unitigs.newUnitig(false);
  Unitig *discUnitig = unitigs.newUnitig(false);

  //  Reverse the 'to' unitig if needed.

  if (join->toFlip)
    to->reverseComplement(true);

  //  If we're joining off the 5' end of the fr untiig, add the to reads first.

  if (join->frFirst == true) {
    uint32 ii=0;

    for (; ii < toIdx; ii++)
      joinUnitig->addFrag(to->ufpath[ii], 0, false);

    for (; ii < to->ufpath.size(); ii++)
      discUnitig->addFrag(to->ufpath[ii], 0, false);
  }

  //  Now add all the fr unitig reads.

  for (uint32 ii=0; ii < fr->ufpath.size(); ii++)
    joinUnitig->addFrag(to->ufpath[ii], 0, false);

  //  If we're not joining off the 5' end, add the to unitig reads last.

  if (join->frFirst == false) {
    uint32 ii = 0;

    for (; ii < toIdx; ii++)
      discUnitig->addFrag(to->ufpath[ii], 0, false);

    for (; ii < to->ufpath.size(); ii++)
      joinUnitig->addFrag(to->ufpath[ii], 0, false);
  }

  //  Delete the donor unitigs.

  delete fr;
  delete to;

  unitigs[frId] = NULL;
  unitigs[toId] = NULL;

  //  And make sure the new unitigs are consistent.

  joinUnitig->sort();
  discUnitig->sort();
}
Пример #5
0
void
placeZombies(UnitigVector &unitigs, double erate) {

  writeLog("==> SEARCHING FOR ZOMBIES\n");

  uint32 *inUnitig   = new uint32 [FI->numFragments()+1];
  int     numZombies = 0;

  //  Mark fragments as dead, then unmark them if they are in a real living unitig.

  for (uint32 i=0; i<FI->numFragments()+1; i++)
    inUnitig[i] = noUnitig;

  for (uint32 ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    for (uint32 fi=0; fi<utg->ufpath.size(); fi++)
      inUnitig[utg->ufpath[fi].ident] = utg->id();
  }

  //  For anything not in a living unitig, reload the overlaps and find a new container.
  //  (NOT IMPLEMENTED - for now we just move these to new singleton unitigs).

  for (uint32 i=0; i<FI->numFragments()+1; i++) {
    if (FI->fragmentLength(i) == 0)
      //  Deleted fragment
      continue;

    if (inUnitig[i] != noUnitig)
      //  Valid fragment in a unitig
      continue;

    Unitig      *utg = unitigs.newUnitig(false);
    ufNode       frg;

    frg.ident             = i;
    frg.contained         = 0;
    frg.parent            = 0;

    frg.ahang             = 0;
    frg.bhang             = 0;

    frg.position.bgn      = 0;
    frg.position.end      = FI->fragmentLength(i);

    frg.containment_depth = 0;

    utg->addFrag(frg, 0, false);

    writeLog("placeZombies()-- unitig %d created from zombie fragment %d\n",
            utg->id(), i);
    numZombies++;
  }

  writeLog("RESURRECTED %d ZOMBIE FRAGMENT%s.\n", numZombies, (numZombies != 1) ? "s" : "");

  delete [] inUnitig;
}
Пример #6
0
uint32
splitUnitigs(UnitigVector             &unitigs,
             Unitig                   *tig,
             vector<breakPointCoords> &BP,
             Unitig                  **newTigs,
             int32                    *lowCoord,
             uint32                   *nRepeat,
             uint32                   *nUnique,
             bool                      doMove) {
  uint32  nTigsCreated = 0;

  if (doMove == true) {
    memset(newTigs,  0, sizeof(Unitig *) * BP.size());
    memset(lowCoord, 0, sizeof(int32)    * BP.size());
  } else {
    memset(nRepeat,  0, sizeof(uint32)   * BP.size());
    memset(nUnique,  0, sizeof(uint32)   * BP.size());
  }

  for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
    ufNode     &frg    = tig->ufpath[fi];
    int32       frgbgn = min(frg.position.bgn, frg.position.end);
    int32       frgend = max(frg.position.bgn, frg.position.end);

    //  Search for the region that matches the read.  BP's are sorted in increasing order.  It
    //  probably doesn't matter, but makes the logging a little easier to read.

    uint32      rid = UINT32_MAX;
    bool        rpt = false;

    //fprintf(stderr, "Searching for placement for read %u at %u-%u\n", frg.ident, frgbgn, frgend);

    for (uint32 ii=0; ii<BP.size(); ii++) {
      int32   rgnbgn = BP[ii]._bgn;
      int32   rgnend = BP[ii]._end;
      bool    repeat = BP[ii]._isRepeat;

      //  For repeats, the read must be contained fully.

      if ((repeat == true) && (rgnbgn <= frgbgn) && (frgend <= rgnend)) {
        rid = ii;
        rpt = true;
        break;
      }

      //  For non-repeat, the read just needs to intersect.

      if ((repeat == false) && (rgnbgn < frgend) && (frgbgn < rgnend)) {
        rid = ii;
        rpt = false;
        break;
      }
    }

    if (rid == UINT32_MAX) {
      fprintf(stderr, "Failed to place read %u at %u-%u\n", frg.ident, frgbgn, frgend);
      for (uint32 ii=0; ii<BP.size(); ii++)
        fprintf(stderr, "Breakpoints %2u %8u-%8u repeat %u\n", ii, BP[ii]._bgn, BP[ii]._end, BP[ii]._isRepeat);
    }
    assert(rid != UINT32_MAX);  //  We searched all the BP's, the read had better be placed!

    //  If moving reads, move the read!

    if (doMove) {
      if (newTigs[rid] == NULL) {
        lowCoord[rid] = frgbgn;

        newTigs[rid]  = unitigs.newUnitig(true);  // LOG_ADDUNITIG_BREAKING

        if (nRepeat[rid] > nUnique[rid])
          newTigs[rid]->_isRepeat = true;
      }

      newTigs[rid]->addFrag(frg, -lowCoord[rid], false);  //LOG_ADDFRAG_BREAKING);
    }

    //  Else, we're not moving, just count how many reads came from repeats or uniques.

    else {
      if (rpt)
        nRepeat[rid]++;
      else
        nUnique[rid]++;
    }
  }

  //  Return the number of tigs created.

  for (uint32 ii=0; ii<BP.size(); ii++)
    if (nRepeat[ii] + nUnique[ii] > 0)
      nTigsCreated++;

  return(nTigsCreated);
}