Ejemplo n.º 1
0
//  For every unitig, report the best overlaps contained in the
//  unitig, and all overlaps contained in the unitig.
//
//  Wow, this is ancient.
//
void
writeOverlapsUsed(UnitigVector &unitigs,
                  char         *prefix) {
  char   N[FILENAME_MAX];

  sprintf(N, "%s.unused.best.edges", prefix);

  FILE  *F = fopen(N, "w");

  for (uint32  ti=0; ti<unitigs.size(); ti++) {
    Unitig  *tig = unitigs[ti];
    Unitig  *ovl = NULL;
    char     tyt = 'C';

    if (tig == NULL)
      continue;

    if (tig->_isUnassembled)  tyt = 'U';
    if (tig->_isBubble)       tyt = 'B';
    if (tig->_isRepeat)       tyt = 'R';
    if (tig->_isCircular)     tyt = 'O';

    for (uint32 fi=0; fi<tig->ufpath.size(); fi++) {
      ufNode  *frg = &tig->ufpath[fi];
      ufNode  *oth = NULL;

      //  Report the unused best edge

      BestEdgeOverlap *be5 = OG->getBestEdgeOverlap(frg->ident, false);
      uint32   rd5 = (be5 == NULL) ?    0 : be5->fragId();
      Unitig  *tg5 = (be5 == NULL) ? NULL : unitigs[Unitig::fragIn(rd5)];
      char     ty5 = 'C';

      if ((tg5 != NULL) && (tg5->tigID() != tig->tigID())) {
        uint32  ord = Unitig::pathPosition(rd5);
        ufNode *oth = &tg5->ufpath[ord];

        if (tig->_isUnassembled)  ty5 = 'U';
        if (tig->_isBubble)       ty5 = 'B';
        if (tig->_isRepeat)       ty5 = 'R';
        if (tig->_isCircular)     ty5 = 'O';

        fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n",
                tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '5',
                be5->ahang(), be5->bhang(),
                tg5->tigID(), ty5, oth->ident, oth->position.bgn, oth->position.end, (be5->frag3p() == false) ? '5' : '3');
      }

      BestEdgeOverlap *be3 = OG->getBestEdgeOverlap(frg->ident, true);
      uint32   rd3 = (be3 == NULL) ?    0 : be3->fragId();
      Unitig  *tg3 = (be3 == NULL) ? NULL : unitigs[Unitig::fragIn(rd3)];
      char     ty3 = 'C';

      if ((tg3 != NULL) && (tg3->tigID() != tig->tigID())) {
        uint32  ord = Unitig::pathPosition(rd3);
        ufNode *oth = &tg3->ufpath[ord];

        if (tig->_isUnassembled)  ty3 = 'U';
        if (tig->_isBubble)       ty3 = 'B';
        if (tig->_isRepeat)       ty3 = 'R';
        if (tig->_isCircular)     ty3 = 'O';

        fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n",
                tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '3',
                be3->ahang(), be3->bhang(),
                tg3->tigID(), ty3, oth->ident, oth->position.bgn, oth->position.end, (be3->frag3p() == false) ? '5' : '3');
      }
    }
  }

  fclose(F);
}
Ejemplo n.º 2
0
//  Write every non-empty unitig to the tig store at 'tigStorePath', and log
//  how the tigs and their reads are assigned to partitions.
//
//  Parameters:
//    unitigs           - the tigs to write; NULL entries and tigs with no
//                        reads are skipped.
//    fileprefix        - prefix for the three log files written here:
//                          <fileprefix>.iidmap           - one line per tig: bogart id, tig id, partition, read count
//                          <fileprefix>.partitioning     - one "partition <tab> read" line per read
//                          <fileprefix>.partitioningInfo - one summary line per partition
//    tigStorePath      - path handed to the tgStore constructor.
//    frg_count_target  - a new partition is started when adding the next tig
//                        would bring the current (non-empty) partition to at
//                        least this many reads.
//    isFinal           - if true, checkUnitigMembership() is run first and
//                        tigs are numbered consecutively (skipped entries do
//                        not consume an id); if false, each tig is numbered
//                        by its index in 'unitigs'.
//
//  Partitions are numbered from 1.
//
void
writeUnitigsToStore(UnitigVector  &unitigs,
                    char          *fileprefix,
                    char          *tigStorePath,
                    uint32         frg_count_target,
                    bool           isFinal) {
  uint32      utg_count              = 0;     //  tigs in the current partition
  uint32      frg_count              = 0;     //  reads in the current partition
  uint32      prt_count              = 1;     //  current partition number
  char        filename[FILENAME_MAX] = {0};

  //  This code closely follows that in AS_CGB_unitigger.c::output_the_chunks()

  if (isFinal)
    checkUnitigMembership(unitigs);

  //  Open the three log files.  snprintf() bounds each name to the buffer.

  snprintf(filename, sizeof(filename), "%s.iidmap", fileprefix);
  FILE *iidm = fopen(filename, "w");
  assert(NULL != iidm);

  snprintf(filename, sizeof(filename), "%s.partitioning", fileprefix);
  FILE *part = fopen(filename, "w");
  assert(NULL != part);

  snprintf(filename, sizeof(filename), "%s.partitioningInfo", fileprefix);
  FILE *pari = fopen(filename, "w");
  assert(NULL != pari);

  //  Step through all the unitigs once to build the partition mapping and IID mapping.

  tgStore     *tigStore = new tgStore(tigStorePath);
  tgTig       *tig      = new tgTig;

  for (uint32 tigID=0, ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if ((utg == NULL) || (utg->getNumFrags() == 0))
      continue;

    assert(utg->getLength() > 0);

    //  Convert the bogart tig to a tgTig and save to the store.  tigID only
    //  advances for tigs that are actually written, so final ids are dense.

    unitigToTig(tig, (isFinal) ? tigID : ti, utg);
    tigID++;

    tigStore->insertTig(tig, false);

    //  Increment the partition if the current one is too large.  An empty
    //  partition always accepts the tig, even if that tig alone exceeds the
    //  target.

    if ((frg_count + utg->getNumFrags() >= frg_count_target) &&
        (frg_count                      >  0)) {
      fprintf(pari, "Partition " F_U32 " has " F_U32 " unitigs and " F_U32 " fragments.\n",
              prt_count, utg_count, frg_count);

      prt_count++;
      utg_count = 0;
      frg_count = 0;
    }

    //  Note that the tig is included in this partition.

    utg_count += 1;
    frg_count += utg->getNumFrags();

    //  Map the tig to a partition, and log both the tig-to-partition map and the partition-to-read map.

    fprintf(iidm, "bogart " F_U32 " -> tig " F_U32 " (in partition " F_U32 " with " F_U32 " frags)\n",
            utg->id(),
            utg->tigID(),
            prt_count,
            utg->getNumFrags());

    for (uint32 fragIdx=0; fragIdx<utg->getNumFrags(); fragIdx++)
      fprintf(part, F_U32 "\t" F_U32 "\n", prt_count, utg->ufpath[fragIdx].ident);
  }

  fprintf(pari, "Partition " F_U32 " has " F_U32 " unitigs and " F_U32 " fragments.\n",   //  Don't forget to log the last partition!
          prt_count, utg_count, frg_count);

  fclose(pari);
  fclose(part);
  fclose(iidm);

  delete    tig;
  delete    tigStore;
}