Exemple #1
0
//  Dump the current unitigs to an intermediate tigStore named
//  '<prefix>.<logFileOrder>.<name>.tigStore'.
//
//  Does nothing unless the LOG_INTERMEDIATE_UNITIGS log flag is set.
//
//    unitigs - the unitigs to write; NULL entries are skipped when counting fragments.
//    prefix  - leading component of the store name.
//    name    - label placed just before the '.tigStore' suffix.
//
void
reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name) {

  if (logFileFlagSet(LOG_INTERMEDIATE_UNITIGS) == 0)
    return;

  uint32  numFragsT  = 0;   //  Total fragments over all non-NULL unitigs.
  uint32  numFragsP  = 0;   //  Fragments per partition, handed to writeUnitigsToStore().
  uint64  utgLen     = 0;   //  Summed length of unitigs with more than two fragments.

  //  Compute average frags per partition.
  for (uint32  ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    numFragsT += utg->ufpath.size();

    //  Only unitigs with more than two fragments contribute to the length used
    //  to pick the divisor below.
    if (utg->ufpath.size() > 2)
      utgLen    += utg->getLength();
  }

  //  The larger the total length, the larger the divisor (presumably the number
  //  of partitions the fragments are spread over -- confirm against
  //  writeUnitigsToStore()).
  if      (utgLen < 16 * 1024 * 1024)
    numFragsP = numFragsT / 7;
  else if (utgLen < 64 * 1024 * 1024)
    numFragsP = numFragsT / 63;
  else
    numFragsP = numFragsT / 127;

  //  Build the store name with a bounded write; a long prefix or name must not
  //  run off the end of the stack buffer.
  char tigStorePath[FILENAME_MAX];
  int  pathLen = snprintf(tigStorePath, sizeof(tigStorePath), "%s.%03u.%s.tigStore", prefix, logFileOrder, name);

  if ((pathLen < 0) || ((size_t)pathLen >= sizeof(tigStorePath))) {
    fprintf(stderr, "reportUnitigs()-- tigStore name for prefix '%s' and name '%s' is too long; intermediate unitigs not written.\n",
            prefix, name);
    return;
  }

  //  Failing to do this results in consensus running about 40 times slower.  Three hours instead of
  //  five minutes.
  setParentAndHang(unitigs);

  writeUnitigsToStore(unitigs, tigStorePath, tigStorePath, numFragsP, false);
}
//  Write a size (N50) report for the unitigs to '<logFilePrefix>.sizes' and, if
//  the LOG_INTERMEDIATE_UNITIGS log flag is set, dump the unitigs to
//  '<logFilePrefix>.tigStore'.
//
//    unitigs    - the unitigs to report on; NULL entries are skipped.
//    prefix     - not used by this overload.
//    name       - not used by this overload.
//    genomeSize - passed through to reportN50() for every class of unitig.
//
//  If the report file cannot be opened, a warning is printed and the report is
//  skipped; the intermediate store is still written when requested.
//
void
reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name, uint64 genomeSize) {

  //  Generate n50.  Assumes unitigs have been 'classified' already.

  vector<uint32>   unassembledLength;
  vector<uint32>   bubbleLength;
  vector<uint32>   repeatLength;
  vector<uint32>   circularLength;
  vector<uint32>   contigLength;

  //  Each unitig is counted in exactly one class; the first matching flag, in
  //  the order tested below, wins.

  for (uint32  ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    if (utg->_isUnassembled) {
      unassembledLength.push_back(utg->getLength());
    }

    else if (utg->_isBubble) {
      bubbleLength.push_back(utg->getLength());
    }

    else if (utg->_isRepeat) {
      repeatLength.push_back(utg->getLength());
    }

    else if (utg->_isCircular) {
      circularLength.push_back(utg->getLength());
    }

    else {
      contigLength.push_back(utg->getLength());
    }
  }

  //  Build the report name with a bounded write, and only try to open it if the
  //  whole name fit in the buffer.

  char   N[FILENAME_MAX];
  int    nLen = snprintf(N, sizeof(N), "%s.sizes", getLogFilePrefix());

  FILE  *F = NULL;

  if ((nLen >= 0) && ((size_t)nLen < sizeof(N)))
    F = fopen(N, "w");

  //  Success is decided by the returned pointer.  errno is not a reliable
  //  indicator here: a successful fopen() is allowed to leave it non-zero.

  if (F != NULL) {
    reportN50(F, unassembledLength, "UNASSEMBLED", genomeSize);
    reportN50(F, bubbleLength,      "BUBBLE",      genomeSize);
    reportN50(F, repeatLength,      "REPEAT",      genomeSize);
    reportN50(F, circularLength,    "CIRCULAR",    genomeSize);
    reportN50(F, contigLength,      "CONTIGS",     genomeSize);

    fclose(F);
  } else {
    fprintf(stderr, "reportUnitigs()-- failed to open '%s.sizes' for writing; size report skipped.\n", getLogFilePrefix());
  }

  if (logFileFlagSet(LOG_INTERMEDIATE_UNITIGS) == 0)
    return;

  //  Dump to an intermediate store.

  char tigStorePath[FILENAME_MAX];
  int  pathLen = snprintf(tigStorePath, sizeof(tigStorePath), "%s.tigStore", getLogFilePrefix());

  if ((pathLen < 0) || ((size_t)pathLen >= sizeof(tigStorePath))) {
    fprintf(stderr, "reportUnitigs()-- tigStore name for prefix '%s' is too long; intermediate unitigs not written.\n", getLogFilePrefix());
    return;
  }

  fprintf(stderr, "Creating intermediate tigStore '%s'\n", tigStorePath);

  uint32  numFragsT  = 0;   //  Total fragments over all non-NULL unitigs.
  uint32  numFragsP  = 0;   //  Fragments per partition, handed to writeUnitigsToStore().
  uint64  utgLen     = 0;   //  Summed length of unitigs with more than two fragments.

  //  Compute average frags per partition.

  for (uint32  ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    numFragsT += utg->ufpath.size();

    if (utg->ufpath.size() > 2)
      utgLen    += utg->getLength();
  }

  //  The larger the total length, the larger the divisor (presumably the number
  //  of partitions the fragments are spread over -- confirm against
  //  writeUnitigsToStore()).

  if      (utgLen < 16 * 1024 * 1024)
    numFragsP = numFragsT / 7;
  else if (utgLen < 64 * 1024 * 1024)
    numFragsP = numFragsT / 63;
  else
    numFragsP = numFragsT / 127;

  //  Dump the unitigs to an intermediate store.

  setParentAndHang(unitigs);

  writeUnitigsToStore(unitigs, tigStorePath, tigStorePath, numFragsP, false);
}