//  Write the unitigs to an intermediate tigStore named
//  '<prefix>.<logFileOrder>.<name>.tigStore'.
//
//  Does nothing unless the LOG_INTERMEDIATE_UNITIGS logging flag is set.
//
//    unitigs - the unitigs to write; NULL entries are skipped when counting.
//    prefix  - leading component of the store name.
//    name    - label embedded in the store name.
//
void
reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name) {

  if (logFileFlagSet(LOG_INTERMEDIATE_UNITIGS) == 0)
    return;

  uint32  numFragsT = 0;   //  Total fragments over all unitigs.
  uint32  numFragsP = 0;   //  Fragments per partition, handed to writeUnitigsToStore().
  uint64  utgLen    = 0;   //  Summed length of unitigs with more than two fragments.

  //  Compute average frags per partition.

  for (uint32 ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    numFragsT += utg->ufpath.size();

    if (utg->ufpath.size() > 2)
      utgLen += utg->getLength();
  }

  //  Larger assemblies get a larger divisor, so fewer fragments per partition
  //  relative to the total.  NOTE(review): 7, 63 and 127 are presumably
  //  target partition counts -- confirm against writeUnitigsToStore().

  if      (utgLen < 16 * 1024 * 1024)
    numFragsP = numFragsT / 7;
  else if (utgLen < 64 * 1024 * 1024)
    numFragsP = numFragsT / 63;
  else
    numFragsP = numFragsT / 127;

  //  Bounded formatting: a long prefix or name is truncated rather than
  //  written past the end of the buffer.

  char  tigStorePath[FILENAME_MAX];
  snprintf(tigStorePath, sizeof(tigStorePath), "%s.%03u.%s.tigStore", prefix, logFileOrder, name);

  //  Failing to do this results in consensus running about 40 times slower.  Three hours instead of
  //  five minutes.
  setParentAndHang(unitigs);

  writeUnitigsToStore(unitigs, tigStorePath, tigStorePath, numFragsP, false);
}
void reportUnitigs(UnitigVector &unitigs, const char *prefix, const char *name, uint64 genomeSize) { // Generate n50. Assumes unitigs have been 'classified' already. vector<uint32> unassembledLength; vector<uint32> bubbleLength; vector<uint32> repeatLength; vector<uint32> circularLength; vector<uint32> contigLength; for (uint32 ti=0; ti<unitigs.size(); ti++) { Unitig *utg = unitigs[ti]; if (utg == NULL) continue; if (utg->_isUnassembled) { unassembledLength.push_back(utg->getLength()); } else if (utg->_isBubble) { bubbleLength.push_back(utg->getLength()); } else if (utg->_isRepeat) { repeatLength.push_back(utg->getLength()); } else if (utg->_isCircular) { circularLength.push_back(utg->getLength()); } else { contigLength.push_back(utg->getLength()); } } char N[FILENAME_MAX]; sprintf(N, "%s.sizes", getLogFilePrefix()); errno = 0; FILE *F = fopen(N, "w"); if (errno == 0) { reportN50(F, unassembledLength, "UNASSEMBLED", genomeSize); reportN50(F, bubbleLength, "BUBBLE", genomeSize); reportN50(F, repeatLength, "REPEAT", genomeSize); reportN50(F, circularLength, "CIRCULAR", genomeSize); reportN50(F, contigLength, "CONTIGS", genomeSize); fclose(F); } if (logFileFlagSet(LOG_INTERMEDIATE_UNITIGS) == 0) return; // Dump to an intermediate store. char tigStorePath[FILENAME_MAX]; sprintf(tigStorePath, "%s.tigStore", getLogFilePrefix()); fprintf(stderr, "Creating intermediate tigStore '%s'\n", tigStorePath); uint32 numFragsT = 0; uint32 numFragsP = 0; uint64 utgLen = 0; // Compute average frags per partition. for (uint32 ti=0; ti<unitigs.size(); ti++) { Unitig *utg = unitigs[ti]; if (utg == NULL) continue; numFragsT += utg->ufpath.size(); if (utg->ufpath.size() > 2) utgLen += utg->getLength(); } if (utgLen < 16 * 1024 * 1024) numFragsP = numFragsT / 7; else if (utgLen < 64 * 1024 * 1024) numFragsP = numFragsT / 63; else numFragsP = numFragsT / 127; // Dump the unitigs to an intermediate store. 
setParentAndHang(unitigs); writeUnitigsToStore(unitigs, tigStorePath, tigStorePath, numFragsP, false); }