// For every unitig, report the best overlaps contained in the // unitig, and all overlaps contained in the unitig. // // Wow, this is ancient. // void writeOverlapsUsed(UnitigVector &unitigs, char *prefix) { char N[FILENAME_MAX]; sprintf(N, "%s.unused.best.edges", prefix); FILE *F = fopen(N, "w"); for (uint32 ti=0; ti<unitigs.size(); ti++) { Unitig *tig = unitigs[ti]; Unitig *ovl = NULL; char tyt = 'C'; if (tig == NULL) continue; if (tig->_isUnassembled) tyt = 'U'; if (tig->_isBubble) tyt = 'B'; if (tig->_isRepeat) tyt = 'R'; if (tig->_isCircular) tyt = 'O'; for (uint32 fi=0; fi<tig->ufpath.size(); fi++) { ufNode *frg = &tig->ufpath[fi]; ufNode *oth = NULL; // Report the unused best edge BestEdgeOverlap *be5 = OG->getBestEdgeOverlap(frg->ident, false); uint32 rd5 = (be5 == NULL) ? 0 : be5->fragId(); Unitig *tg5 = (be5 == NULL) ? NULL : unitigs[Unitig::fragIn(rd5)]; char ty5 = 'C'; if ((tg5 != NULL) && (tg5->tigID() != tig->tigID())) { uint32 ord = Unitig::pathPosition(rd5); ufNode *oth = &tg5->ufpath[ord]; if (tig->_isUnassembled) ty5 = 'U'; if (tig->_isBubble) ty5 = 'B'; if (tig->_isRepeat) ty5 = 'R'; if (tig->_isCircular) ty5 = 'O'; fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n", tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '5', be5->ahang(), be5->bhang(), tg5->tigID(), ty5, oth->ident, oth->position.bgn, oth->position.end, (be5->frag3p() == false) ? '5' : '3'); } BestEdgeOverlap *be3 = OG->getBestEdgeOverlap(frg->ident, true); uint32 rd3 = (be3 == NULL) ? 0 : be3->fragId(); Unitig *tg3 = (be3 == NULL) ? NULL : unitigs[Unitig::fragIn(rd3)]; char ty3 = 'C'; if ((tg3 != NULL) && (tg3->tigID() != tig->tigID())) { uint32 ord = Unitig::pathPosition(rd3); ufNode *oth = &tg3->ufpath[ord]; if (tig->_isUnassembled) ty3 = 'U'; if (tig->_isBubble) ty3 = 'B'; if (tig->_isRepeat) ty3 = 'R'; if (tig->_isCircular) ty3 = 'O'; fprintf(F, "tig %7u %c read %8u at %9u %-9u %c' -- %8d %-8d -- tig %7u %c read %8u at %9u %-9u %c'\n", tig->tigID(), tyt, frg->ident, frg->position.bgn, frg->position.end, '3', be3->ahang(), be3->bhang(), tg3->tigID(), ty3, oth->ident, oth->position.bgn, oth->position.end, (be3->frag3p() == false) ? '5' : '3'); } } } fclose(F); }
void writeUnitigsToStore(UnitigVector &unitigs, char *fileprefix, char *tigStorePath, uint32 frg_count_target, bool isFinal) { uint32 utg_count = 0; uint32 frg_count = 0; uint32 prt_count = 1; char filename[FILENAME_MAX] = {0}; uint32 *partmap = new uint32 [unitigs.size()]; // This code closely follows that in AS_CGB_unitigger.c::output_the_chunks() if (isFinal) checkUnitigMembership(unitigs); // Open up the initial output file sprintf(filename, "%s.iidmap", fileprefix); FILE *iidm = fopen(filename, "w"); assert(NULL != iidm); sprintf(filename, "%s.partitioning", fileprefix); FILE *part = fopen(filename, "w"); assert(NULL != part); sprintf(filename, "%s.partitioningInfo", fileprefix); FILE *pari = fopen(filename, "w"); assert(NULL != pari); // Step through all the unitigs once to build the partition mapping and IID mapping. tgStore *tigStore = new tgStore(tigStorePath); tgTig *tig = new tgTig; for (uint32 tigID=0, ti=0; ti<unitigs.size(); ti++) { Unitig *utg = unitigs[ti]; if ((utg == NULL) || (utg->getNumFrags() == 0)) continue; assert(utg->getLength() > 0); // Convert the bogart tig to a tgTig and save to the store. unitigToTig(tig, (isFinal) ? tigID : ti, utg); tigID++; tigStore->insertTig(tig, false); // Increment the partition if the current one is too large. if ((frg_count + utg->getNumFrags() >= frg_count_target) && (frg_count > 0)) { fprintf(pari, "Partition %d has %d unitigs and %d fragments.\n", prt_count, utg_count, frg_count); prt_count++; utg_count = 0; frg_count = 0; } // Note that the tig is included in this partition. utg_count += 1; frg_count += utg->getNumFrags(); // Map the tig to a partition, and log both the tig-to-partition map and the partition-to-read map. fprintf(iidm, "bogart "F_U32" -> tig "F_U32" (in partition "F_U32" with "F_U32" frags)\n", utg->id(), utg->tigID(), prt_count, utg->getNumFrags()); for (uint32 fragIdx=0; fragIdx<utg->getNumFrags(); fragIdx++) fprintf(part, "%d\t%d\n", prt_count, utg->ufpath[fragIdx].ident); } fprintf(pari, "Partition %d has %d unitigs and %d fragments.\n", // Don't forget to log the last partition! prt_count, utg_count, frg_count); fclose(pari); fclose(part); fclose(iidm); delete tig; delete tigStore; }