//  Place every contained fragment into the unitig that holds its best container.
//
//  The fragment IDs 1..FI->numFragments() are swept repeatedly.  On each sweep, a contained
//  fragment that is not yet in a unitig is added (via Unitig::addContainedFrag()) to the unitig
//  holding its best container, provided that container has itself been placed.  Fragments whose
//  container is not yet placed are counted as pending and retried on the next sweep.  Sweeping
//  stops as soon as a sweep places nothing.
//
//  Side effects: clears LOG_PLACE_FRAG from the global logFileFlags, and sorts every non-NULL
//  unitig with index >= 1 before returning.
//
//  unitigs - the set of unitigs to place fragments into.
//
void placeContainsUsingBestOverlaps(UnitigVector &unitigs) {
  uint32   fragsPlaced  = 1;   //  Non-zero so that the first sweep runs.
  uint32   fragsPending = 0;

  logFileFlags &= ~LOG_PLACE_FRAG;

  while (fragsPlaced > 0) {
    fragsPlaced  = 0;
    fragsPending = 0;

    writeLog("==> PLACING CONTAINED FRAGMENTS\n");

    for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
      BestContainment *bestcont = OG->getBestContainer(fid);

      if (bestcont->isContained == false)
        //  Not a contained fragment.
        continue;

      if (Unitig::fragIn(fid) != 0)
        //  Containee already placed.
        continue;

      if (Unitig::fragIn(bestcont->container) == 0) {
        //  Container not placed (yet).
        fragsPending++;
        continue;
      }

      Unitig *utg = unitigs[Unitig::fragIn(bestcont->container)];

      utg->addContainedFrag(fid, bestcont, logFileFlagSet(LOG_INITIAL_CONTAINED_PLACEMENT));

      //  Report the unitig we tried to add to (not the container fragment ID) before the assert
      //  below fires.
      if (utg->id() != Unitig::fragIn(fid))
        writeLog("placeContainsUsingBestOverlaps()-- FAILED to add frag %d to unitig %d.\n", fid, utg->id());
      assert(utg->id() == Unitig::fragIn(fid));

      fragsPlaced++;
    }

    writeLog("==> PLACING CONTAINED FRAGMENTS - placed %d fragments; still need to place %d\n",
             fragsPlaced, fragsPending);

    //  Nothing was placed but fragments are still pending: their containers will never be
    //  placed.  fragsPlaced is already zero, so the loop terminates after this message.
    if ((fragsPlaced == 0) && (fragsPending > 0))
      writeLog("Stopping contained fragment placement due to zombies.\n");
  }

  //  Adding contained fragments can leave the paths unsorted; re-sort everything.
  for (uint32 ti=1; ti<unitigs.size(); ti++) {
    Unitig *utg = unitigs[ti];

    if (utg)
      utg->sort();
  }
}
// After splitting and ejecting some contains, check for discontinuous unitigs. // void splitDiscontinuousUnitigs(UnitigVector &unitigs, uint32 minOverlap) { writeLog("==> SPLIT DISCONTINUOUS\n"); uint32 numTested = 0; uint32 numSplit = 0; uint32 numCreated = 0; uint32 splitFragsLen = 0; uint32 splitFragsMax = 0; ufNode *splitFrags = NULL; for (uint32 ti=0; ti<unitigs.size(); ti++) { Unitig *tig = unitigs[ti]; if ((tig == NULL) || (tig->ufpath.size() < 2)) continue; // Unitig must be sorted. Someone upstream os screwing this up. tig->sort(); // We'll want to build an array of new fragments to split out. This can be up // to the size of the largest unitig. splitFragsMax = MAX(splitFragsMax, tig->ufpath.size()); // Check that the unitig starts at position zero. Not critical for the next loop, but // needs to be dome sometime. int32 minPos = MIN(tig->ufpath[0].position.bgn, tig->ufpath[0].position.end); if (minPos == 0) continue; writeLog("splitDiscontinuous()-- tig "F_U32" offset messed up; reset by "F_S32".\n", tig->id(), minPos); for (uint32 fi=0; fi<tig->ufpath.size(); fi++) { ufNode *frg = &tig->ufpath[fi]; frg->position.bgn -= minPos; frg->position.end -= minPos; } } splitFrags = new ufNode [splitFragsMax]; // Now, finally, we can check for gaps in unitigs. for (uint32 ti=0; ti<unitigs.size(); ti++) { Unitig *tig = unitigs[ti]; if ((tig == NULL) || (tig->ufpath.size() < 2)) continue; // We don't expect many unitigs to be broken, so we'll do a first quick pass to just // test if it is. int32 maxEnd = MAX(tig->ufpath[0].position.bgn, tig->ufpath[0].position.end); bool isBroken = false; for (uint32 fi=0; fi<tig->ufpath.size(); fi++) { ufNode *frg = &tig->ufpath[fi]; int32 bgn = MIN(frg->position.bgn, frg->position.end); int32 end = MAX(frg->position.bgn, frg->position.end); if (bgn > maxEnd - minOverlap) { isBroken = true; break; } maxEnd = MAX(maxEnd, end); } numTested++; if (isBroken == false) continue; numSplit++; // Dang, busted unitig. Fix it up. 
splitFragsLen = 0; maxEnd = 0; if (logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS)) writeLog("splitDiscontinuous()-- discontinuous tig "F_U32" with "F_SIZE_T" fragments broken into:\n", tig->id(), tig->ufpath.size()); for (uint32 fi=0; fi<tig->ufpath.size(); fi++) { ufNode *frg = &tig->ufpath[fi]; int32 bgn = MIN(frg->position.bgn, frg->position.end); int32 end = MAX(frg->position.bgn, frg->position.end); // Good thick overlap exists to this fragment, save it. if (bgn <= maxEnd - minOverlap) { assert(splitFragsLen < splitFragsMax); splitFrags[splitFragsLen++] = *frg; maxEnd = MAX(maxEnd, end); continue; } // No thick overlap found. We need to break right here before the current fragment. // If there is exactly one fragment, and it's contained, and it's not mated, move it to the // container. (This has a small positive benefit over just making every read a singleton). // if ((splitFragsLen == 1) && (FI->mateIID(splitFrags[0].ident) == 0) && (splitFrags[0].contained != 0)) { Unitig *dangler = unitigs[tig->fragIn(splitFrags[0].contained)]; // If the parent isn't in a unitig, we must have shattered the repeat unitig it was in. // Do the same here. 
if (dangler == NULL) { if (logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS)) writeLog("splitDiscontinuous()-- singleton frag "F_U32" shattered.\n", splitFrags[0].ident); Unitig::removeFrag(splitFrags[0].ident); } else { assert(dangler->id() == tig->fragIn(splitFrags[0].contained)); if (logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS)) writeLog("splitDiscontinuous()-- old tig "F_U32" with "F_SIZE_T" fragments (contained frag "F_U32" moved here).\n", dangler->id(), dangler->ufpath.size() + 1, splitFrags[0].ident); BestContainment *bestcont = OG->getBestContainer(splitFrags[0].ident); assert(bestcont->isContained == true); dangler->addContainedFrag(splitFrags[0].ident, bestcont, false); dangler->bubbleSortLastFrag(); assert(dangler->id() == Unitig::fragIn(splitFrags[0].ident)); } } // Otherwise, make an entirely new unitig for these fragments. else { numCreated++; makeNewUnitig(unitigs, splitFragsLen, splitFrags); tig = unitigs[ti]; } // Done with the split, save the current fragment. This resets everything. splitFragsLen = 0; splitFrags[splitFragsLen++] = *frg; maxEnd = end; } // If we did any splitting, then the length of the frags in splitFrags will be less than the length // of the path in the current unitig. Make a final new unitig for the remaining fragments. // if (splitFragsLen != tig->ufpath.size()) { numCreated++; makeNewUnitig(unitigs, splitFragsLen, splitFrags); delete unitigs[ti]; unitigs[ti] = NULL; } } writeLog("splitDiscontinuous()-- Tested "F_U32" unitigs, split "F_U32" into "F_U32" new unitigs.\n", numTested, numSplit, numCreated); delete [] splitFrags; }
//  Place every read that is not yet in a unitig, using all of its overlaps.
//
//  Phase 1 (OpenMP parallel): for each unplaced read, placeFragUsingOverlaps() proposes
//  placements.  Placements with fCoverage below 0.99, placements into single-read unitigs, and
//  placements for which overlapConsistentWithTig() returns less than 0.5 are rejected.  Of the
//  rest, the one with the lowest errors/aligned ratio is remembered in placedTig[] / placedPos[].
//
//  Phase 2 (serial): each read with a remembered placement is added to its unitig.  Reads with no
//  acceptable placement are only counted; they are not added to any unitig.
//
//  Finally every non-NULL unitig with index >= 1 is sorted.
//
//  unitigs - the unitigs to place reads into.
//  prefix  - not used by this function.
//
void placeUnplacedUsingAllOverlaps(UnitigVector &unitigs, const char *prefix) {
  uint32  fiLimit    = FI->numFragments();
  uint32  numThreads = omp_get_max_threads();
  uint32  blockSize  = (fiLimit < 100 * numThreads) ? numThreads : fiLimit / 99;

  //  Per-read results of phase 1, indexed by fragment ID.  A placedTig[] of zero means "no
  //  placement found".

  uint32       *placedTig = new uint32      [FI->numFragments() + 1];
  SeqInterval  *placedPos = new SeqInterval [FI->numFragments() + 1];

  memset(placedTig, 0, sizeof(uint32)      * (FI->numFragments() + 1));
  memset(placedPos, 0, sizeof(SeqInterval) * (FI->numFragments() + 1));

  //  Just some logging.  Count the number of reads we try to place.

  uint32   nToPlaceContained = 0;
  uint32   nToPlace          = 0;
  uint32   nPlacedContained  = 0;
  uint32   nPlaced           = 0;
  uint32   nFailedContained  = 0;
  uint32   nFailed           = 0;

  for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
    if (Unitig::fragIn(fid) == 0) {
      if (OG->isContained(fid))
        nToPlaceContained++;
      else
        nToPlace++;
    }
  }

  writeLog("placeContains()-- placing %u contained and %u unplaced reads, with %d threads.\n", nToPlaceContained, nToPlace, numThreads);

  //  Do the placing!  Each iteration writes only its own placedTig[fid] / placedPos[fid].

#pragma omp parallel for schedule(dynamic, blockSize)
  for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
    bool  enableLog = true;

    if (Unitig::fragIn(fid) > 0)
      continue;

    //  Place the read.

    vector<overlapPlacement>   placements;

    placeFragUsingOverlaps(unitigs, AS_MAX_ERATE, NULL, fid, placements);

    //  Search the placements for the highest expected identity placement using all overlaps in
    //  the unitig.  'b' is the index of the best placement so far, UINT32_MAX if none.

    uint32   b = UINT32_MAX;

    for (uint32 i=0; i<placements.size(); i++) {
      Unitig *tig = unitigs[placements[i].tigID];

      if (placements[i].fCoverage < 0.99)   //  Ignore partially placed reads.
        continue;

      if (tig->ufpath.size() == 1)          //  Ignore placements in singletons.
        continue;

      uint32  bgn   = (placements[i].position.bgn < placements[i].position.end) ? placements[i].position.bgn : placements[i].position.end;
      uint32  end   = (placements[i].position.bgn < placements[i].position.end) ? placements[i].position.end : placements[i].position.bgn;

      double  erate = placements[i].errors / placements[i].aligned;

      //  ufpath.size() is not a 32-bit value; cast it so it matches the %6u in the format.

      if (tig->overlapConsistentWithTig(5.0, bgn, end, erate) < 0.5) {
        if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
          writeLog("frag %8u tested tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f) - HIGH ERROR\n",
                   fid, placements[i].tigID, (uint32)tig->ufpath.size(), placements[i].position.bgn, placements[i].position.end, placements[i].fCoverage, erate);
        continue;
      }

      if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
        writeLog("frag %8u tested tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
                 fid, placements[i].tigID, (uint32)tig->ufpath.size(), placements[i].position.bgn, placements[i].position.end, placements[i].fCoverage, erate);

      if ((b == UINT32_MAX) ||
          (placements[i].errors / placements[i].aligned < placements[b].errors / placements[b].aligned))
        b = i;
    }

    //  If we didn't find a best, b will be invalid; set positions for adding to a new tig.
    //  If we did, save both the position it was placed at, and the tigID it was placed in.

    if (b == UINT32_MAX) {
      if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
        writeLog("frag %8u remains unplaced\n", fid);

      placedPos[fid].bgn = 0;
      placedPos[fid].end = FI->fragmentLength(fid);

    } else {
      if ((enableLog == true) && (logFileFlagSet(LOG_PLACE_UNPLACED)))
        writeLog("frag %8u placed tig %6u (%6u reads) at %8u-%8u (cov %7.5f erate %6.4f)\n",
                 fid, placements[b].tigID, (uint32)unitigs[placements[b].tigID]->ufpath.size(), placements[b].position.bgn, placements[b].position.end, placements[b].fCoverage, placements[b].errors / placements[b].aligned);

      placedTig[fid] = placements[b].tigID;
      placedPos[fid] = placements[b].position;
    }
  }

  //  All reads placed, now just dump them in their correct tigs.

  for (uint32 fid=1; fid<FI->numFragments()+1; fid++) {
    Unitig  *tig = NULL;
    ufNode   frg;

    if (Unitig::fragIn(fid) > 0)
      continue;

    //  If not placed, dump it in a new unitig.  Well, not anymore.  These reads were not placed
    //  in any tig initially, were not allowed to seed a tig, and now, could find no place to go.
    //  They're garbage.  Plus, it screws up the logging above because we don't know the new tig
    //  ID until now.

    if (placedTig[fid] == 0) {
      if (OG->isContained(fid))
        nFailedContained++;
      else
        nFailed++;

      //tig = unitigs.newUnitig(false);
    }

    //  Otherwise, it was placed somewhere, grab the tig.

    else {
      if (OG->isContained(fid))
        nPlacedContained++;
      else
        nPlaced++;

      tig = unitigs[placedTig[fid]];
    }

    //  Add it to the tig, if there is one (tig stays NULL for reads that failed to place).
    //  Logging for this is above.

    if (tig) {
      frg.ident     = fid;
      frg.contained = 0;
      frg.parent    = 0;
      frg.ahang     = 0;
      frg.bhang     = 0;
      frg.position  = placedPos[fid];

      tig->addFrag(frg, 0, false);
    }
  }

  //  Cleanup.

  delete [] placedPos;
  delete [] placedTig;

  writeLog("placeContains()-- Placed %u contained reads and %u unplaced reads.\n", nPlacedContained, nPlaced);
  writeLog("placeContains()-- Failed to place %u contained reads (too high error suspected) and %u unplaced reads (lack of overlaps suspected).\n", nFailedContained, nFailed);

  //  But wait!  All the tigs need to be sorted.  Well, not really _all_, but the hard ones to
  //  sort are big, and those quite likely had reads added to them, so it's really not worth the
  //  effort of tracking which ones need sorting, since the ones that don't need it are trivial
  //  to sort.

  for (uint32 ti=1; ti<unitigs.size(); ti++) {
    Unitig *utg = unitigs[ti];

    if (utg)
      utg->sort();
  }
}
//  Join the unitig holding join->frFragID with part of the unitig holding join->toFragID.
//
//  Two new unitigs are created.  'joinUnitig' receives every read of the 'fr' unitig plus the
//  reads of the 'to' unitig on one side of the join read; 'discUnitig' receives the remaining
//  reads of the 'to' unitig.  Which side is joined depends on join->frFirst.  Both donor unitigs
//  are then deleted, their slots in 'unitigs' set to NULL, and both new unitigs are sorted.
//
//  unitigs - the unitig collection; gains two unitigs and loses the two donors.
//  join    - describes the join: the two read IDs, whether the 'to' unitig must be reverse
//            complemented first (toFlip), and which end of the 'fr' unitig is joined (frFirst).
//
//  NOTE(review): toIdx is looked up before the optional reverseComplement() of the 'to' unitig.
//  Whether it is still the right split point after a flip cannot be told from here -- confirm.
//
//  NOTE(review): this assumes the two reads are in different unitigs; if frId == toId the same
//  unitig would be deleted twice below.
//
static void joinUnitigs_append(UnitigVector &unitigs, joinEntry *join) {
  uint32    frId  = Unitig::fragIn(join->frFragID);
  uint32    toId  = Unitig::fragIn(join->toFragID);

  Unitig   *fr    = unitigs[frId];
  Unitig   *to    = unitigs[toId];

  uint32    toIdx = Unitig::pathPosition(join->toFragID);

  //  The 'fr' unitig is assumed to be forward, and assumed to be the one we join to.

  //  Compute the offset for our append.  We just need to compute where the join fragment would
  //  appear in the unitig.  The join fragment MUST be the first thing in the frUnitig.

  //int32 offset = MIN(frF.position.bgn, frF.position.end);

  //  Over all fragments in the donor unitigs, add them to either the joinUnitig or the
  //  discUnitig.

  Unitig *joinUnitig = unitigs.newUnitig(false);
  Unitig *discUnitig = unitigs.newUnitig(false);

  //  Reverse the 'to' unitig if needed.

  if (join->toFlip)
    to->reverseComplement(true);

  //  If we're joining off the 5' end of the fr unitig, add the to reads first.

  if (join->frFirst == true) {
    uint32 ii=0;

    for (; ii < toIdx; ii++)
      joinUnitig->addFrag(to->ufpath[ii], 0, false);

    for (; ii < to->ufpath.size(); ii++)
      discUnitig->addFrag(to->ufpath[ii], 0, false);
  }

  //  Now add all the fr unitig reads.  (These must come from 'fr'; indexing 'to' here would copy
  //  the wrong reads and could run past the end of its path.)

  for (uint32 ii=0; ii < fr->ufpath.size(); ii++)
    joinUnitig->addFrag(fr->ufpath[ii], 0, false);

  //  If we're not joining off the 5' end, add the to unitig reads last.

  if (join->frFirst == false) {
    uint32 ii = 0;

    for (; ii < toIdx; ii++)
      discUnitig->addFrag(to->ufpath[ii], 0, false);

    for (; ii < to->ufpath.size(); ii++)
      joinUnitig->addFrag(to->ufpath[ii], 0, false);
  }

  //  Delete the donor unitigs.

  delete fr;
  delete to;

  unitigs[frId] = NULL;
  unitigs[toId] = NULL;

  //  And make sure the new unitigs are consistent.

  joinUnitig->sort();
  discUnitig->sort();
}
//  Classify each potential bubble unitig as an orphan, a bubble or a repeat, and merge orphans
//  into their target unitig.
//
//  findPotentialBubbles() supplies the candidate unitigs and findBubbleReadPlacements() supplies,
//  per read, the places that read could go in other unitigs.  For each candidate:
//
//    - the placements are grouped by target unitig and merged into intervals; intervals smaller
//      than 0.75x or larger than 1.25x the candidate's length are discarded, the rest become
//      candidatePop targets;
//    - each placement is assigned to the target(s) it intersects, keeping only the lowest
//      errors/aligned placement per read in each target;
//    - a target holding every read of the candidate counts as an "orphan" placement; a target
//      holding at least the first and the last read counts as a "bubble" placement.
//
//  No such placement leaves the candidate untouched.  More than one marks it _isRepeat.  A single
//  bubble placement marks it _isBubble.  A single orphan placement moves its reads into the
//  target unitig and deletes the candidate.
//
//  unitigs         - all unitigs; _isBubble / _isRepeat are reset on every one of them first,
//                    and all non-singleton unitigs are sorted at the end.
//  deviationBubble - passed through to findBubbleReadPlacements().
//
void popBubbles(UnitigVector &unitigs, double deviationBubble) {
  BubTargetList   potentialBubbles;

  findPotentialBubbles(unitigs, potentialBubbles);

  writeStatus("popBubbles()-- Found "F_SIZE_T" potential bubbles.\n", potentialBubbles.size());

  //if (potentialBubbles.size() == 0)
  //  return;

  writeLog("\n");
  writeLog("Found "F_SIZE_T" potential bubbles.\n", potentialBubbles.size());
  writeLog("\n");

  vector<overlapPlacement>   *placed = findBubbleReadPlacements(unitigs, potentialBubbles, deviationBubble);

  //  We now have, in 'placed', a list of all the places that each read could be placed.  Decide
  //  if there is a _single_ place for each bubble to be popped.

  uint32  tiLimit = unitigs.size();
  //uint32  tiNumThreads  = omp_get_max_threads();
  //uint32  tiBlockSize   = (tiLimit < 100000 * tiNumThreads) ? tiNumThreads : tiLimit / 99999;

  //  Clear flags.

  for (uint32 ti=0; ti<tiLimit; ti++) {
    if (unitigs[ti]) {
      unitigs[ti]->_isBubble = false;
      unitigs[ti]->_isRepeat = false;
    }
  }

  //  Process the placements.  (This loop is serial; it modifies and deletes unitigs.)

  for (uint32 ti=0; ti<tiLimit; ti++) {
    if (potentialBubbles.count(ti) == 0)   //  Not a potential bubble
      continue;

    //  Scan the bubble, decide if there are _ANY_ read placements.  Log appropriately.

    Unitig  *bubble        = unitigs[ti];
    bool     hasPlacements = false;

    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
      uint32  readID  = bubble->ufpath[fi].ident;

      if (placed[readID].size() > 0)
        hasPlacements = true;
    }

    if (hasPlacements == false)
      writeLog("potential bubble %u had no valid placements (all were not contained in target tig)\n", ti);
    else
      writeLog("potential bubble %u\n", ti);

    //  Split the placements into piles for each target and build an interval list for each
    //  target.  For each read in the tig, convert the vector of placements into interval lists,
    //  one list per target tig.

    map<uint32, intervalList<uint32> *>  targetIntervals;

    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
      uint32  readID  = bubble->ufpath[fi].ident;

      for (uint32 pp=0; pp<placed[readID].size(); pp++) {
        uint32  tid = placed[readID][pp].tigID;

        assert(placed[readID][pp].frgID > 0);

        uint32  bgn = (placed[readID][pp].position.bgn < placed[readID][pp].position.end) ? placed[readID][pp].position.bgn : placed[readID][pp].position.end;
        uint32  end = (placed[readID][pp].position.bgn < placed[readID][pp].position.end) ? placed[readID][pp].position.end : placed[readID][pp].position.bgn;

        if (targetIntervals[tid] == NULL)
          targetIntervals[tid] = new intervalList<uint32>;

        //writeLog("read %u -> tig %u intervals %u-%u\n", readID, tid, bgn, end);

        targetIntervals[tid]->add(bgn, end-bgn);
      }
    }

    //  The candidatePop objects in 'targets' are allocated here and released at the bottom of
    //  this loop body.

    vector<candidatePop *>    targets;

    //  Squish the intervals.  Create new candidatePops for each interval that isn't too big or
    //  small.  Assign each overlapPlacements to the correct candidatePop.

    for (map<uint32, intervalList<uint32> *>::iterator it=targetIntervals.begin(); it != targetIntervals.end(); ++it) {
      uint32                 targetID = it->first;
      intervalList<uint32>  *IL       = it->second;

      IL->merge();

      //  Discard intervals that are significantly too small or large.  Save the ones that are
      //  nicely sized.  Logging here isn't terribly useful, it's just repeated (out of order)
      //  later when we try to make sense of the read alignments.
      //
      //  NOTE(review): unitigs[targetID] is not checked for NULL.  If a target could be a tig
      //  popped as an orphan earlier in this loop, it would be NULL here -- confirm that
      //  cannot happen.

      for (uint32 ii=0; ii<IL->numberOfIntervals(); ii++) {
        if ((IL->hi(ii) - IL->lo(ii) < 0.75 * bubble->getLength()) ||   //  Too small!
            (1.25 * bubble->getLength() < IL->hi(ii) - IL->lo(ii))) {   //  Too big!
          writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u - size mismatch, discarded\n",
                   bubble->id(), bubble->getLength(),
                   targetID, ii, IL->lo(ii), IL->hi(ii), IL->hi(ii) - IL->lo(ii));
          continue;
        }

        writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u\n",
                 bubble->id(), bubble->getLength(),
                 targetID, ii, IL->lo(ii), IL->hi(ii), IL->hi(ii) - IL->lo(ii));

        targets.push_back(new candidatePop(bubble, unitigs[targetID], IL->lo(ii), IL->hi(ii)));
      }

      delete IL;
    }

    targetIntervals.clear();

    //  If no targets, nothing to do (and nothing to release).

    if (targets.size() == 0)
      continue;

    //  Run through the placements again, and assign them to the correct target.
    //
    //  For each read:
    //  For each acceptable placement:
    //  For each target location:
    //  If the placement is for this target, save it.

    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++) {
      uint32  readID  = bubble->ufpath[fi].ident;

      for (uint32 pp=0; pp<placed[readID].size(); pp++) {
        uint32  tid = placed[readID][pp].tigID;

        uint32  bgn = (placed[readID][pp].position.bgn < placed[readID][pp].position.end) ? placed[readID][pp].position.bgn : placed[readID][pp].position.end;
        uint32  end = (placed[readID][pp].position.bgn < placed[readID][pp].position.end) ? placed[readID][pp].position.end : placed[readID][pp].position.bgn;

        for (uint32 tt=0; tt<targets.size(); tt++)
          if ((targets[tt]->target->id() == tid) &&
              (targets[tt]->bgn < end) && (bgn < targets[tt]->end))
            targets[tt]->placed.push_back(placed[readID][pp]);
      }
    }

    //  Count the number of targets that have all the reads (later: in the correct order, etc,
    //  etc).  Remove those that don't.

    set<uint32>  tigReads;  //  Reads in the bubble tig.
    set<uint32>  tgtReads;  //  Reads in the bubble that have a placement in the target.

    //  Remove duplicate placements from each target.

    for (uint32 tt=0; tt<targets.size(); tt++) {
      candidatePop *t = targets[tt];

      //  Detect duplicates, keep the one with lower error.  There are a lot of duplicate
      //  placements, logging isn't terribly useful.  The loser is overwritten with a default
      //  overlapPlacement, which the frgID == 0 tests then treat as an empty entry.

      for (uint32 aa=0; aa<t->placed.size(); aa++) {
        for (uint32 bb=0; bb<t->placed.size(); bb++) {
          if ((aa == bb) ||
              (t->placed[aa].frgID != t->placed[bb].frgID) ||
              (t->placed[aa].frgID == 0) ||
              (t->placed[bb].frgID == 0))
            continue;

          if (t->placed[aa].errors / t->placed[aa].aligned < t->placed[bb].errors / t->placed[bb].aligned) {
#ifdef SHOW_MULTIPLE_PLACEMENTS
            writeLog("duplicate read alignment for tig %u read %u - better %u-%u %.4f - worse %u-%u %.4f\n",
                     t->placed[aa].tigID, t->placed[aa].frgID,
                     t->placed[aa].position.bgn, t->placed[aa].position.end, t->placed[aa].errors / t->placed[aa].aligned,
                     t->placed[bb].position.bgn, t->placed[bb].position.end, t->placed[bb].errors / t->placed[bb].aligned);
#endif
            t->placed[bb] = overlapPlacement();
          } else {
#ifdef SHOW_MULTIPLE_PLACEMENTS
            writeLog("duplicate read alignment for tig %u read %u - better %u-%u %.4f - worse %u-%u %.4f\n",
                     t->placed[aa].tigID, t->placed[aa].frgID,
                     t->placed[bb].position.bgn, t->placed[bb].position.end, t->placed[bb].errors / t->placed[bb].aligned,
                     t->placed[aa].position.bgn, t->placed[aa].position.end, t->placed[aa].errors / t->placed[aa].aligned);
#endif
            t->placed[aa] = overlapPlacement();
          }
        }
      }

      //  Get rid of any now-empty entries.

      for (uint32 aa=t->placed.size(); aa--; ) {
        if (t->placed[aa].frgID == 0) {
          t->placed[aa] = t->placed.back();
          t->placed.pop_back();
        }
      }
    }

    //  Make a set of the reads in the bubble.  We'll compare each target against this to decide
    //  if all reads are placed.

    for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
      tigReads.insert(bubble->ufpath[fi].ident);

    uint32   nOrphan      = 0;   //  Full coverage; bubble can be popped.
    uint32   orphanTarget = 0;

    uint32   nBubble      = 0;   //  Partial coverage, bubble cannot be popped.
    uint32   bubbleTarget = 0;

    for (uint32 tt=0; tt<targets.size(); tt++) {
      tgtReads.clear();

      for (uint32 op=0; op<targets[tt]->placed.size(); op++) {
        if (logFileFlagSet(LOG_BUBBLE_DETAIL))
          writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u - read %7u at %9u-%9u\n",
                   bubble->id(), bubble->getLength(),
                   targets[tt]->target->id(), tt, targets[tt]->bgn, targets[tt]->end, targets[tt]->end - targets[tt]->bgn,
                   targets[tt]->placed[op].frgID,
                   targets[tt]->placed[op].position.bgn, targets[tt]->placed[op].position.end);

        assert(targets[tt]->placed[op].frgID > 0);
        tgtReads.insert(targets[tt]->placed[op].frgID);
      }

      //  Count the number of consecutive reads from the 5' or 3' end of the bubble that are
      //  placed in the target.
      //
      //  Also, count the number of reads in the bubble that are placed in the target.  Likely
      //  the same as n5 + n3.

      uint32  n5 = 0;
      uint32  n3 = 0;
      uint32  nt = 0;

      for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
        if (tgtReads.count(bubble->ufpath[fi].ident) > 0)
          n5++;
        else
          break;

      for (uint32 fi=bubble->ufpath.size(); fi-->0; )
        if (tgtReads.count(bubble->ufpath[fi].ident) > 0)
          n3++;
        else
          break;

      for (uint32 fi=0; fi<bubble->ufpath.size(); fi++)
        if (tgtReads.count(bubble->ufpath[fi].ident) > 0)
          nt++;

      //  Report now, before we nuke targets[tt] for being not a bubble!

      if ((nt == bubble->ufpath.size()) ||
          ((n5 > 0) && (n3 > 0)))
        writeLog("tig %8u length %9u -> target %8u piece %2u position %9u-%9u length %8u - expected %3"F_SIZE_TP" reads, had %3"F_SIZE_TP" reads.  n5=%3u n3=%3u nt=%3u\n",
                 bubble->id(), bubble->getLength(),
                 targets[tt]->target->id(), tt, targets[tt]->bgn, targets[tt]->end, targets[tt]->end - targets[tt]->bgn,
                 tigReads.size(),
                 tgtReads.size(), n5, n3, nt);

      //  Decide if this is a bubble, orphan from construction, or repeat.

      if (nt == bubble->ufpath.size()) {
        nOrphan++;
        orphanTarget = tt;
      }

      else if ((n5 > 0) && (n3 > 0)) {
        nBubble++;
        bubbleTarget = tt;
      }
    }

    //  Act on the classification.  This is a single if/else chain (rather than a series of
    //  'continue's) so that every outcome reaches the release of 'targets' below.
    //
    //  ufpath.size() is not a 32-bit value; it is cast to match the %6u in the formats.

    if (nOrphan + nBubble == 0) {
      //  If no placements, pbbbt.
      //writeLog("tig %8u length %8u reads %6u had no bubble or orphan placements.\n", bubble->id(), bubble->getLength(), bubble->ufpath.size());
    }

    else if (nOrphan + nBubble > 1) {
      //  If multiple orphan and/or bubble placements, it's a repeat.
      writeLog("tig %8u length %8u reads %6u - repeat - %u orphan %u bubble placements.\n",
               bubble->id(), bubble->getLength(), (uint32)bubble->ufpath.size(),
               nOrphan, nBubble);
      writeLog("\n");
      bubble->_isRepeat = true;
    }

    else if (nBubble > 0) {
      //  If a bubble placement, mark it as a bubble so it can be skipped during repeat
      //  detection.
      writeLog("tig %8u length %8u reads %6u - bubble\n",
               bubble->id(), bubble->getLength(), (uint32)bubble->ufpath.size());
      writeLog("\n");
      bubble->_isBubble = true;
    }

    else {
      //  Otherwise, it's an orphan, move the reads to the proper place.
      writeLog("tig %8u length %8u reads %6u - orphan\n",
               bubble->id(), bubble->getLength(), (uint32)bubble->ufpath.size());

      for (uint32 op=0, tt=orphanTarget; op<targets[tt]->placed.size(); op++) {
        ufNode  frg;

        frg.ident        = targets[tt]->placed[op].frgID;
        frg.contained    = 0;
        frg.parent       = 0;
        frg.ahang        = 0;
        frg.bhang        = 0;
        frg.position.bgn = targets[tt]->placed[op].position.bgn;
        frg.position.end = targets[tt]->placed[op].position.end;

        writeLog("move read %u from tig %u to tig %u %u-%u\n",
                 frg.ident, bubble->id(), targets[tt]->target->id(), frg.position.bgn, frg.position.end);

        targets[tt]->target->addFrag(frg, 0, false);
      }

      writeLog("\n");

      unitigs[bubble->id()] = NULL;
      delete bubble;
    }

    //  Release the candidatePop objects allocated for this bubble.
    //  NOTE(review): assumes candidatePop does not own (and so does not delete) the Unitig
    //  pointers it was constructed with -- confirm against its definition.

    for (uint32 tt=0; tt<targets.size(); tt++)
      delete targets[tt];

    targets.clear();
  }  //  Over all bubbles

  writeLog("\n");   //  Needed if no bubbles are popped.

  delete [] placed;

  //  Sort reads in all the tigs.  Overkill, but correct.

  for (uint32 ti=0; ti<tiLimit; ti++) {
    Unitig  *tig = unitigs[ti];

    if ((tig == NULL) ||               //  Not a tig, ignore it.
        (tig->ufpath.size() == 1))     //  Singleton, already sorted.
      continue;

    tig->sort();
  }
}