//  Walk every read id from 1 through FI->numFragments() and deal with the reads that are not yet
//  in any unitig.
//
//    unitigs                   - receives one new unitig per promoted read.
//    enablePromoteToSingleton  - when false, nothing is promoted; each unplaced read is logged
//                                and marked as ignored in FI instead.
//
//  Reads already in a unitig, and reads whose length is zero (deleted), are skipped.
//
void
promoteToSingleton(UnitigVector &unitigs, bool enablePromoteToSingleton) {

  for (uint32 readId = 1; readId <= FI->numFragments(); readId++) {

    //  Nothing to do for reads that are already placed, or that were deleted.
    if ((Unitig::fragIn(readId) != 0) ||
        (FI->fragmentLength(readId) == 0))
      continue;

    if (!enablePromoteToSingleton) {
      writeLog("promoteToSingleton()-- Repeat fragment " F_U32 " removed from assembly.\n", readId);
      FI->markAsIgnore(readId);
      continue;
    }

    //  Build a one-read unitig: the read spans 0..length, with no parent and no hangs.
    Unitig  *singleton = unitigs.newUnitig(false);
    ufNode   node;

    node.ident             = readId;
    node.contained         = 0;
    node.parent            = 0;

    node.ahang             = 0;
    node.bhang             = 0;

    node.position.bgn      = 0;
    node.position.end      = FI->fragmentLength(readId);

    node.containment_depth = 0;

    singleton->addFrag(node, 0, false);
  }
}
//  Walk every read id from 1 through FI->numFragments() and place each read that is not yet in
//  any unitig into a new unitig of its own.
//
//    unitigs  - receives one new unitig per promoted read.
//
//  Reads already in a unitig, and reads whose length is zero (deleted), are skipped.
//
void
promoteToSingleton(UnitigVector &unitigs) {

  for (uint32 readId = 1; readId <= FI->numFragments(); readId++) {

    //  Nothing to do for reads that are already placed, or that were deleted.
    if ((Unitig::fragIn(readId) != 0) ||
        (FI->fragmentLength(readId) == 0))
      continue;

    //  Build a one-read unitig: the read spans 0..length, with no parent and no hangs.
    Unitig  *singleton = unitigs.newUnitig(false);
    ufNode   node;

    node.ident        = readId;
    node.contained    = 0;
    node.parent       = 0;

    node.ahang        = 0;
    node.bhang        = 0;

    node.position.bgn = 0;
    node.position.end = FI->fragmentLength(readId);

    singleton->addFrag(node, 0, false);
  }
}
//  Create a new unitig and add the first splitFragsLen entries of splitFrags to it.
//
//    unitigs        - collection the new unitig is allocated from.
//    splitFragsLen  - number of reads in splitFrags to add.
//    splitFrags     - the reads; splitFrags[0] is always accessed, and its 'contained' field is
//                     cleared in place.
//
//  Every read is added with the same offset: the negated smaller coordinate of the first read.
//
static
void
makeNewUnitig(UnitigVector &unitigs,
              uint32        splitFragsLen,
              ufNode       *splitFrags) {
  Unitig  *newTig    = unitigs.newUnitig(false);
  ufNode  &firstRead = splitFrags[0];

  if (logFileFlagSet(LOG_MATE_SPLIT_DISCONTINUOUS))
    writeLog("splitDiscontinuous()-- new tig " F_U32 " with " F_U32 " fragments (starting at frag " F_U32 ").\n",
             newTig->id(), splitFragsLen, firstRead.ident);

  int shift = -MIN(firstRead.position.bgn, firstRead.position.end);

  //  The first read should already be uncontained; force it anyway.
  firstRead.contained = 0;

  ufNode *const pastLast = splitFrags + splitFragsLen;

  for (ufNode *read = splitFrags; read != pastLast; read++)
    newTig->addFrag(*read, shift, false);
}
//  Join two unitigs at the reads named by 'join', producing two new unitigs and deleting the
//  two donors.
//
//    unitigs  - collection holding the donor unitigs; the new unitigs are allocated from it and
//               the donors' slots are set to NULL.
//    join     - names the joining read in each donor (frFragID, toFragID), whether the 'to'
//               unitig must be reverse-complemented first (toFlip), and whether the 'to' reads
//               go before the 'fr' reads (frFirst).
//
//  The 'to' unitig is cut at the path position of join->toFragID.  The piece on the join side
//  goes to the joined unitig together with every read of the 'fr' unitig; the other piece goes
//  to a separate "disconnected" unitig.  All reads are added with offset 0 and both new unitigs
//  are then sorted.
//
//  NOTE(review): the cut position is looked up before the optional reverseComplement() of the
//  'to' unitig -- confirm it is still the intended index after the flip.
//
static
void
joinUnitigs_append(UnitigVector &unitigs, joinEntry *join) {
  uint32    frId  = Unitig::fragIn(join->frFragID);
  uint32    toId  = Unitig::fragIn(join->toFragID);

  Unitig   *fr    = unitigs[frId];
  Unitig   *to    = unitigs[toId];

  //  Index in to->ufpath where the 'to' unitig is cut.
  uint32    toIdx = Unitig::pathPosition(join->toFragID);

  //  The 'fr' unitig is assumed to be forward, and assumed to be the one we join to.

  //  Over all fragments in the donor unitigs, add them to either the joinUnitig or the discUnitig.

  Unitig  *joinUnitig = unitigs.newUnitig(false);
  Unitig  *discUnitig = unitigs.newUnitig(false);

  //  Reverse the 'to' unitig if needed.

  if (join->toFlip)
    to->reverseComplement(true);

  //  If we're joining off the 5' end of the fr unitig, add the to reads first.

  if (join->frFirst == true) {
    uint32 ii = 0;

    for (; ii < toIdx; ii++)
      joinUnitig->addFrag(to->ufpath[ii], 0, false);

    for (; ii < to->ufpath.size(); ii++)
      discUnitig->addFrag(to->ufpath[ii], 0, false);
  }

  //  Now add all the fr unitig reads.  These must come from 'fr' itself: indexing 'to' here
  //  would drop every fr read and could run past the end of to->ufpath.

  for (uint32 ii = 0; ii < fr->ufpath.size(); ii++)
    joinUnitig->addFrag(fr->ufpath[ii], 0, false);

  //  If we're not joining off the 5' end, add the to unitig reads last.

  if (join->frFirst == false) {
    uint32 ii = 0;

    for (; ii < toIdx; ii++)
      discUnitig->addFrag(to->ufpath[ii], 0, false);

    for (; ii < to->ufpath.size(); ii++)
      joinUnitig->addFrag(to->ufpath[ii], 0, false);
  }

  //  Delete the donor unitigs.

  delete fr;
  delete to;

  unitigs[frId] = NULL;
  unitigs[toId] = NULL;

  //  And make sure the new unitigs are consistent.

  joinUnitig->sort();
  discUnitig->sort();
}
//  Find "zombie" reads -- reads with nonzero length that are not listed in the path of any
//  existing unitig -- and put each one into a new unitig by itself.
//
//    unitigs  - scanned for read membership (NULL entries are skipped); receives the new unitigs.
//    erate    - not used by this function.
//
//  Each rescued read, and the final count of rescued reads, is written to the log.
//
void
placeZombies(UnitigVector &unitigs, double erate) {

  writeLog("==> SEARCHING FOR ZOMBIES\n");

  //  inUnitig[read id] is the id of the unitig listing that read, or noUnitig.
  uint32 *inUnitig   = new uint32 [FI->numFragments()+1];
  int     numZombies = 0;

  //  Mark fragments as dead, then unmark them if they are in a real living unitig.

  for (uint32 i=0; i<FI->numFragments()+1; i++)
    inUnitig[i] = noUnitig;

  for (uint32 ti=0; ti<unitigs.size(); ti++) {
    Unitig  *utg = unitigs[ti];

    if (utg == NULL)
      continue;

    for (uint32 fi=0; fi<utg->ufpath.size(); fi++)
      inUnitig[utg->ufpath[fi].ident] = utg->id();
  }

  //  For anything not in a living unitig, reload the overlaps and find a new container.
  //  (NOT IMPLEMENTED - for now we just move these to new singleton unitigs).

  for (uint32 i=0; i<FI->numFragments()+1; i++) {
    if (FI->fragmentLength(i) == 0)
      //  Deleted fragment
      continue;

    if (inUnitig[i] != noUnitig)
      //  Valid fragment in a unitig
      continue;

    Unitig  *utg = unitigs.newUnitig(false);
    ufNode   frg;

    frg.ident             = i;
    frg.contained         = 0;
    frg.parent            = 0;

    frg.ahang             = 0;
    frg.bhang             = 0;

    frg.position.bgn      = 0;
    frg.position.end      = FI->fragmentLength(i);

    frg.containment_depth = 0;

    utg->addFrag(frg, 0, false);

    //  Both values are unsigned 32-bit, so print them with F_U32 rather than the signed %d.
    writeLog("placeZombies()-- unitig " F_U32 " created from zombie fragment " F_U32 "\n",
             utg->id(), i);

    numZombies++;
  }

  writeLog("RESURRECTED %d ZOMBIE FRAGMENT%s.\n", numZombies, (numZombies != 1) ? "s" : "");

  delete [] inUnitig;
}
//  Assign every read of 'tig' to one of the regions in BP, and either count the assignments or
//  move the reads into new unitigs.
//
//    unitigs   - collection new unitigs are allocated from (doMove only).
//    tig       - the unitig whose reads are examined; it is not modified here.
//    BP        - the regions.  A read is assigned to the first region that accepts it: a repeat
//                region accepts a read it fully contains, a non-repeat region accepts a read it
//                intersects.
//    newTigs   - per region, the unitig created for it.  Cleared on entry when doMove is true.
//    lowCoord  - per region, the low coordinate of the first read moved into it.  Cleared on
//                entry when doMove is true.
//    nRepeat   - per region, number of reads assigned by the repeat rule.  Cleared on entry when
//                doMove is false.
//    nUnique   - per region, number of reads assigned by the non-repeat rule.  Cleared on entry
//                when doMove is false.
//    doMove    - false: only fill nRepeat/nUnique.  true: create one unitig per non-empty region
//                and add the reads to it; nRepeat/nUnique are read but neither cleared nor
//                updated, so they must already hold the counts (presumably from an earlier call
//                with doMove false -- confirm against callers).
//
//  Returns the number of regions whose nRepeat + nUnique is nonzero.
//
//  A read that no region accepts is reported on stderr and then trips an assert.
//
uint32
splitUnitigs(UnitigVector             &unitigs,
             Unitig                   *tig,
             vector<breakPointCoords> &BP,
             Unitig                  **newTigs,
             int32                    *lowCoord,
             uint32                   *nRepeat,
             uint32                   *nUnique,
             bool                      doMove) {

  //  Reset the outputs this pass owns; the other pair is left as the caller supplied it.

  if (doMove) {
    for (size_t k = 0; k < BP.size(); k++) {
      newTigs[k]  = NULL;
      lowCoord[k] = 0;
    }
  } else {
    for (size_t k = 0; k < BP.size(); k++) {
      nRepeat[k] = 0;
      nUnique[k] = 0;
    }
  }

  for (uint32 ri = 0; ri < tig->ufpath.size(); ri++) {
    ufNode  &read   = tig->ufpath[ri];
    int32    readLo = min(read.position.bgn, read.position.end);
    int32    readHi = max(read.position.bgn, read.position.end);

    //  Find the first region accepting the read.  Regions are in increasing order; that
    //  probably doesn't matter, but it keeps the logging readable.

    uint32   region   = UINT32_MAX;
    bool     inRepeat = false;

    for (uint32 bi = 0; bi < BP.size(); bi++) {
      int32  rgnLo    = BP[bi]._bgn;
      int32  rgnHi    = BP[bi]._end;
      bool   isRepeat = BP[bi]._isRepeat;

      //  Repeat regions must fully contain the read; the others only need to overlap it.
      bool   accepts  = (isRepeat) ? ((rgnLo <= readLo) && (readHi <= rgnHi))
                                   : ((rgnLo <  readHi) && (readLo <  rgnHi));

      if (accepts) {
        region   = bi;
        inRepeat = isRepeat;
        break;
      }
    }

    //  Every read must land somewhere.  If one doesn't, dump the regions before failing.

    if (region == UINT32_MAX) {
      fprintf(stderr, "Failed to place read %u at %u-%u\n", read.ident, readLo, readHi);

      for (uint32 ii = 0; ii < BP.size(); ii++)
        fprintf(stderr, "Breakpoints %2u %8u-%8u repeat %u\n", ii, BP[ii]._bgn, BP[ii]._end, BP[ii]._isRepeat);
    }

    assert(region != UINT32_MAX);

    //  Counting pass: just tally which rule placed the read.

    if (!doMove) {
      if (inRepeat)
        nRepeat[region]++;
      else
        nUnique[region]++;

      continue;
    }

    //  Moving pass: the first read of a region creates its unitig and fixes the region's origin.

    if (newTigs[region] == NULL) {
      lowCoord[region] = readLo;
      newTigs[region]  = unitigs.newUnitig(true);

      if (nRepeat[region] > nUnique[region])
        newTigs[region]->_isRepeat = true;
    }

    newTigs[region]->addFrag(read, -lowCoord[region], false);
  }

  //  Count the regions that have at least one read.

  uint32 nonEmpty = 0;

  for (uint32 bi = 0; bi < BP.size(); bi++)
    nonEmpty += (nRepeat[bi] + nUnique[bi] > 0) ? 1 : 0;

  return(nonEmpty);
}