/**
 * Report a maxed-out read, i.e. one that aligned to more places than the
 * reporting ceiling allows.  If sampleMax_ is set, one alignment (for
 * unpaired reads) or one concordant pair (for paired reads) is chosen
 * pseudo-randomly from the best stratum and reported.
 *
 * @param hs       all alignments found for the read; for paired reads,
 *                 laid out as consecutive mate pairs (hs[i], hs[i+1])
 * @param threadId id of the calling search thread
 * @param p        per-thread read source; supplies the read's RNG seed
 *                 (p.bufa().seed) and read id (p.rdid())
 */
void VerboseHitSink::reportMaxed(
	vector<Hit>& hs,
	size_t threadId,
	PatternSourcePerThread& p)
{
	// Base-class bookkeeping for the maxed-out read.
	HitSink::reportMaxed(hs, threadId, p);
	if(sampleMax_) {
		// Seed the RNG from the read's own seed so the sampled
		// alignment is deterministic for a given read.
		RandomSource rand;
		rand.init(p.bufa().seed);
		assert_gt(hs.size(), 0);
		// NOTE(review): only the first hit's mate field is inspected;
		// assumes all hits in hs share the read's paired-ness.
		bool paired = hs.front().mate > 0;
		size_t num = 1;
		if(paired) {
			// Pass 1: find the best (lowest) stratum over all pairs and
			// count how many pairs achieve it.  A pair's stratum is the
			// min of its two mates' strata.
			num = 0;
			int bestStratum = 999;
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat < bestStratum) {
					bestStratum = strat;
					num = 1;
				} else if(strat == bestStratum) {
					num++;
				}
			}
			assert_leq(num, hs.size());
			// Pick one of the best-stratum pairs uniformly at random.
			uint32_t r = rand.nextU32() % num;
			num = 0;
			// Pass 2: walk the pairs again and report the r-th
			// best-stratum pair.
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat == bestStratum) {
					if(num == r) {
						// oms records how many other paired-end
						// alignments the read had
						hs[i].oms = hs[i+1].oms = (uint32_t)(hs.size()/2);
						reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid());
						break;
					}
					num++;
				}
			}
			assert_eq(num, r);
		} else {
			// Unpaired: hits arrive sorted by stratum (checked by the
			// assert below), so the best stratum is a prefix of hs.
			// Count its length.
			for(size_t i = 1; i < hs.size(); i++) {
				assert_geq(hs[i].stratum, hs[i-1].stratum);
				if(hs[i].stratum == hs[i-1].stratum) num++;
				else break;
			}
			assert_leq(num, hs.size());
			// Pick one best-stratum hit uniformly at random and report
			// it, with oms recording the total number of alignments.
			uint32_t r = rand.nextU32() % num;
			Hit& h = hs[r];
			h.oms = (uint32_t)hs.size();
			reportHits(&h, NULL, 0, 1, threadId, 0, 0, true, p.rdid());
		}
	}
}
/**
 * Report maxed-out read; if sampleMax_ is set, then report 1 alignment
 * at random.  For paired reads, one concordant pair is sampled from the
 * best stratum; for unpaired reads, one hit is sampled from the best
 * stratum.  If sampleMax_ is not set, the read is reported as maxed via
 * reportUnOrMax.
 *
 * @param hs all alignments found for the read; for paired reads, laid
 *           out as consecutive mate pairs (hs[i], hs[i+1])
 * @param p  per-thread read source; supplies the read's RNG seed
 */
void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
	if(sampleMax_) {
		// Base-class bookkeeping for the maxed-out read.
		HitSink::reportMaxed(hs, p);
		// Seed the RNG from the read's own seed so the sampled
		// alignment is deterministic for a given read.
		RandomSource rand;
		rand.init(p.bufa().seed);
		assert_gt(hs.size(), 0);
		// NOTE(review): only the first hit's mate field is inspected;
		// assumes all hits in hs share the read's paired-ness.
		bool paired = hs.front().mate > 0;
		size_t num = 1;
		if(paired) {
			// Pass 1: find the best (lowest) stratum over all pairs and
			// count how many pairs achieve it.  A pair's stratum is the
			// min of its two mates' strata.
			num = 0;
			int bestStratum = 999;
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat < bestStratum) {
					bestStratum = strat;
					num = 1;
				} else if(strat == bestStratum) {
					num++;
				}
			}
			assert_leq(num, hs.size());
			// Pick one of the best-stratum pairs uniformly at random.
			uint32_t r = rand.nextU32() % num;
			num = 0;
			// Pass 2: walk the pairs again and report the r-th
			// best-stratum pair; XM:I is the number of pairs plus one.
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat == bestStratum) {
					if(num == r) {
						reportSamHits(hs, i, i+2, 0, hs.size()/2+1);
						break;
					}
					num++;
				}
			}
			assert_eq(num, r);
		} else {
			// Unpaired: hits arrive sorted by stratum (checked by the
			// assert below), so the best stratum is a prefix of hs.
			// Count its length.
			for(size_t i = 1; i < hs.size(); i++) {
				assert_geq(hs[i].stratum, hs[i-1].stratum);
				if(hs[i].stratum == hs[i-1].stratum) num++;
				else break;
			}
			assert_leq(num, hs.size());
			// Pick one best-stratum hit uniformly at random and report
			// it with MAPQ 0 and XM:I = total alignments plus one.
			uint32_t r = rand.nextU32() % num;
			reportSamHit(hs[r], /*MAPQ*/0, /*XM:I*/hs.size()+1);
		}
	} else {
		// Not sampling: report the read as maxed-out (unaligned-style).
		reportUnOrMax(p, &hs, false);
	}
}
/**
 * Hook a RandomSource into the recorder.  While recording, the source's
 * current seed is captured (keyed by name) so it can be replayed later.
 * While playing back, the matching saved seed is looked up by name,
 * installed into the source, and consumed from the record list.
 */
void EventRecorder::registerRandomSource(RandomSource &rnd, const String &name) {
	switch (_recordMode) {
	case kRecorderRecord: {
		// Capture this source's seed under its name for later playback.
		RandomSourceRecord record;
		record.name = name;
		record.seed = rnd.getSeed();
		_randomSourceRecords.push_back(record);
		break;
	}
	case kRecorderPlayback:
		// Find the saved seed with a matching name, install it, and
		// remove the record so each saved seed is consumed exactly once.
		for (uint idx = 0; idx < _randomSourceRecords.size(); ++idx) {
			if (_randomSourceRecords[idx].name == name) {
				rnd.setSeed(_randomSourceRecords[idx].seed);
				_randomSourceRecords.remove_at(idx);
				break;
			}
		}
		break;
	default:
		// Not recording or playing back: nothing to do.
		break;
	}
}
/**
 * Start the driver. The driver will begin by conducting a best-first,
 * index-assisted search through the space of possible full and partial
 * alignments. This search may be followed up with a dynamic programming
 * extension step, taking a prioritized set of partial SA ranges found
 * during the search and extending each with DP. The process might also be
 * iterated, with the search being occasionally halted so that DPs can be
 * tried, then restarted, etc.
 *
 * Returns ALDRIVER_POLICY_FULFILLED if reporting short-circuited (e.g. a
 * -k/-m/-M limit was hit), otherwise ALDRIVER_EXHAUSTED_CANDIDATES.
 */
int AlignerDriver::go(
	const Scoring& sc,
	const Ebwt& ebwtFw,
	const Ebwt& ebwtBw,
	const BitPairReference& ref,
	DescentMetrics& met,
	WalkMetrics& wlm,
	PerReadMetrics& prm,
	RandomSource& rnd,
	AlnSinkWrap& sink)
{
	if(paired_) {
		// Paired-end - alternate between advancing dr1_ / dr2_ whenever a
		// new full alignment is discovered in the one currently being
		// advanced.  Whenever a new full alignment is found, check to see
		// if it pairs with a previously discovered alignment.
		bool first1 = rnd.nextBool();  // randomly choose which mate goes first
		bool first = true;
		// Each mate gets its own stopping conditions with a totsz budget
		// doled out in 1/8 increments of the overall budget.
		DescentStoppingConditions stopc1 = stop_;
		DescentStoppingConditions stopc2 = stop_;
		size_t totszIncr = (stop_.totsz + 7) / 8;
		stopc1.totsz = totszIncr;
		stopc2.totsz = totszIncr;
		while(stopc1.totsz <= stop_.totsz && stopc2.totsz <= stop_.totsz) {
			// NOTE(review): because of the `first &&` conjunct, dr1_ is
			// advanced at most once (on the first iteration, and only
			// when first1 is true), while dr2_ advances every iteration.
			// The return values of advance() are also ignored here,
			// unlike the unpaired branch.  This looks like incomplete /
			// suspicious alternation logic — confirm intended behavior.
			if(first && first1 && stopc1.totsz <= stop_.totsz) {
				dr1_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
				stopc1.totsz += totszIncr;
			}
			if(stopc2.totsz <= stop_.totsz) {
				dr2_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
				stopc2.totsz += totszIncr;
			}
			first = false;
		}
	} else {
		// Unpaired
		size_t iter = 1;
		while(true) {
			// Advance the descent driver one step and dispatch on why it
			// stopped.
			int ret = dr1_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
			if(ret == DESCENT_DRIVER_ALN) {
				//cerr << iter << ". DESCENT_DRIVER_ALN" << endl;
			} else if(ret == DESCENT_DRIVER_MEM) {
				// Memory budget exhausted; stop searching.
				//cerr << iter << ". DESCENT_DRIVER_MEM" << endl;
				break;
			} else if(ret == DESCENT_DRIVER_STRATA) {
				// DESCENT_DRIVER_STRATA is returned by DescentDriver.advance()
				// when it has finished with a "non-empty" stratum: a stratum
				// in which at least one alignment was found. Here we report
				// the alignments in an arbitrary order.
				AlnRes res;
				// Initialize alignment selector with the DescentDriver's
				// alignment sink
				alsel_.init(
					dr1_.query(),
					dr1_.sink(),
					ebwtFw,
					ref,
					rnd,
					wlm);
				// Drain alignments from the selector until it is done or
				// the reporting sink says we're done with this mate.
				while(!alsel_.done() && !sink.state().doneWithMate(true)) {
					res.reset();
					bool ret2 = alsel_.next(
						dr1_,
						ebwtFw,
						ref,
						rnd,
						res,
						wlm,
						prm);
					if(ret2) {
						// Got an alignment
						assert(res.matchesRef(
							dr1_.query(),
							ref,
							tmp_rf_,
							tmp_rdseq_,
							tmp_qseq_,
							raw_refbuf_,
							raw_destU32_,
							raw_matches_));
						// Get reference interval involved in alignment
						Interval refival(res.refid(), 0, res.fw(), res.reflen());
						assert_gt(res.refExtent(), 0);
						// Does alignment falls off end of reference?
						if(gReportOverhangs &&
						   !refival.containsIgnoreOrient(res.refival())) {
							// Clip the overhanging portion; if nothing
							// remains on the reference, discard it.
							res.clipOutside(true, 0, res.reflen());
							if(res.refExtent() == 0) {
								continue;
							}
						}
						assert(gReportOverhangs ||
						       refival.containsIgnoreOrient(res.refival()));
						// Alignment fell entirely outside the reference?
						if(!refival.overlapsIgnoreOrient(res.refival())) {
							continue; // yes, fell outside
						}
						// Alignment redundant with one we've seen previously?
						if(red1_.overlap(res)) {
							continue; // yes, redundant
						}
						red1_.add(res); // so we find subsequent redundancies
						// Report an unpaired alignment
						assert(!sink.state().doneWithMate(true));
						assert(!sink.maxed());
						if(sink.report(0, &res, NULL)) {
							// Short-circuited because a limit, e.g. -k, -m or
							// -M, was exceeded
							return ALDRIVER_POLICY_FULFILLED;
						}
					}
				}
				// Move the driver's sink on to the next stratum.
				dr1_.sink().advanceStratum();
			} else if(ret == DESCENT_DRIVER_BWOPS) {
				//cerr << iter << ". DESCENT_DRIVER_BWOPS" << endl;
			} else if(ret == DESCENT_DRIVER_DONE) {
				// Search space exhausted; stop searching.
				//cerr << iter << ". DESCENT_DRIVER_DONE" << endl;
				break;
			} else {
				assert(false); // unknown return code from advance()
			}
			iter++;
		}
	}
	return ALDRIVER_EXHAUSTED_CANDIDATES;
}
/**
 * Called by startThread when a new search thread is initialized.
 * Actually do alignment: pull reads from the shared read source rs_,
 * apply skipping/sampling/trimming/length filters, query the index, and
 * accumulate per-thread counters that are periodically folded into the
 * shared global counters.
 */
void work() {
	Read r;
	HitSet hitset;
	AlignResult res;
	RandomSource rnd;
	// Thread-local counters; folded into globals every updateEvery reads
	// and once more at the end.
	int64_t ltotHits = 0, lalReads = 0, lunalReads = 0, lmaxReads = 0;
	int64_t ltotSeedHits = 0, lmaxSeedHits = 0;
	int64_t lunsampled = 0;
	int64_t skipped = 0;
	while(rs_->next(r)) {
		assert(r.repOk());
		// Skip the first readskip reads, as requested by the user.
		if(skipped < readskip) {
			skipped++;
			continue;
		}
		// Per-read RNG seeded from the global seed and the read's own
		// random stream, so sampling is deterministic per read.
		rnd.init(randseed ^ r.rand.nextU32());
		// If sampleRate < 1.0f, apply sampling odds to this read;
		// if it isn't chosen, skip it.
		if(sampleRate < 1.0f && rnd.nextDouble() >= sampleRate) {
			// Not chosen
			lunsampled++;
			continue;
		}
		r.color = /*r.hitset.color =*/ color;
		// If the number of unskipped reads exceeds the readmax
		// ceiling, break out of the loop.  Note that there's a
		// minor race condition here.
		if(nreads.value() + 1 > readmax) {
			if(verbose) {
				cout << "Stopping because readmax " << readmax
				     << " was exceeded" << endl;
			}
			break;
		}
		// Trim as requested by the user.  Could do something more
		// sophisticated here.
		r.trim3(trim3);
		r.trim5(trim5);
		//r.initHitset();
		hitset.reset();
		nreads++;
		// The read must be at least as long as the mer length that
		// was used when building the index.
		if(r.seq.length() < (size_t)ap.iSeedLen) {
			if(!quiet) {
				cerr << "Warning: Skipping read " << r.name
				     << " because length " << r.seq.length()
				     << " was less than indexable seed length "
				     << ap.iSeedLen << endl;
			}
			os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_INDEX);
			continue;
		}
		// The trimmed read must also be long enough that the resulting
		// alignment would satisfy the --minlen parameter.
		size_t clen = r.seq.length();
		if(ap.minLen != -1 && clen < (size_t)ap.adjMinLen) {
			if(!quiet) {
				cerr << "Warning: Skipping read " << r.name
				     << " because length " << clen
				     << " was such that alignment length would be less "
				     << "than --minlen: " << ap.minLen << endl;
			}
			os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_MINLEN_PARAMS);
			continue;
		}
		// The read is trimmed and has passed all filters.  Next we
		// align it.
		if(verbose) cout << " aligning read: " << r << endl;
		if(!ind_->empty()) {
			res.clear(); // clear the alignment results structure
			//assert(iformat == INPUT_CHAININ || r.hitset.maxedStratum == -1);
			ind_->query(r, *refs_, rmap_, amap_, hitset, *os_, res, ap, true, rnd, tid_);
			// Update per-thread counters
			if(res.hits > 0) {
				lalReads++;
				ltotHits += res.hits;
			}
			else if(res.maxed) lmaxReads++;
			else lunalReads++;
			ltotSeedHits += res.seedHits;
			lmaxSeedHits = max<int64_t>(lmaxSeedHits, res.seedHits);
			if(res.bail) {
				// The aligner signaled that we should bail at this
				// point.
				throw 1;
			}
		} else {
			// If the index is empty, there can't possibly be any
			// hits.  TODO: this seems like something that should
			// cause an error early on.
			hitset.reportUpTo(r, *os_, ap.khits, *refs_, rmap_, false, amap_);
		}
		if((nreads.value()+1) % updateEvery == 0) {
			// Fold per-thread counters into global counters
			unalReads += lunalReads;
			alReads += lalReads;
			maxReads += lmaxReads;
			maxSeedHits.max(lmaxSeedHits);
			totSeedHits += ltotSeedHits;
			totAls += ltotHits;
			totUnsampled += lunsampled;
			// Reset local counters after folding.
			lunalReads = 0;
			lalReads = 0;
			lmaxReads = 0;
			ltotSeedHits = 0;
			ltotHits = 0;
			lunsampled = 0;
		}
	}
	// Update global counters in synchronized fashion
	unalReads += lunalReads;
	alReads += lalReads;
	maxReads += lmaxReads;
	maxSeedHits.max(lmaxSeedHits);
	totSeedHits += ltotSeedHits;
	totAls += ltotHits;
	totUnsampled += lunsampled;
}