Esempio n. 1
0
/**
 * Report a read whose alignments exceeded the reporting ceiling.
 *
 * The base-class handler is always invoked first.  If sampleMax_ is set,
 * one alignment (unpaired) or one pair of alignments (paired) is then
 * picked pseudo-randomly from the best stratum and passed to reportHits().
 *
 * @param hs       hits for the maxed-out read; paired hits occupy
 *                 adjacent slots (i, i+1)
 * @param threadId id of the calling thread, forwarded to reportHits()
 * @param p        per-thread pattern source supplying the seed and read id
 */
void VerboseHitSink::reportMaxed(
	vector<Hit>& hs,
	size_t threadId,
	PatternSourcePerThread& p)
{
	HitSink::reportMaxed(hs, threadId, p);
	if(!sampleMax_) {
		return; // sampling disabled; nothing more to report
	}
	// The generator is seeded from the seed stored in the read buffer
	RandomSource rand;
	rand.init(p.bufa().seed);
	assert_gt(hs.size(), 0);
	if(hs.front().mate > 0) {
		// Paired: hits are examined two at a time; a pair's stratum is the
		// smaller of its two mates' strata
		const size_t lastIdx = hs.size()-1;
		int bestStratum = 999;
		size_t numBest = 0;
		// Pass 1: find the best stratum and count the pairs that lie in it
		for(size_t i = 0; i < lastIdx; i += 2) {
			const int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat > bestStratum) {
				continue;
			}
			if(strat < bestStratum) {
				bestStratum = strat;
				numBest = 1;
			} else {
				numBest++;
			}
		}
		assert_leq(numBest, hs.size());
		const uint32_t r = rand.nextU32() % numBest;
		// Pass 2: walk the best-stratum pairs again and report the r-th one
		size_t seen = 0;
		for(size_t i = 0; i < lastIdx; i += 2) {
			const int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat != bestStratum) {
				continue;
			}
			if(seen == r) {
				const uint32_t numPairs = static_cast<uint32_t>(hs.size()/2);
				hs[i+1].oms = numPairs;
				hs[i].oms = numPairs;
				reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid());
				break;
			}
			seen++;
		}
		assert_eq(seen, r);
	} else {
		// Unpaired: hits are expected in non-decreasing stratum order, so
		// the best stratum is the leading run of equal-stratum hits
		size_t numBest = 1;
		for(size_t i = 1; i < hs.size(); i++) {
			assert_geq(hs[i].stratum, hs[i-1].stratum);
			if(hs[i].stratum != hs[i-1].stratum) {
				break;
			}
			numBest++;
		}
		assert_leq(numBest, hs.size());
		const uint32_t r = rand.nextU32() % numBest;
		Hit& chosen = hs[r];
		chosen.oms = static_cast<uint32_t>(hs.size());
		reportHits(&chosen, NULL, 0, 1, threadId, 0, 0, true, p.rdid());
	}
}
Esempio n. 2
0
/**
 * Report a maxed-out read in SAM form.
 *
 * When sampleMax_ is not set, the read is handed to reportUnOrMax().
 * Otherwise the base-class handler runs and a single alignment (or pair
 * of alignments) is picked pseudo-randomly from the best stratum and
 * reported with MAPQ 0.
 *
 * @param hs hits for the maxed-out read; paired hits occupy adjacent
 *           slots (i, i+1)
 * @param p  per-thread pattern source supplying the seed
 */
void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
	if(!sampleMax_) {
		reportUnOrMax(p, &hs, false);
		return;
	}
	HitSink::reportMaxed(hs, p);
	// The generator is seeded from the seed stored in the read buffer
	RandomSource rand;
	rand.init(p.bufa().seed);
	assert_gt(hs.size(), 0);
	if(hs.front().mate > 0) {
		// Paired: a pair's stratum is the smaller of its two mates' strata
		const size_t lastIdx = hs.size()-1;
		int bestStratum = 999;
		size_t numBest = 0;
		// Pass 1: find the best stratum and count the pairs that lie in it
		for(size_t i = 0; i < lastIdx; i += 2) {
			const int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat > bestStratum) {
				continue;
			}
			if(strat < bestStratum) {
				bestStratum = strat;
				numBest = 1;
			} else {
				numBest++;
			}
		}
		assert_leq(numBest, hs.size());
		const uint32_t r = rand.nextU32() % numBest;
		// Pass 2: report the r-th pair belonging to the best stratum
		size_t seen = 0;
		for(size_t i = 0; i < lastIdx; i += 2) {
			const int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat != bestStratum) {
				continue;
			}
			if(seen == r) {
				reportSamHits(hs, i, i+2, 0, hs.size()/2+1);
				break;
			}
			seen++;
		}
		assert_eq(seen, r);
	} else {
		// Unpaired: hits are expected in non-decreasing stratum order, so
		// the best stratum is the leading run of equal-stratum hits
		size_t numBest = 1;
		for(size_t i = 1; i < hs.size(); i++) {
			assert_geq(hs[i].stratum, hs[i-1].stratum);
			if(hs[i].stratum != hs[i-1].stratum) {
				break;
			}
			numBest++;
		}
		assert_leq(numBest, hs.size());
		const uint32_t r = rand.nextU32() % numBest;
		reportSamHit(hs[r], /*MAPQ*/0, /*XM:I*/hs.size()+1);
	}
}
Esempio n. 3
0
/**
 * Register a named random source with the recorder.
 *
 * In record mode the source's current seed is stored under its name.
 * In playback mode the first stored record with a matching name is used
 * to re-seed the source and is then removed from the list.
 *
 * @param rnd  random source to record from or re-seed
 * @param name name identifying the random source
 */
void EventRecorder::registerRandomSource(RandomSource &rnd, const String &name) {
	if (_recordMode == kRecorderRecord) {
		RandomSourceRecord entry;
		entry.name = name;
		entry.seed = rnd.getSeed();
		_randomSourceRecords.push_back(entry);
	}

	if (_recordMode != kRecorderPlayback)
		return;

	for (uint idx = 0; idx < _randomSourceRecords.size(); ++idx) {
		if (!(_randomSourceRecords[idx].name == name))
			continue;

		// Only the first matching record is consumed
		rnd.setSeed(_randomSourceRecords[idx].seed);
		_randomSourceRecords.remove_at(idx);
		return;
	}
}
/**
 * Start the driver.  The driver will begin by conducting a best-first,
 * index-assisted search through the space of possible full and partial
 * alignments.  This search may be followed up with a dynamic programming
 * extension step, taking a prioritized set of partial SA ranges found
 * during the search and extending each with DP.  The process might also be
 * iterated, with the search being occasionally halted so that DPs can be
 * tried, then restarted, etc.
 *
 * @param sc      scoring scheme, forwarded to DescentDriver::advance()
 * @param ebwtFw  forward index, forwarded to advance() and the selector
 * @param ebwtBw  mirror index, forwarded to advance()
 * @param ref     reference, used by the alignment selector and by the
 *                debug-only res.matchesRef() check
 * @param met     descent metrics, forwarded to advance()
 * @param wlm     walk metrics, forwarded to the alignment selector
 * @param prm     per-read metrics, forwarded to advance() and the selector
 * @param rnd     pseudo-random source; picks which mate goes first in
 *                paired mode and is forwarded to the alignment selector
 * @param sink    alignment sink that receives unpaired alignments
 * @return ALDRIVER_POLICY_FULFILLED if sink.report() signals that
 *         reporting was short-circuited; otherwise
 *         ALDRIVER_EXHAUSTED_CANDIDATES.
 */
int AlignerDriver::go(
	const Scoring& sc,
	const Ebwt& ebwtFw,
	const Ebwt& ebwtBw,
	const BitPairReference& ref,
	DescentMetrics& met,
	WalkMetrics& wlm,
	PerReadMetrics& prm,
	RandomSource& rnd,
	AlnSinkWrap& sink)
{
	if(paired_) {
		// Paired-end - alternate between advancing dr1_ / dr2_ whenever a
		// new full alignment is discovered in the one currently being
		// advanced.  Whenever a new full alignment is found, check to see
		// if it pairs with a previously discovered alignment.
		//
		// NOTE(review): the code below does not yet match that description:
		//  - dr1_ is advanced at most once (first pass only, and only when
		//    first1 is true), because 'first' is cleared after one pass;
		//  - both advance() calls receive stop_ rather than stopc1/stopc2;
		//  - return values of advance() are ignored and nothing is
		//    reported to 'sink' in this branch.
		// Looks like work in progress -- confirm intent before relying on it.
		bool first1 = rnd.nextBool();
		bool first = true;
		DescentStoppingConditions stopc1 = stop_;
		DescentStoppingConditions stopc2 = stop_;
		// Budget increment: one eighth of the total size limit, rounded up
		size_t totszIncr = (stop_.totsz + 7) / 8;
		stopc1.totsz = totszIncr;
		stopc2.totsz = totszIncr;
		// The loop ends once either per-mate budget exceeds the overall limit
		while(stopc1.totsz <= stop_.totsz && stopc2.totsz <= stop_.totsz) {
			if(first && first1 && stopc1.totsz <= stop_.totsz) {
				dr1_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
				stopc1.totsz += totszIncr;
			}
			if(stopc2.totsz <= stop_.totsz) {
				dr2_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
				stopc2.totsz += totszIncr;
			}
			first = false;
		}
	} else {
		// Unpaired
		// 'iter' counts advance() calls; it is only read by the
		// commented-out debug output below
		size_t iter = 1;
		while(true) {
			int ret = dr1_.advance(stop_, sc, ebwtFw, ebwtBw, met, prm);
			if(ret == DESCENT_DRIVER_ALN) {
				// No action taken here; keep advancing
				//cerr << iter << ". DESCENT_DRIVER_ALN" << endl;
			} else if(ret == DESCENT_DRIVER_MEM) {
				// Stop searching
				//cerr << iter << ". DESCENT_DRIVER_MEM" << endl;
				break;
			} else if(ret == DESCENT_DRIVER_STRATA) {
				// DESCENT_DRIVER_STRATA is returned by DescentDriver.advance()
				// when it has finished with a "non-empty" stratum: a stratum
				// in which at least one alignment was found.  Here we report
				// the alignments in an arbitrary order.
				AlnRes res;
				// Initialize alignment selector with the DescentDriver's
				// alignment sink
				alsel_.init(
					dr1_.query(),
					dr1_.sink(),
					ebwtFw,
					ref,
					rnd,
					wlm);
				// Pull alignments until the selector is exhausted or the
				// sink reports it is done with this mate
				while(!alsel_.done() && !sink.state().doneWithMate(true)) {
					res.reset();
					bool ret2 = alsel_.next(
						dr1_,
						ebwtFw,
						ref,
						rnd,
						res,
						wlm,
						prm);
					if(ret2) {
						// Got an alignment
						assert(res.matchesRef(
							dr1_.query(),
							ref,
							tmp_rf_,
							tmp_rdseq_,
							tmp_qseq_,
							raw_refbuf_,
							raw_destU32_,
							raw_matches_));
						// Get reference interval involved in alignment
						// (built from offset 0 and res.reflen(); presumably
						// this spans the whole reference sequence -- confirm)
						Interval refival(res.refid(), 0, res.fw(), res.reflen());
						assert_gt(res.refExtent(), 0);
						// Does alignment fall off end of reference?
						if(gReportOverhangs &&
						   !refival.containsIgnoreOrient(res.refival()))
						{
							res.clipOutside(true, 0, res.reflen());
							// Nothing left after clipping; skip it
							if(res.refExtent() == 0) {
								continue;
							}
						}
						assert(gReportOverhangs ||
							   refival.containsIgnoreOrient(res.refival()));
						// Alignment fell entirely outside the reference?
						if(!refival.overlapsIgnoreOrient(res.refival())) {
							continue; // yes, fell outside
						}
						// Alignment redundant with one we've seen previously?
						if(red1_.overlap(res)) {
							continue; // yes, redundant
						}
						red1_.add(res); // so we find subsequent redundancies
						// Report an unpaired alignment
						assert(!sink.state().doneWithMate(true));
						assert(!sink.maxed());
						if(sink.report(0, &res, NULL)) {
							// Short-circuited because a limit, e.g. -k, -m or
							// -M, was exceeded
							return ALDRIVER_POLICY_FULFILLED;
						}
					}
				}
				// Finished with this stratum; move the descent sink on
				dr1_.sink().advanceStratum();
			} else if(ret == DESCENT_DRIVER_BWOPS) {
				// No action taken here; keep advancing
				//cerr << iter << ". DESCENT_DRIVER_BWOPS" << endl;
			} else if(ret == DESCENT_DRIVER_DONE) {
				// Stop searching
				//cerr << iter << ". DESCENT_DRIVER_DONE" << endl;
				break;
			} else {
				// Unexpected return code from advance()
				assert(false);
			}
			iter++;
		}
	}
	return ALDRIVER_EXHAUSTED_CANDIDATES;
}
Esempio n. 5
0
	/**
	 * Called by startThread when a new search thread is initialized.  Actually do alignment.
	 *
	 * Pulls reads from rs_ one at a time and, for each read: applies the
	 * skip / sampling / readmax checks, trims it, filters out reads that
	 * are too short, and queries the index through ind_.  Statistics are
	 * accumulated in local counters that are folded into the shared
	 * counters periodically and once more when the loop ends.
	 *
	 * Throws the int value 1 if the aligner sets res.bail.
	 */
	void work() {
		Read r;
		HitSet hitset;
		AlignResult res;
		RandomSource rnd;
		// Thread-local tallies, folded into the shared counters below
		int64_t ltotHits = 0, lalReads = 0, lunalReads = 0, lmaxReads = 0;
		int64_t ltotSeedHits = 0, lmaxSeedHits = 0;
		int64_t lunsampled = 0;
		int64_t skipped = 0;
		while(rs_->next(r)) {
			assert(r.repOk());
			// Skip the first 'readskip' reads seen by this thread
			if(skipped < readskip) {
				skipped++;
				continue;
			}
			// Re-seed for every read from the global seed combined with a
			// value drawn from the read's own generator
			rnd.init(randseed ^ r.rand.nextU32());
			// If sampleRate < 1.0f, apply sampling odds to this read;
			// if it isn't chosen, skip it.
			if(sampleRate < 1.0f && rnd.nextDouble() >= sampleRate) {
				// Not chosen
				lunsampled++;
				continue;
			}
			r.color = /*r.hitset.color =*/ color;
			// If the number of unskipped reads exceeds the readmax
			// ceiling, break out of the loop.  Note that there's a
			// minor race condition here.
			if(nreads.value() + 1 > readmax) {
				if(verbose) {
					cout << "Stopping because readmax " << readmax
					     << " was exceeded" << endl;
				}
				break;
			}
			// Trim as requested by the user.  Could do something more
			// sophisticated here.
			r.trim3(trim3);
			r.trim5(trim5);
			//r.initHitset();
			hitset.reset();
			// Counted before the length filters, so filtered reads still
			// count toward nreads
			nreads++;
			// The read must be at least as long as the mer length that
			// was used when building the index.
			if(r.seq.length() < (size_t)ap.iSeedLen) {
				if(!quiet) {
					cerr << "Warning: Skipping read " << r.name
					     << " because length " << r.seq.length()
					     << " was less than indexable seed length "
					     << ap.iSeedLen << endl;
				}
				os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_INDEX);
				continue;
			}
			// If a minimum alignment length was requested (ap.minLen != -1),
			// the read must be at least ap.adjMinLen long.
			size_t clen = r.seq.length();
			if(ap.minLen != -1 && clen < (size_t)ap.adjMinLen) {
				if(!quiet) {
					cerr << "Warning: Skipping read " << r.name
					     << " because length " << clen
					     << " was such that alignment length would be less "
					     << "than --minlen: " << ap.minLen << endl;
				}
				os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_MINLEN_PARAMS);
				continue;
			}
			// The read is trimmed and has passed all filters.  Next we
			// align it.
			if(verbose) cout << "  aligning read: " << r << endl;
			if(!ind_->empty()) {
				res.clear(); // clear the alignment results structure
				//assert(iformat == INPUT_CHAININ || r.hitset.maxedStratum == -1);
				ind_->query(r, *refs_, rmap_, amap_, hitset, *os_, res, ap, true, rnd, tid_);
				// Update per-thread counters: a read is tallied as aligned,
				// maxed or unaligned, in that order of precedence
				if(res.hits > 0) {
					lalReads++;
					ltotHits += res.hits;
				} else if(res.maxed) lmaxReads++;
				else lunalReads++;
				ltotSeedHits += res.seedHits;
				lmaxSeedHits = max<int64_t>(lmaxSeedHits, res.seedHits);
				if(res.bail) {
					// The aligner signaled that we should bail at this
					// point.
					throw 1;
				}
			} else {
				// If the index is empty, there can't possibly be any
				// hits.  TODO: this seems like something that should
				// cause an error early on.
				hitset.reportUpTo(r, *os_, ap.khits, *refs_, rmap_, false, amap_);
			}
			if((nreads.value()+1) % updateEvery == 0) {
				// Fold per-thread counters into global counters
				unalReads    += lunalReads;
				alReads      += lalReads;
				maxReads     += lmaxReads;
				maxSeedHits.max(lmaxSeedHits);
				totSeedHits  += ltotSeedHits;
				totAls       += ltotHits;
				totUnsampled += lunsampled;
				// Reset the additive tallies; lmaxSeedHits is a running
				// maximum and is deliberately left alone
				lunalReads   = 0;
				lalReads     = 0;
				lmaxReads    = 0;
				ltotSeedHits = 0;
				ltotHits     = 0;
				lunsampled   = 0;
			}
		}
		// Fold whatever remains in the local tallies into the global
		// counters (presumably the counter types synchronize internally --
		// confirm against their definitions)
		unalReads    += lunalReads;
		alReads      += lalReads;
		maxReads     += lmaxReads;
		maxSeedHits.max(lmaxSeedHits);
		totSeedHits  += ltotSeedHits;
		totAls       += ltotHits;
		totUnsampled += lunsampled;
	}