Пример #1
0
/**
 * Report a maxed-out read.
 *
 * The call is always forwarded to HitSink::reportMaxed() first.  If
 * sampleMax_ is set, a single alignment (unpaired) or a single pair of
 * adjacent hits (paired) is then chosen pseudo-randomly from among the
 * candidates with the lowest stratum and handed to reportHits().  The
 * generator is seeded from p.bufa().seed.
 *
 * @param hs       hits for the read; entries may have their 'oms'
 *                 field overwritten.  In paired mode (first hit has
 *                 mate > 0) hits are consumed as adjacent pairs
 *                 (i, i+1); in unpaired mode the asserts expect the
 *                 hits to be ordered by non-decreasing stratum.
 * @param threadId forwarded to HitSink::reportMaxed() and reportHits()
 * @param p        per-thread pattern source; supplies the seed and the
 *                 read id
 */
void VerboseHitSink::reportMaxed(
	vector<Hit>& hs,
	size_t threadId,
	PatternSourcePerThread& p)
{
	HitSink::reportMaxed(hs, threadId, p);
	if(!sampleMax_) return;
	assert_gt(hs.size(), 0);
	// The assert above is presumably compiled out in release builds;
	// without this guard hs.front() and the "size - 1" style loop
	// bounds would be undefined on an empty vector.
	if(hs.empty()) return;
	RandomSource rand;
	rand.init(p.bufa().seed);
	bool paired = hs.front().mate > 0;
	if(paired) {
		// Pass 1: find the best (lowest) pair stratum and count how
		// many pairs share it.  "i + 1 < size" cannot wrap around the
		// way "i < size - 1" does for size_t.
		size_t num = 0;
		int bestStratum = 999;
		for(size_t i = 0; i + 1 < hs.size(); i += 2) {
			int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat < bestStratum) {
				bestStratum = strat;
				num = 1;
			} else if(strat == bestStratum) {
				num++;
			}
		}
		assert_leq(num, hs.size());
		// No complete pair qualified (e.g. a lone hit); bail out
		// rather than computing a modulo by zero below.
		if(num == 0) return;
		uint32_t r = rand.nextU32() % num;
		// Pass 2: walk the best-stratum pairs again and report the
		// r-th one.
		num = 0;
		for(size_t i = 0; i + 1 < hs.size(); i += 2) {
			int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat == bestStratum) {
				if(num == r) {
					// Both mates record the number of pairs
					hs[i].oms = hs[i+1].oms = static_cast<uint32_t>(hs.size()/2);
					reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid());
					break;
				}
				num++;
			}
		}
		assert_eq(num, r);
	} else {
		// Count the leading run of hits that share the first hit's
		// stratum; the asserts require non-decreasing strata, so this
		// run is the best stratum.
		size_t num = 1;
		for(size_t i = 1; i < hs.size(); i++) {
			assert_geq(hs[i].stratum, hs[i-1].stratum);
			if(hs[i].stratum == hs[i-1].stratum) num++;
			else break;
		}
		assert_leq(num, hs.size());
		uint32_t r = rand.nextU32() % num;
		Hit& h = hs[r];
		// The chosen hit records the total number of hits
		h.oms = static_cast<uint32_t>(hs.size());
		reportHits(&h, NULL, 0, 1, threadId, 0, 0, true, p.rdid());
	}
}
Пример #2
0
/**
 * Report maxed-out read; if sampleMax_ is set, then report 1 alignment
 * at random.
 *
 * When sampleMax_ is not set the read is passed to reportUnOrMax() and
 * nothing else happens.  Otherwise HitSink::reportMaxed() is called,
 * then one hit (unpaired) or one pair of adjacent hits (paired) is
 * chosen pseudo-randomly from among the candidates with the lowest
 * stratum and emitted through reportSamHit() / reportSamHits() with a
 * MAPQ argument of 0.  The generator is seeded from p.bufa().seed.
 *
 * @param hs hits for the read.  In paired mode (first hit has
 *           mate > 0) hits are consumed as adjacent pairs (i, i+1); in
 *           unpaired mode the asserts expect the hits to be ordered by
 *           non-decreasing stratum.
 * @param p  per-thread pattern source; supplies the seed
 */
void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
	if(!sampleMax_) {
		reportUnOrMax(p, &hs, false);
		return;
	}
	HitSink::reportMaxed(hs, p);
	assert_gt(hs.size(), 0);
	// The assert above is presumably compiled out in release builds;
	// without this guard hs.front() and the "size - 1" style loop
	// bounds would be undefined on an empty vector.  Nothing is
	// sampled in that case.
	if(hs.empty()) return;
	RandomSource rand;
	rand.init(p.bufa().seed);
	bool paired = hs.front().mate > 0;
	if(paired) {
		// Pass 1: find the best (lowest) pair stratum and count how
		// many pairs share it.  "i + 1 < size" cannot wrap around the
		// way "i < size - 1" does for size_t.
		size_t num = 0;
		int bestStratum = 999;
		for(size_t i = 0; i + 1 < hs.size(); i += 2) {
			int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat < bestStratum) {
				bestStratum = strat;
				num = 1;
			} else if(strat == bestStratum) {
				num++;
			}
		}
		assert_leq(num, hs.size());
		// No complete pair qualified (e.g. a lone hit); bail out
		// rather than computing a modulo by zero below.
		if(num == 0) return;
		uint32_t r = rand.nextU32() % num;
		// Pass 2: walk the best-stratum pairs again and emit the r-th
		// one.
		num = 0;
		for(size_t i = 0; i + 1 < hs.size(); i += 2) {
			int strat = min(hs[i].stratum, hs[i+1].stratum);
			if(strat == bestStratum) {
				if(num == r) {
					reportSamHits(hs, i, i+2, 0, hs.size()/2+1);
					break;
				}
				num++;
			}
		}
		assert_eq(num, r);
	} else {
		// Count the leading run of hits that share the first hit's
		// stratum; the asserts require non-decreasing strata, so this
		// run is the best stratum.
		size_t num = 1;
		for(size_t i = 1; i < hs.size(); i++) {
			assert_geq(hs[i].stratum, hs[i-1].stratum);
			if(hs[i].stratum == hs[i-1].stratum) num++;
			else break;
		}
		assert_leq(num, hs.size());
		uint32_t r = rand.nextU32() % num;
		reportSamHit(hs[r], /*MAPQ*/0, /*XM:I*/hs.size()+1);
	}
}
Пример #3
0
	/**
	 * Called by startThread when a new search thread is initialized.  Actually do alignment.
	 */
	void work() {
		Read r;
		HitSet hitset;
		AlignResult res;
		RandomSource rnd;
		int64_t ltotHits = 0, lalReads = 0, lunalReads = 0, lmaxReads = 0;
		int64_t ltotSeedHits = 0, lmaxSeedHits = 0;
		int64_t lunsampled = 0;
		int64_t skipped = 0;
		while(rs_->next(r)) {
			assert(r.repOk());
			if(skipped < readskip) {
				skipped++;
				continue;
			}
			rnd.init(randseed ^ r.rand.nextU32());
			// If sampleRate < 1.0f, apply sampling odds to this read;
			// if it isn't chosen, skip it.
			if(sampleRate < 1.0f && rnd.nextDouble() >= sampleRate) {
				// Not chosen
				lunsampled++;
				continue;
			}
			r.color = /*r.hitset.color =*/ color;
			// If the number of unskipped reads exceeds the readmax
			// ceiling, break out of the loop.  Note that there's a
			// minor race condition here.
			if(nreads.value() + 1 > readmax) {
				if(verbose) {
					cout << "Stopping because readmax " << readmax
					     << " was exceeded" << endl;
				}
				break;
			}
			// Trim as requested by the user.  Could do something more
			// sophisticated here.
			r.trim3(trim3);
			r.trim5(trim5);
			//r.initHitset();
			hitset.reset();
			nreads++;
			// The read must be at least as long as the mer length that
			// was used when building the index.
			if(r.seq.length() < (size_t)ap.iSeedLen) {
				if(!quiet) {
					cerr << "Warning: Skipping read " << r.name
					     << " because length " << r.seq.length()
					     << " was less than indexable seed length "
					     << ap.iSeedLen << endl;
				}
				os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_INDEX);
				continue;
			}
			// The read must be at least as long as the mer length that
			// was used when building the index.
			size_t clen = r.seq.length();
			if(ap.minLen != -1 && clen < (size_t)ap.adjMinLen) {
				if(!quiet) {
					cerr << "Warning: Skipping read " << r.name
					     << " because length " << clen
					     << " was such that alignment length would be less "
					     << "than --minlen: " << ap.minLen << endl;
				}
				os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_MINLEN_PARAMS);
				continue;
			}
			// The read is trimmed and has passed all filters.  Next we
			// align it.
			if(verbose) cout << "  aligning read: " << r << endl;
			if(!ind_->empty()) {
				res.clear(); // clear the alignment results structure
				//assert(iformat == INPUT_CHAININ || r.hitset.maxedStratum == -1);
				ind_->query(r, *refs_, rmap_, amap_, hitset, *os_, res, ap, true, rnd, tid_);
				// Update per-thread counters
				if(res.hits > 0) {
					lalReads++;
					ltotHits += res.hits;
				} else if(res.maxed) lmaxReads++;
				else lunalReads++;
				ltotSeedHits += res.seedHits;
				lmaxSeedHits = max<int64_t>(lmaxSeedHits, res.seedHits);
				if(res.bail) {
					// The aligner signaled that we should bail at this
					// point.
					throw 1;
				}
			} else {
				// If the index is empty, there can't possibly be any
				// hits.  TODO: this seems like something that should
				// cause an error early on.
				hitset.reportUpTo(r, *os_, ap.khits, *refs_, rmap_, false, amap_);
			}
			if((nreads.value()+1) % updateEvery == 0) {
				// Fold per-thread counters into global counters
				unalReads    += lunalReads;
				alReads      += lalReads;
				maxReads     += lmaxReads;
				maxSeedHits.max(lmaxSeedHits);
				totSeedHits  += ltotSeedHits;
				totAls       += ltotHits;
				totUnsampled += lunsampled;
				lunalReads   = 0;
				lalReads     = 0;
				lmaxReads    = 0;
				ltotSeedHits = 0;
				ltotHits     = 0;
				lunsampled   = 0;
			}
		}
		// Update global counters in synchronized fashion
		unalReads    += lunalReads;
		alReads      += lalReads;
		maxReads     += lmaxReads;
		maxSeedHits.max(lmaxSeedHits);
		totSeedHits  += ltotSeedHits;
		totAls       += ltotHits;
		totUnsampled += lunsampled;
	}