/** * Report a maxed-out read. */ void VerboseHitSink::reportMaxed( vector<Hit>& hs, size_t threadId, PatternSourcePerThread& p) { HitSink::reportMaxed(hs, threadId, p); if(sampleMax_) { RandomSource rand; rand.init(p.bufa().seed); assert_gt(hs.size(), 0); bool paired = hs.front().mate > 0; size_t num = 1; if(paired) { num = 0; int bestStratum = 999; for(size_t i = 0; i < hs.size()-1; i += 2) { int strat = min(hs[i].stratum, hs[i+1].stratum); if(strat < bestStratum) { bestStratum = strat; num = 1; } else if(strat == bestStratum) { num++; } } assert_leq(num, hs.size()); uint32_t r = rand.nextU32() % num; num = 0; for(size_t i = 0; i < hs.size()-1; i += 2) { int strat = min(hs[i].stratum, hs[i+1].stratum); if(strat == bestStratum) { if(num == r) { hs[i].oms = hs[i+1].oms = (uint32_t)(hs.size()/2); reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid()); break; } num++; } } assert_eq(num, r); } else { for(size_t i = 1; i < hs.size(); i++) { assert_geq(hs[i].stratum, hs[i-1].stratum); if(hs[i].stratum == hs[i-1].stratum) num++; else break; } assert_leq(num, hs.size()); uint32_t r = rand.nextU32() % num; Hit& h = hs[r]; h.oms = (uint32_t)hs.size(); reportHits(&h, NULL, 0, 1, threadId, 0, 0, true, p.rdid()); } } }
/**
 * Report maxed-out read.
 *
 * When sampleMax_ is not set, the read is passed to reportUnOrMax and
 * nothing else happens.  When it is set, HitSink::reportMaxed is
 * called and then one alignment (unpaired) or one pair of alignments
 * (paired) is chosen pseudo-randomly from those tied for the best
 * (lowest) stratum and reported with MAPQ 0.  The random source is
 * seeded from the read's own seed (p.bufa().seed).
 *
 * @param hs  alignments found for the read; must be non-empty when
 *            sampling
 * @param p   per-thread pattern source for the read
 */
void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
    if(!sampleMax_) {
        // Not sampling
        reportUnOrMax(p, &hs, false);
        return;
    }
    HitSink::reportMaxed(hs, p);
    RandomSource sampler;
    sampler.init(p.bufa().seed);
    assert_gt(hs.size(), 0);
    if(hs.front().mate > 0) {
        // Paired case: entries (i, i+1) are treated as one pair and a
        // pair's stratum is the smaller of its two mates' strata.
        // First pass: find the best stratum and count the pairs in it.
        int lowest = 999;
        size_t tied = 0;
        size_t idx = 0;
        while(idx < hs.size()-1) {
            int pairStratum = min(hs[idx].stratum, hs[idx+1].stratum);
            if(pairStratum == lowest) {
                tied++;
            } else if(pairStratum < lowest) {
                lowest = pairStratum;
                tied = 1;
            }
            idx += 2;
        }
        assert_leq(tied, hs.size());
        uint32_t pick = sampler.nextU32() % tied;
        // Second pass: walk to the pick-th best-stratum pair and
        // report it.
        size_t seen = 0;
        for(idx = 0; idx < hs.size()-1; idx += 2) {
            int pairStratum = min(hs[idx].stratum, hs[idx+1].stratum);
            if(pairStratum != lowest) {
                continue;
            }
            if(seen == pick) {
                // Trailing arguments presumably mirror the unpaired
                // call below (MAPQ, then XM:I = number of pairs + 1)
                // -- TODO confirm against reportSamHits' signature
                reportSamHits(hs, idx, idx+2, 0, hs.size()/2+1);
                break;
            }
            seen++;
        }
        assert_eq(seen, pick);
    } else {
        // Unpaired case: strata are asserted to be non-decreasing, so
        // the best-stratum hits form a prefix of hs; measure it.
        size_t tied = 1;
        for(size_t idx = 1; idx < hs.size(); idx++) {
            assert_geq(hs[idx].stratum, hs[idx-1].stratum);
            if(hs[idx].stratum != hs[idx-1].stratum) {
                break;
            }
            tied++;
        }
        assert_leq(tied, hs.size());
        uint32_t pick = sampler.nextU32() % tied;
        reportSamHit(hs[pick], /*MAPQ*/0, /*XM:I*/hs.size()+1);
    }
}
/** * Called by startThread when a new search thread is initialized. Actually do alignment. */ void work() { Read r; HitSet hitset; AlignResult res; RandomSource rnd; int64_t ltotHits = 0, lalReads = 0, lunalReads = 0, lmaxReads = 0; int64_t ltotSeedHits = 0, lmaxSeedHits = 0; int64_t lunsampled = 0; int64_t skipped = 0; while(rs_->next(r)) { assert(r.repOk()); if(skipped < readskip) { skipped++; continue; } rnd.init(randseed ^ r.rand.nextU32()); // If sampleRate < 1.0f, apply sampling odds to this read; // if it isn't chosen, skip it. if(sampleRate < 1.0f && rnd.nextDouble() >= sampleRate) { // Not chosen lunsampled++; continue; } r.color = /*r.hitset.color =*/ color; // If the number of unskipped reads exceeds the readmax // ceiling, break out of the loop. Note that there's a // minor race condition here. if(nreads.value() + 1 > readmax) { if(verbose) { cout << "Stopping because readmax " << readmax << " was exceeded" << endl; } break; } // Trim as requested by the user. Could do something more // sophisticated here. r.trim3(trim3); r.trim5(trim5); //r.initHitset(); hitset.reset(); nreads++; // The read must be at least as long as the mer length that // was used when building the index. if(r.seq.length() < (size_t)ap.iSeedLen) { if(!quiet) { cerr << "Warning: Skipping read " << r.name << " because length " << r.seq.length() << " was less than indexable seed length " << ap.iSeedLen << endl; } os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_INDEX); continue; } // The read must be at least as long as the mer length that // was used when building the index. 
size_t clen = r.seq.length(); if(ap.minLen != -1 && clen < (size_t)ap.adjMinLen) { if(!quiet) { cerr << "Warning: Skipping read " << r.name << " because length " << clen << " was such that alignment length would be less " << "than --minlen: " << ap.minLen << endl; } os_->printUnalignedRead(r, r.seq, r.qual, FILTER_TOO_SHORT_FOR_MINLEN_PARAMS); continue; } // The read is trimmed and has passed all filters. Next we // align it. if(verbose) cout << " aligning read: " << r << endl; if(!ind_->empty()) { res.clear(); // clear the alignment results structure //assert(iformat == INPUT_CHAININ || r.hitset.maxedStratum == -1); ind_->query(r, *refs_, rmap_, amap_, hitset, *os_, res, ap, true, rnd, tid_); // Update per-thread counters if(res.hits > 0) { lalReads++; ltotHits += res.hits; } else if(res.maxed) lmaxReads++; else lunalReads++; ltotSeedHits += res.seedHits; lmaxSeedHits = max<int64_t>(lmaxSeedHits, res.seedHits); if(res.bail) { // The aligner signaled that we should bail at this // point. throw 1; } } else { // If the index is empty, there can't possibly be any // hits. TODO: this seems like something that should // cause an error early on. hitset.reportUpTo(r, *os_, ap.khits, *refs_, rmap_, false, amap_); } if((nreads.value()+1) % updateEvery == 0) { // Fold per-thread counters into global counters unalReads += lunalReads; alReads += lalReads; maxReads += lmaxReads; maxSeedHits.max(lmaxSeedHits); totSeedHits += ltotSeedHits; totAls += ltotHits; totUnsampled += lunsampled; lunalReads = 0; lalReads = 0; lmaxReads = 0; ltotSeedHits = 0; ltotHits = 0; lunsampled = 0; } } // Update global counters in synchronized fashion unalReads += lunalReads; alReads += lalReads; maxReads += lmaxReads; maxSeedHits.max(lmaxSeedHits); totSeedHits += ltotSeedHits; totAls += ltotHits; totUnsampled += lunsampled; }