Beispiel #1
0
/**
 * Report a maxed-out read.
 */
void VerboseHitSink::reportMaxed(
	vector<Hit>& hs,
	size_t threadId,
	PatternSourcePerThread& p)
{
	HitSink::reportMaxed(hs, threadId, p);
	if(sampleMax_) {
		RandomSource rand;
		rand.init(p.bufa().seed);
		assert_gt(hs.size(), 0);
		bool paired = hs.front().mate > 0;
		size_t num = 1;
		if(paired) {
			num = 0;
			int bestStratum = 999;
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat < bestStratum) {
					bestStratum = strat;
					num = 1;
				} else if(strat == bestStratum) {
					num++;
				}
			}
			assert_leq(num, hs.size());
			uint32_t r = rand.nextU32() % num;
			num = 0;
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat == bestStratum) {
					if(num == r) {
						hs[i].oms = hs[i+1].oms = (uint32_t)(hs.size()/2);
						reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid());
						break;
					}
					num++;
				}
			}
			assert_eq(num, r);
		} else {
			for(size_t i = 1; i < hs.size(); i++) {
				assert_geq(hs[i].stratum, hs[i-1].stratum);
				if(hs[i].stratum == hs[i-1].stratum) num++;
				else break;
			}
			assert_leq(num, hs.size());
			uint32_t r = rand.nextU32() % num;
			Hit& h = hs[r];
			h.oms = (uint32_t)hs.size();
			reportHits(&h, NULL, 0, 1, threadId, 0, 0, true, p.rdid());
		}
	}
}
/**
 * Report maxed-out read; if sampleMax_ is set, then report 1 alignment
 * at random.
 */
void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
	if(sampleMax_) {
		HitSink::reportMaxed(hs, p);
		RandomSource rand;
		rand.init(p.bufa().seed);
		assert_gt(hs.size(), 0);
		bool paired = hs.front().mate > 0;
		size_t num = 1;
		if(paired) {
			num = 0;
			int bestStratum = 999;
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat < bestStratum) {
					bestStratum = strat;
					num = 1;
				} else if(strat == bestStratum) {
					num++;
				}
			}
			assert_leq(num, hs.size());
			uint32_t r = rand.nextU32() % num;
			num = 0;
			for(size_t i = 0; i < hs.size()-1; i += 2) {
				int strat = min(hs[i].stratum, hs[i+1].stratum);
				if(strat == bestStratum) {
					if(num == r) {
						reportSamHits(hs, i, i+2, 0, hs.size()/2+1);
						break;
					}
					num++;
				}
			}
			assert_eq(num, r);
		} else {
			for(size_t i = 1; i < hs.size(); i++) {
				assert_geq(hs[i].stratum, hs[i-1].stratum);
				if(hs[i].stratum == hs[i-1].stratum) num++;
				else break;
			}
			assert_leq(num, hs.size());
			uint32_t r = rand.nextU32() % num;
			reportSamHit(hs[r], /*MAPQ*/0, /*XM:I*/hs.size()+1);
		}
	} else {
		reportUnOrMax(p, &hs, false);
	}
}
/**
 * Report either an unaligned read or a read that exceeded the -m
 * ceiling.  We output placeholders for most of the fields in this
 * case.
 */
void SAMHitSink::reportUnOrMax(PatternSourcePerThread& p,
                               vector<Hit>* hs,
                               bool un) // lower bound on number of other hits
{
	if(un) HitSink::reportUnaligned(p);
	else   HitSink::reportMaxed(*hs, p);
	ostringstream ss;
	bool paired = !p.bufb().empty();
	assert(paired || p.bufa().mate == 0);
	assert(!paired || p.bufa().mate > 0);
	assert(un || hs->size() > 0);
	assert(!un || hs == NULL || hs->size() == 0);
	size_t hssz = 0;
	if(hs != NULL) hssz = hs->size();
	if(paired) {
		// truncate final 2 chars
		for(int i = 0; i < (int)seqan::length(p.bufa().name)-2; i++) {
			if(!noQnameTrunc_ && isspace((int)p.bufa().name[i])) break;
			ss << p.bufa().name[i];
		}
	} else {
		for(int i = 0; i < (int)seqan::length(p.bufa().name); i++) {
			if(!noQnameTrunc_ && isspace((int)p.bufa().name[i])) break;
			ss << p.bufa().name[i];
		}
	}
	ss << "\t"
	   << (SAM_FLAG_UNMAPPED | (paired ? (SAM_FLAG_PAIRED | SAM_FLAG_FIRST_IN_PAIR | SAM_FLAG_MATE_UNMAPPED) : 0)) << "\t*"
	   << "\t0\t0\t*\t*\t0\t0\t"
	   << p.bufa().patFw << "\t" << p.bufa().qual << "\tXM:i:"
	   << (paired ? (hssz+1)/2 : hssz);
	// Add optional fields reporting the primer base and the downstream color,
	// which, if they were present, were clipped when the read was read in
	if(p.bufa().color && gReportColorPrimer) {
		if(p.bufa().primer != '?') {
			ss << "\tZP:Z:" << p.bufa().primer;
			assert(isprint(p.bufa().primer));
		}
		if(p.bufa().trimc != '?') {
			ss << "\tZp:Z:" << p.bufa().trimc;
			assert(isprint(p.bufa().trimc));
		}
	}
	ss << endl;
	if(paired) {
		// truncate final 2 chars
		for(int i = 0; i < (int)seqan::length(p.bufb().name)-2; i++) {
			ss << p.bufb().name[i];
		}
		ss << "\t"
		   << (SAM_FLAG_UNMAPPED | (paired ? (SAM_FLAG_PAIRED | SAM_FLAG_SECOND_IN_PAIR | SAM_FLAG_MATE_UNMAPPED) : 0)) << "\t*"
		   << "\t0\t0\t*\t*\t0\t0\t"
		   << p.bufb().patFw << "\t" << p.bufb().qual << "\tXM:i:"
		   << (hssz+1)/2;
		// Add optional fields reporting the primer base and the downstream color,
		// which, if they were present, were clipped when the read was read in
		if(p.bufb().color && gReportColorPrimer) {
			if(p.bufb().primer != '?') {
				ss << "\tZP:Z:" << p.bufb().primer;
				assert(isprint(p.bufb().primer));
			}
			if(p.bufb().trimc != '?') {
				ss << "\tZp:Z:" << p.bufb().trimc;
				assert(isprint(p.bufb().trimc));
			}
		}
		ss << endl;
	}
	lock(0);
	out(0).writeString(ss.str());
	unlock(0);
}