/** * Report a maxed-out read. */ void VerboseHitSink::reportMaxed( vector<Hit>& hs, size_t threadId, PatternSourcePerThread& p) { HitSink::reportMaxed(hs, threadId, p); if(sampleMax_) { RandomSource rand; rand.init(p.bufa().seed); assert_gt(hs.size(), 0); bool paired = hs.front().mate > 0; size_t num = 1; if(paired) { num = 0; int bestStratum = 999; for(size_t i = 0; i < hs.size()-1; i += 2) { int strat = min(hs[i].stratum, hs[i+1].stratum); if(strat < bestStratum) { bestStratum = strat; num = 1; } else if(strat == bestStratum) { num++; } } assert_leq(num, hs.size()); uint32_t r = rand.nextU32() % num; num = 0; for(size_t i = 0; i < hs.size()-1; i += 2) { int strat = min(hs[i].stratum, hs[i+1].stratum); if(strat == bestStratum) { if(num == r) { hs[i].oms = hs[i+1].oms = (uint32_t)(hs.size()/2); reportHits(NULL, &hs, i, i+2, threadId, 0, 0, true, p.rdid()); break; } num++; } } assert_eq(num, r); } else { for(size_t i = 1; i < hs.size(); i++) { assert_geq(hs[i].stratum, hs[i-1].stratum); if(hs[i].stratum == hs[i-1].stratum) num++; else break; } assert_leq(num, hs.size()); uint32_t r = rand.nextU32() % num; Hit& h = hs[r]; h.oms = (uint32_t)hs.size(); reportHits(&h, NULL, 0, 1, threadId, 0, 0, true, p.rdid()); } } }
/**
 * Report maxed-out read; if sampleMax_ is set, then report 1 alignment
 * at random.
 *
 * Without sampleMax_, the read is passed to reportUnOrMax() with
 * un=false.  With sampleMax_, HitSink::reportMaxed() is called, then
 * one alignment (or one pair, when the hits are paired) is drawn at
 * random from the best stratum and written via reportSamHit() /
 * reportSamHits().  The random source is seeded from p.bufa().seed.
 *
 * @param hs hits found for the read; must be non-empty when
 *           sampleMax_ is set.  Paired hits are laid out as
 *           consecutive (i, i+1) entries.
 * @param p  per-thread pattern source holding the read
 */
void SAMHitSink::reportMaxed(vector<Hit>& hs, PatternSourcePerThread& p) {
	if(!sampleMax_) {
		// Not sampling: emit the read as a maxed-out record
		reportUnOrMax(p, &hs, false);
		return;
	}
	HitSink::reportMaxed(hs, p);
	RandomSource rand;
	rand.init(p.bufa().seed);
	assert_gt(hs.size(), 0);
	const bool paired = hs.front().mate > 0;

	if(!paired) {
		// Count the leading run of hits that share the first hit's
		// stratum; the assert checks that strata are non-decreasing.
		size_t numBest = 1;
		while(numBest < hs.size()) {
			assert_geq(hs[numBest].stratum, hs[numBest-1].stratum);
			if(hs[numBest].stratum != hs[numBest-1].stratum) {
				break;
			}
			numBest++;
		}
		assert_leq(numBest, hs.size());
		uint32_t r = rand.nextU32() % numBest;
		reportSamHit(hs[r], /*MAPQ*/0, /*XM:I*/hs.size()+1);
		return;
	}

	// Paired case.  A pair's stratum is the minimum of its two mates'
	// strata.  First pass: find the best pair stratum and count the
	// pairs that attain it.
	const size_t lastIdx = hs.size() - 1;
	int bestStratum = 999;
	size_t numBest = 0;
	for(size_t i = 0; i < lastIdx; i += 2) {
		const int strat = min(hs[i].stratum, hs[i+1].stratum);
		if(strat > bestStratum) {
			continue;
		}
		if(strat < bestStratum) {
			// New best stratum; restart the count
			bestStratum = strat;
			numBest = 0;
		}
		numBest++;
	}
	assert_leq(numBest, hs.size());
	uint32_t r = rand.nextU32() % numBest;

	// Second pass: walk to the r-th best-stratum pair and report it.
	// The trailing arguments mirror the unpaired call (presumably
	// MAPQ and the XM:i value -- confirm against reportSamHits).
	size_t seen = 0;
	for(size_t i = 0; i < lastIdx; i += 2) {
		const int strat = min(hs[i].stratum, hs[i+1].stratum);
		if(strat != bestStratum) {
			continue;
		}
		if(seen == r) {
			reportSamHits(hs, i, i+2, 0, hs.size()/2+1);
			break;
		}
		seen++;
	}
	assert_eq(seen, r);
}
/** * Report either an unaligned read or a read that exceeded the -m * ceiling. We output placeholders for most of the fields in this * case. */ void SAMHitSink::reportUnOrMax(PatternSourcePerThread& p, vector<Hit>* hs, bool un) // lower bound on number of other hits { if(un) HitSink::reportUnaligned(p); else HitSink::reportMaxed(*hs, p); ostringstream ss; bool paired = !p.bufb().empty(); assert(paired || p.bufa().mate == 0); assert(!paired || p.bufa().mate > 0); assert(un || hs->size() > 0); assert(!un || hs == NULL || hs->size() == 0); size_t hssz = 0; if(hs != NULL) hssz = hs->size(); if(paired) { // truncate final 2 chars for(int i = 0; i < (int)seqan::length(p.bufa().name)-2; i++) { if(!noQnameTrunc_ && isspace((int)p.bufa().name[i])) break; ss << p.bufa().name[i]; } } else { for(int i = 0; i < (int)seqan::length(p.bufa().name); i++) { if(!noQnameTrunc_ && isspace((int)p.bufa().name[i])) break; ss << p.bufa().name[i]; } } ss << "\t" << (SAM_FLAG_UNMAPPED | (paired ? (SAM_FLAG_PAIRED | SAM_FLAG_FIRST_IN_PAIR | SAM_FLAG_MATE_UNMAPPED) : 0)) << "\t*" << "\t0\t0\t*\t*\t0\t0\t" << p.bufa().patFw << "\t" << p.bufa().qual << "\tXM:i:" << (paired ? (hssz+1)/2 : hssz); // Add optional fields reporting the primer base and the downstream color, // which, if they were present, were clipped when the read was read in if(p.bufa().color && gReportColorPrimer) { if(p.bufa().primer != '?') { ss << "\tZP:Z:" << p.bufa().primer; assert(isprint(p.bufa().primer)); } if(p.bufa().trimc != '?') { ss << "\tZp:Z:" << p.bufa().trimc; assert(isprint(p.bufa().trimc)); } } ss << endl; if(paired) { // truncate final 2 chars for(int i = 0; i < (int)seqan::length(p.bufb().name)-2; i++) { ss << p.bufb().name[i]; } ss << "\t" << (SAM_FLAG_UNMAPPED | (paired ? 
(SAM_FLAG_PAIRED | SAM_FLAG_SECOND_IN_PAIR | SAM_FLAG_MATE_UNMAPPED) : 0)) << "\t*" << "\t0\t0\t*\t*\t0\t0\t" << p.bufb().patFw << "\t" << p.bufb().qual << "\tXM:i:" << (hssz+1)/2; // Add optional fields reporting the primer base and the downstream color, // which, if they were present, were clipped when the read was read in if(p.bufb().color && gReportColorPrimer) { if(p.bufb().primer != '?') { ss << "\tZP:Z:" << p.bufb().primer; assert(isprint(p.bufb().primer)); } if(p.bufb().trimc != '?') { ss << "\tZp:Z:" << p.bufb().trimc; assert(isprint(p.bufb().trimc)); } } ss << endl; } lock(0); out(0).writeString(ss.str()); unlock(0); }