void BiPolerCaller::simulateData(bool setDeleteTag) { deletionQV = rndPct(gen) <= lowDelPct ? lowDelPhred : highDelPhred; insrtionQV = rndPct(gen) <= lowInsPct ? lowInsPhred : highInsPhred; substitutionQV = rndPct(gen) <= lowSubPct ? lowSubPhred : highSubPhred; const long double pDeletion = phred2prob(deletionQV); const long double pInsertion = phred2prob(insrtionQV); const long double pSubstitution = phred2prob(substitutionQV); const long double pError = pDeletion + pInsertion + pSubstitution; qualityValue = prob2phred(pError); if (rndProb(gen) < pError) { const long double pErrorType = rndProb(gen); if (pErrorType < pDeletion/pError) deletionError(); else if (pErrorType < (pDeletion + pInsertion)/pError) insertionError(setDeleteTag); else substitutinoError(setDeleteTag); } else { baseCall = t[locus]; if (setDeleteTag) deletionTag = randomBase(baseCall); substitutionTag = randomBase(baseCall); } }
// This is Equation 2 (tranformed to log) from // A statistical framework for SNP calling ... , Heng Li, Bioinformatics, 2011 // http://bioinformatics.oxfordjournals.org/content/27/21/2987.full double Caller::genotype_log_likelihood(const BasePileup& bp, const vector<pair<int, int> >& base_offsets, double g, char first, char second) { double m = 2.; // always assume two alleles double log_likelihood = log(0.25); // 1 / m^2, where m = ploidy = 2; const string& bases = bp.bases(); const string& quals = bp.qualities(); double perr; for (int i = 0; i < base_offsets.size(); ++i) { char base = Pileups::extract_match(bp, base_offsets[i].first); char qual = base_offsets[i].second >= 0 ? quals[base_offsets[i].second] : _default_quality; perr = phred2prob(qual); if (base == first) { log_likelihood += safe_log((m - g) * perr + g * (1. - perr)); } else if (base == second) { log_likelihood += safe_log((m - g) * (1. - perr) + g * perr); } else { log_likelihood += safe_log(perr * perr); } } return log_likelihood; }
// Constructs a caller with a "low" and a "high" phred value for each of the
// three error types (deletion, insertion, substitution) and immediately
// simulates the first base call.
//
// t, locus          forwarded unchanged to the BaseCaller constructor
// low*/high*Phred   the two candidate quality values per error type;
//                   simulateData() picks one of each pair on every call
// low*Pct           threshold compared against a draw from rndPct in
//                   simulateData(): draw <= threshold selects the low value
//                   (presumably a percentage, given rndPct(1,100) — confirm
//                   against the class declaration)
//
// The prob* members cache phred2prob() of each of the six phred arguments.
// Inside the initializer list the parameter names shadow the members of the
// same name, so every phred2prob(...) argument is the constructor parameter.
BiPolerCaller::BiPolerCaller(const string &t
                           , char lowDelPhred
                           , char highDelPhred
                           , char lowInsPhred
                           , char highInsPhred
                           , char lowSubPhred
                           , char highSubPhred
                           , short lowDelPct
                           , short lowInsPct
                           , short lowSubPct
                           , unsigned int locus)
    : BaseCaller(t, locus)
    ,lowDelPhred(lowDelPhred)
    ,highDelPhred(highDelPhred)
    ,lowInsPhred(lowInsPhred)
    ,highInsPhred(highInsPhred)
    ,lowSubPhred(lowSubPhred)
    ,highSubPhred(highSubPhred)
    ,lowDelPct(lowDelPct)
    ,lowInsPct(lowInsPct)
    ,lowSubPct(lowSubPct)
    ,probLowDel(phred2prob(lowDelPhred))
    ,probHighDel(phred2prob(highDelPhred))
    ,probLowIns(phred2prob(lowInsPhred))
    ,probHighIns(phred2prob(highInsPhred))
    ,probLowSub(phred2prob(lowSubPhred))
    ,probHighSub(phred2prob(highSubPhred))
    ,rndPct(1,100) // NOTE(review): presumably a uniform integer distribution over [1, 100] — confirm type
{
    // Called with no argument: relies on a default for setDeleteTag (or an
    // overload) declared outside this view.
    simulateData();
}