Пример #1
0
void BiPolerCaller::simulateData(bool setDeleteTag)
{
    deletionQV = rndPct(gen) <= lowDelPct ? lowDelPhred : highDelPhred;
    insrtionQV = rndPct(gen) <= lowInsPct ? lowInsPhred : highInsPhred;
    substitutionQV = rndPct(gen) <= lowSubPct ? lowSubPhred : highSubPhred;
    const long double pDeletion = phred2prob(deletionQV);
    const long double pInsertion = phred2prob(insrtionQV);
    const long double pSubstitution = phred2prob(substitutionQV);
    const long double pError = pDeletion + pInsertion + pSubstitution;
    qualityValue = prob2phred(pError);

    if (rndProb(gen) < pError)
    {
        const long double pErrorType = rndProb(gen);
        if (pErrorType < pDeletion/pError)
            deletionError();
        else if (pErrorType < (pDeletion + pInsertion)/pError)
            insertionError(setDeleteTag);
        else
            substitutinoError(setDeleteTag);
    }
    else
    {
        baseCall = t[locus];
        if (setDeleteTag) deletionTag = randomBase(baseCall);
        substitutionTag = randomBase(baseCall);
    }
}
Пример #2
0
// This is Equation 2 (transformed to log) from
// A statistical framework for SNP calling ... , Heng Li, Bioinformatics, 2011
// http://bioinformatics.oxfordjournals.org/content/27/21/2987.full
//
// Returns the log-likelihood of a two-allele genotype given the observations
// listed in base_offsets.
//
//   bp            pileup providing the observed bases and the quality string
//   base_offsets  one (base offset, quality offset) pair per observation; a
//                 negative quality offset selects _default_quality instead of
//                 a character from bp.qualities()
//   g             weight given to allele `first` out of m = 2 (presumably the
//                 number of copies of `first` in the genotype -- confirm
//                 against callers)
//   first, second the two alleles the observed bases are compared against
//
// Per observation with error probability perr the summed term is
//   base == first  : log((m - g) * perr + g * (1 - perr))
//   base == second : log((m - g) * (1 - perr) + g * perr)
//   otherwise      : log(perr * perr)
// where log is taken via safe_log.
double Caller::genotype_log_likelihood(const BasePileup& bp,
                                       const vector<pair<int, int> >& base_offsets,
                                       double g, char first, char second) {
    const double m = 2.; // always assume two alleles

    // NOTE(review): the constant prefactor is log(1 / m^2) regardless of how
    // many observations there are; it is the same for every genotype scored
    // on the same pileup -- confirm this matches the intended form.
    double log_likelihood = log(0.25); // 1 / m^2, where m = ploidy = 2;

    const string& quals = bp.qualities();

    // size_t index: avoids the signed/unsigned comparison against size().
    for (size_t i = 0; i < base_offsets.size(); ++i) {
        const pair<int, int>& offsets = base_offsets[i];
        const char base = Pileups::extract_match(bp, offsets.first);
        const char qual = offsets.second >= 0 ? quals[offsets.second] : _default_quality;
        const double perr = phred2prob(qual);
        if (base == first) {
            log_likelihood += safe_log((m - g) * perr + g * (1. - perr));
        } else if (base == second) {
            log_likelihood += safe_log((m - g) * (1. - perr) + g * perr);
        } else {
            // Base matches neither allele.
            log_likelihood += safe_log(perr * perr);
        }
    }

    return log_likelihood;
}
Пример #3
0
// Builds a caller that has two Phred settings ("low" and "high") for each of
// the deletion, insertion and substitution error kinds.
//
//   t, locus            forwarded unchanged to the BaseCaller constructor
//   low*Phred/high*Phred  stored as given; phred2prob of each one is also
//                         stored in the matching prob* member
//   low*Pct             stored as given
//
// rndPct is constructed with the arguments (1, 100).  Inside the initializer
// list the arguments passed to phred2prob are the constructor parameters
// (they shadow the members of the same name), so those conversions do not
// depend on the order in which the members are initialized.
//
// The constructor finishes by calling simulateData().
BiPolerCaller::BiPolerCaller(const string &t,
                             char lowDelPhred,
                             char highDelPhred,
                             char lowInsPhred,
                             char highInsPhred,
                             char lowSubPhred,
                             char highSubPhred,
                             short lowDelPct,
                             short lowInsPct,
                             short lowSubPct,
                             unsigned int locus)
    : BaseCaller(t, locus),
      lowDelPhred(lowDelPhred),
      highDelPhred(highDelPhred),
      lowInsPhred(lowInsPhred),
      highInsPhred(highInsPhred),
      lowSubPhred(lowSubPhred),
      highSubPhred(highSubPhred),
      lowDelPct(lowDelPct),
      lowInsPct(lowInsPct),
      lowSubPct(lowSubPct),
      probLowDel(phred2prob(lowDelPhred)),
      probHighDel(phred2prob(highDelPhred)),
      probLowIns(phred2prob(lowInsPhred)),
      probHighIns(phred2prob(highInsPhred)),
      probLowSub(phred2prob(lowSubPhred)),
      probHighSub(phred2prob(highSubPhred)),
      rndPct(1, 100)
{
    simulateData();
}