Ejemplo n.º 1
0
static
void
calculate_result_set(const strelka_options& opt,
                     const double* normal_lnprior,
                     const double lnmatch,
                     const double lnmismatch,
                     const double* normal_lhood,
                     const double* tumor_lhood,
                     result_set& rs) {

#ifdef SOMATIC_DEBUG
    std::vector<double> check_prior(DDIINDEL::SIZE);

    for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) {
        const double base_prior(normal_lnprior[ngt]);
        for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) {
            const unsigned dgt(DDIINDEL::get_state(ngt,tgt));
            check_prior[dgt] =
                base_prior+
                ((tgt==ngt) ? lnmatch : lnmismatch);
        }
    }

    check_ln_distro(check_prior.begin(),
                    check_prior.end(),
                    "somatic indel full prior");
#endif

    // get unnormalized posterior:
    std::vector<double> pprob(DDIINDEL::SIZE);

    for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) {
        const double base_prior(normal_lnprior[ngt]);
        for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) {
            const unsigned dgt(DDIINDEL::get_state(ngt,tgt));
            pprob[dgt] =
                normal_lhood[ngt]+
                tumor_lhood[tgt]+
                base_prior+
                ((tgt==ngt) ? lnmatch : lnmismatch);
        }
    }

    normalize_ln_distro(pprob.begin(),pprob.end(),rs.max_gt);

#ifdef DEBUG_INDEL_CALL
    log_os << "INDEL_CALL pprob(noindel),pprob(hom),pprob(het): " << pprob[STAR_DIINDEL::NOINDEL] << " " << pprob[STAR_DIINDEL::HOM] << " " << pprob[STAR_DIINDEL::HET] << "\n";
#endif
    double nonsomatic_sum(0);
    for (unsigned gt(0); gt<STAR_DIINDEL::SIZE; ++gt) {
        nonsomatic_sum += pprob[DDIINDEL::get_state(gt,gt)];
    }
    rs.sindel_qphred=error_prob_to_qphred(nonsomatic_sum);

    double not_somfrom_sum[STAR_DIINDEL::SIZE];
    for (unsigned sgt(0); sgt<STAR_DIINDEL::SIZE; ++sgt) {
        not_somfrom_sum[sgt]=nonsomatic_sum;
        for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) {
            if (sgt==ngt) continue;
            for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) {
                if (tgt==ngt) continue;
                not_somfrom_sum[sgt] += pprob[DDIINDEL::get_state(ngt,tgt)];
            }
        }
    }
    rs.sindel_from_ref_qphred=error_prob_to_qphred(not_somfrom_sum[STAR_DIINDEL::NOINDEL]);
    rs.sindel_from_het_qphred=error_prob_to_qphred(not_somfrom_sum[STAR_DIINDEL::HET]);
    rs.sindel_from_hom_qphred=error_prob_to_qphred(not_somfrom_sum[STAR_DIINDEL::HOM]);

    double not_somfromanyhom_sum(nonsomatic_sum);
    for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) {
        if (STAR_DIINDEL::HET != ngt) continue;
        for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) {
            if (tgt==ngt) continue;
            not_somfromanyhom_sum += pprob[DDIINDEL::get_state(ngt,tgt)];
        }
    }
    rs.sindel_from_anyhom_qphred=error_prob_to_qphred(not_somfromanyhom_sum);

    rs.max_gt_qphred=error_prob_to_qphred(prob_comp(pprob.begin(),pprob.end(),rs.max_gt));
}
// Given the per-state log-likelihoods for the normal and tumor samples,
// go through the final computations to get the joint posterior and the
// values derived from it.
//
// normal_lhood / tumor_lhood: indexed by DIGT_SGRID state; entries
//     [0,DIGT_SGRID::SIZE) are read.
// pset: prior arrays; pset.normal, pset.somatic_marginal and
//     pset.normal_poly are read here.
// lnmatch / lnmismatch: added to the prior when the tumor state equals /
//     differs from the normal state.
// (unnamed unsigned, ref_gt): currently unused.
// rs: output. rs.max_gt is handed to opt_normalize_ln_distro (presumably
//     set to the most probable joint state -- confirm against that helper)
//     and rs.snv_qphred is always assigned. NOTE: when rs.snv_qphred is 0
//     the function returns early and rs.snv_from_ntype_qphred / rs.ntype
//     are left untouched.
//
static
void
calculate_result_set_grid(const blt_float_t* normal_lhood,
                          const blt_float_t* tumor_lhood,
                          const somatic_snv_caller_strand_grid::prior_set& pset,
                          const blt_float_t lnmatch,
                          const blt_float_t lnmismatch,
                          const unsigned /*ref_gt*/,
                          result_set& rs) {

    // a piece transplanted from 1150 to make a formal correction to
    // the priors which should have a low-impact on the results.
    // the prior below is incomplete
#ifdef DEBUG_ALTERNATE_PRIOR
    static const double neginf(-std::numeric_limits<double>::infinity());

    std::vector<double> prior(DDIGT_SGRID::SIZE);
    std::fill(prior.begin(),prior.end(),neginf);

    // this zero'd code is incomplete and abandoned for now...:
#if 0
    for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
        double base_prior(neginf);
        const bool is_noise(ngt>=STAR_DIINDEL::SIZE);
        if(is_noise) {
            base_prior=pset.normal[ngt];
        } else {
            base_prior=pset.nonoise[ngt];
        }
        for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
            const blt_float_t tgt_prior_mod( (tgt==ngt) ? lnmatch : lnmismatch );
            const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt));
            prior[dgt] = normal_genomic_lnprior[ngt]+tgt_prior_mod;
        }
    }

    for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) {
        const unsigned dgt(DDIGT_SGRID::get_state(gt,gt));
        prior[dgt] = normal_genomic_lnprior[gt]+lnmatch;
    }
#endif

    check_ln_distro(prior.begin(),
                    prior.end(),
                    "somatic snv full prior");
#endif

    // intentionally use higher float res for this structure:
    std::vector<double> pprob(DDIGT_SGRID::SIZE);

    // mult by prior distro to get unnormalized pprob for states in
    // the regular grid model:
    //
    for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
        for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
            const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt));

#if 0
            // the trusty old way...:
            const blt_float_t tgt_prior_mod( (tgt==ngt) ? lnmatch : lnmismatch );
            pprob[dgt] = normal_lhood[ngt]+tumor_lhood[tgt]+pset.normal[ngt]+tgt_prior_mod;
#else

            // unorm takes the role of the normal prior for the somatic case:
            //            static const blt_float_t unorm(std::log(static_cast<blt_float_t>(DIGT_SGRID::PRESTRAND_SIZE)));
            //
            // matching states use the normal prior, mismatching (somatic)
            // states use the somatic marginal prior instead:
            blt_float_t prior;
            if(tgt==ngt) {
                prior=pset.normal[ngt]+lnmatch;
            } else {
                prior=pset.somatic_marginal[ngt]+lnmismatch;
            }
            pprob[dgt] = normal_lhood[ngt]+tumor_lhood[tgt]+prior;

#endif
        }
    }

    // Now add the single-strand noise states. note that these states
    // are unique in that we don't look for mixtures of somatic
    // variation with these noise states, b/c single-strand
    // observations can almost exclusively be ruled out as noise:
    //
    for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) {
        const unsigned dgt(DDIGT_SGRID::get_state(gt,gt));
        pprob[dgt] = normal_lhood[gt]+tumor_lhood[gt]+pset.normal[gt]+lnmatch;
    }

    opt_normalize_ln_distro(pprob.begin(),pprob.end(),DDIGT_SGRID::is_nonsom.val.begin(),rs.max_gt);
    //normalize_ln_distro(pprob.begin(),pprob.end(),rs.max_gt);

    // sum the posterior over all states where tumor and normal agree:
    double nonsomatic_sum(0);
    for(unsigned gt(0); gt<DIGT_SGRID::SIZE; ++gt) {
        nonsomatic_sum += pprob[DDIGT_SGRID::get_state(gt,gt)];
    }
    rs.snv_qphred=error_prob_to_qphred(nonsomatic_sum);

    // nothing further is computed for a zero somatic quality; note that
    // rs.snv_from_ntype_qphred and rs.ntype are not assigned on this path:
    if(0==rs.snv_qphred) return;

#if 0
    // alternate way to calculate the joint:
    //
    double min_not_somfrom_sum(0);
    for(unsigned dgt(0); dgt<DIGT::SIZE; ++dgt) {
        double not_somfrom_sum(nonsomatic_sum);

        for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
            // we're looking for the joint prob when state dgt is true
            // in the normal, so skip this as a normal state here:
            //
            if(dgt==ngt) continue;

            for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
                // we've already started from the nonsomatic sum, so we can skip the equal states:
                //
                if(ngt==tgt) continue;

                not_somfrom_sum += pprob[DDIGT_SGRID::get_state(ngt,tgt)];
            }
        }

        if((dgt==0) || (not_somfrom_sum<min_not_somfrom_sum)) {
            min_not_somfrom_sum=not_somfrom_sum;
            rs.snv_from_ntype_qphred=error_prob_to_qphred(not_somfrom_sum);
            rs.ntype=dgt;
        }
    }
#endif

#if 0
    // reset max_gt to the most likely state excluding normal noise states:
    //
    rs.max_gt=0;
    for(unsigned dgt(0); dgt<DIGT::SIZE; ++dgt) {
        for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) {
            const unsigned xgt(DDIGT_SGRID::get_state(dgt,tgt));
            if(pprob[xgt] > pprob[rs.max_gt]) rs.max_gt=xgt;
        }
    }
#endif

    // Calculate normal distribution alone so that we can classify this call:
    //
    // Polymorphic prior is used because in this situation we want to
    // be conservative about the reference classification --
    // ie. conditioned on only looking at putative somatic sites, we
    // require evidence to show that the normal is in fact reference
    // and not simply an unsampled copy of the somatic variation.
    //
    std::vector<double> normal_pprob(DIGT_SGRID::PRESTRAND_SIZE);
    for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) {
        normal_pprob[ngt] = normal_lhood[ngt]+pset.normal_poly[ngt];
    }

    unsigned max_norm_gt(0);
    normalize_ln_distro(normal_pprob.begin(),normal_pprob.end(),max_norm_gt);

    // find the probability of max_norm_gt:
    const double ngt_prob(prob_comp(normal_pprob.begin(),normal_pprob.end(),max_norm_gt));

    // combine the two error terms a=nonsomatic_sum and b=ngt_prob:
    // (1-(1-a)(1-b)) -> a+b-(ab)
    const double not_somfrom_sum(nonsomatic_sum+ngt_prob-(nonsomatic_sum*ngt_prob));

    rs.snv_from_ntype_qphred=error_prob_to_qphred(not_somfrom_sum);
    rs.ntype=max_norm_gt;
}