static void calculate_result_set(const strelka_options& opt, const double* normal_lnprior, const double lnmatch, const double lnmismatch, const double* normal_lhood, const double* tumor_lhood, result_set& rs) { #ifdef SOMATIC_DEBUG std::vector<double> check_prior(DDIINDEL::SIZE); for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) { const double base_prior(normal_lnprior[ngt]); for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) { const unsigned dgt(DDIINDEL::get_state(ngt,tgt)); check_prior[dgt] = base_prior+ ((tgt==ngt) ? lnmatch : lnmismatch); } } check_ln_distro(check_prior.begin(), check_prior.end(), "somatic indel full prior"); #endif // get unnormalized posterior: std::vector<double> pprob(DDIINDEL::SIZE); for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) { const double base_prior(normal_lnprior[ngt]); for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) { const unsigned dgt(DDIINDEL::get_state(ngt,tgt)); pprob[dgt] = normal_lhood[ngt]+ tumor_lhood[tgt]+ base_prior+ ((tgt==ngt) ? lnmatch : lnmismatch); } } normalize_ln_distro(pprob.begin(),pprob.end(),rs.max_gt); #ifdef DEBUG_INDEL_CALL log_os << "INDEL_CALL pprob(noindel),pprob(hom),pprob(het): " << pprob[STAR_DIINDEL::NOINDEL] << " " << pprob[STAR_DIINDEL::HOM] << " " << pprob[STAR_DIINDEL::HET] << "\n"; #endif double nonsomatic_sum(0); for (unsigned gt(0); gt<STAR_DIINDEL::SIZE; ++gt) { nonsomatic_sum += pprob[DDIINDEL::get_state(gt,gt)]; } rs.sindel_qphred=error_prob_to_qphred(nonsomatic_sum); double not_somfrom_sum[STAR_DIINDEL::SIZE]; for (unsigned sgt(0); sgt<STAR_DIINDEL::SIZE; ++sgt) { not_somfrom_sum[sgt]=nonsomatic_sum; for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) { if (sgt==ngt) continue; for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) { if (tgt==ngt) continue; not_somfrom_sum[sgt] += pprob[DDIINDEL::get_state(ngt,tgt)]; } } } rs.sindel_from_ref_qphred=error_prob_to_qphred(not_somfrom_sum[STAR_DIINDEL::NOINDEL]); rs.sindel_from_het_qphred=error_prob_to_qphred(not_somfrom_sum[STAR_DIINDEL::HET]); rs.sindel_from_hom_qphred=error_prob_to_qphred(not_somfrom_sum[STAR_DIINDEL::HOM]); double not_somfromanyhom_sum(nonsomatic_sum); for (unsigned ngt(0); ngt<STAR_DIINDEL::SIZE; ++ngt) { if (STAR_DIINDEL::HET != ngt) continue; for (unsigned tgt(0); tgt<STAR_DIINDEL::SIZE; ++tgt) { if (tgt==ngt) continue; not_somfromanyhom_sum += pprob[DDIINDEL::get_state(ngt,tgt)]; } } rs.sindel_from_anyhom_qphred=error_prob_to_qphred(not_somfromanyhom_sum); rs.max_gt_qphred=error_prob_to_qphred(prob_comp(pprob.begin(),pprob.end(),rs.max_gt)); }
// Given the likelihood, go through the final computations to get the // posterior and derived values. // static void calculate_result_set_grid(const blt_float_t* normal_lhood, const blt_float_t* tumor_lhood, const somatic_snv_caller_strand_grid::prior_set& pset, const blt_float_t lnmatch, const blt_float_t lnmismatch, const unsigned /*ref_gt*/, result_set& rs) { // a piece transplanted from 1150 to make a formal correction to // the priors which should have a low-impact on the results. // the prior below is incomplete #ifdef DEBUG_ALTERNATE_PRIOR static const double neginf(-std::numeric_limits<double>::infinity()); std::vector<double> prior(DDIGT_SGRID::SIZE); std::fill(prior.begin(),prior.end(),neginf); // this zero'd code is incomplete and abandoned for now...: #if 0 for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) { double base_prior(neginf); const bool is_noise(ngt>=STAR_DIINDEL::SIZE); if(is_noise) { base_prior=pset.normal[ngt]; } else { base_prior=pset.nonoise[ngt]; } for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) { const blt_float_t tgt_prior_mod( (tgt==ngt) ? lnmatch : lnmismatch ); const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt)); prior[dgt] = normal_genomic_lnprior[ngt]+tgt_prior_mod; } } for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) { const unsigned dgt(DDIGT_SGRID::get_state(gt,gt)); prior[dgt] = normal_genomic_lnprior[gt]+lnmatch; } #endif check_ln_distro(prior.begin(), prior.end(), "somatic snv full prior"); #endif // intentionally use higher float res for this structure: std::vector<double> pprob(DDIGT_SGRID::SIZE); // mult by prior distro to get unnormalized pprob for states in // the regular grid model: // for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) { for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) { const unsigned dgt(DDIGT_SGRID::get_state(ngt,tgt)); #if 0 // the trusty old way...: const blt_float_t tgt_prior_mod( (tgt==ngt) ? lnmatch : lnmismatch ); pprob[dgt] = normal_lhood[ngt]+tumor_lhood[tgt]+pset.normal[ngt]+tgt_prior_mod; #else // unorm takes the role of the normal prior for the somatic case: // static const blt_float_t unorm(std::log(static_cast<blt_float_t>(DIGT_SGRID::PRESTRAND_SIZE))); blt_float_t prior; if(tgt==ngt) { prior=pset.normal[ngt]+lnmatch; } else { prior=pset.somatic_marginal[ngt]+lnmismatch; } pprob[dgt] = normal_lhood[ngt]+tumor_lhood[tgt]+prior; #endif } } // Now add the single-strand noise states. note that these states // are unique in that we don't look for mixtures of somatic // variation with these noise states, b/c single-strand // observations can almost exclusively be ruled out as noise: // for(unsigned gt(DIGT_SGRID::PRESTRAND_SIZE); gt<DIGT_SGRID::SIZE; ++gt) { const unsigned dgt(DDIGT_SGRID::get_state(gt,gt)); pprob[dgt] = normal_lhood[gt]+tumor_lhood[gt]+pset.normal[gt]+lnmatch; } opt_normalize_ln_distro(pprob.begin(),pprob.end(),DDIGT_SGRID::is_nonsom.val.begin(),rs.max_gt); //normalize_ln_distro(pprob.begin(),pprob.end(),rs.max_gt); double nonsomatic_sum(0); for(unsigned gt(0); gt<DIGT_SGRID::SIZE; ++gt) { nonsomatic_sum += pprob[DDIGT_SGRID::get_state(gt,gt)]; } rs.snv_qphred=error_prob_to_qphred(nonsomatic_sum); if(0==rs.snv_qphred) return; #if 0 // alternate way to calculate the joint: // double min_not_somfrom_sum(0); for(unsigned dgt(0); dgt<DIGT::SIZE; ++dgt) { double not_somfrom_sum(nonsomatic_sum); for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) { // we're looking for the joint prob when state dgt is true // in the normal, so skip this as a normal state here: // if(dgt==ngt) continue; for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) { // we've already started from the nonsomatic som, so we can skip the equal states: // if(ngt==tgt) continue; not_somfrom_sum += pprob[DDIGT_SGRID::get_state(ngt,tgt)]; } } if((dgt==0) || (!_somfrom_sum<min_not_somfrom_sum)) { min_not_somfrom_sum=not_somfrom_sum; rs.snv_from_ntype_qphred=error_prob_to_qphred(not_somfrom_sum); rs.ntype=dgt; } } #endif #if 0 // reset max_gt to the most likely state excluding normal noise states: // rs.max_gt=0; for(unsigned dgt(0); dgt<DIGT::SIZE; ++dgt) { for(unsigned tgt(0); tgt<DIGT_SGRID::PRESTRAND_SIZE; ++tgt) { const unsigned xgt(DDIGT_SGRID::get_state(dgt,tgt)); if(pprob[xgt] > pprob[rs.max_gt]) rs.max_gt=xgt; } } #endif // Calculate normal distribution alone so that we can classify this call: // // Polymorphic prior is used because in this situation we want to // be conservative about the reference classification -- // ie. conditioned on only looking at putative somatic sites, we // require evidence to show that the normal is in fact reference // and not simply an unsampled copy of the somatic variation. // std::vector<double> normal_pprob(DIGT_SGRID::PRESTRAND_SIZE); for(unsigned ngt(0); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) { normal_pprob[ngt] = normal_lhood[ngt]+pset.normal_poly[ngt]; } unsigned max_norm_gt(0); normalize_ln_distro(normal_pprob.begin(),normal_pprob.end(),max_norm_gt); // find the probability of max_norm_gt: const double ngt_prob(prob_comp(normal_pprob.begin(),normal_pprob.end(),max_norm_gt)); // (1-(1-a)(1-b)) -> a+b-(ab) double not_somfrom_sum(nonsomatic_sum+ngt_prob-(nonsomatic_sum*ngt_prob)); rs.snv_from_ntype_qphred=error_prob_to_qphred(not_somfrom_sum); rs.ntype=max_norm_gt; }