/// Populate the per-quality-score lookup tables.
///
/// For every integer qscore q in [0,MAX_QSCORE] this fills:
///   - q2p[q]       : phred_to_error_prob(q)
///   - q2lncompe[q] : log1p_switch(-q2p[q])
///   - q2lne[q]     : q * (-ln(10)/10)
/// and for every (q, mapq) pair with mapq in [0,MAX_MAP]:
///   - mappedq[mapq][q] : error_prob_to_qphred(phred_to_mapped_error_prob(q,mapq))
///
qphred_cache::
qphred_cache()
{
    // per-qscore increment used for the q2lne table:
    static const double ln_per_qscore(-std::log(10.)/10.);

    for (int qscore(0); qscore<=MAX_QSCORE; ++qscore)
    {
        q2p[qscore] = phred_to_error_prob(static_cast<double>(qscore));

        // read the value back from the table (rather than a local) so
        // that the result reflects the table's storage type:
        q2lncompe[qscore] = log1p_switch(-q2p[qscore]);

        q2lne[qscore] = static_cast<double>(qscore)*ln_per_qscore;
    }

    // combined table, indexed [mapq][qscore]:
    for (int qscore(0); qscore<=MAX_QSCORE; ++qscore)
    {
        for (int mapq(0); mapq<=MAX_MAP; ++mapq)
        {
            const double mapped_error_prob(phred_to_mapped_error_prob(qscore,mapq));
            mappedq[mapq][qscore] = error_prob_to_qphred(mapped_error_prob);
        }
    }
}
/// Set up the somatic match/mismatch log-terms and the per-reference-state
/// prior tables.
///
/// \param opt        supplies somatic_snv_rate and the shared-site-error /
///                   normal-noise settings forwarded to get_prior()
/// \param pd_caller  supplies the genomic and polymorphic log-priors for
///                   each of the (N_BASE+1) reference states
///
somatic_snv_caller_strand_grid::
somatic_snv_caller_strand_grid(const strelka_options& opt,
                               const pprob_digt_caller& pd_caller)
    : _opt(opt)
{
    // the mismatch rate is divided evenly over all states other than the
    // matching one:
    const blt_float_t n_mismatch_states(static_cast<blt_float_t>((DIGT_SGRID::PRESTRAND_SIZE)-1));

    _ln_som_match = log1p_switch(-opt.somatic_snv_rate);
    _ln_som_mismatch = std::log(opt.somatic_snv_rate/n_mismatch_states);

    static const unsigned n_ref_states(N_BASE+1);

    // pass 1: zero the normal-sample prior vectors for every reference state
    for (unsigned ref_index(0); ref_index<n_ref_states; ++ref_index)
    {
        prior_set& priors(_lnprior[ref_index]);
        std::fill(priors.normal.begin(), priors.normal.end(), 0);
        std::fill(priors.normal_poly.begin(), priors.normal_poly.end(), 0);
    }

    // pass 2: compute the genomic and polymorphic priors for every
    // reference state
    for (unsigned ref_index(0); ref_index<n_ref_states; ++ref_index)
    {
        prior_set& priors(_lnprior[ref_index]);

        get_prior(pd_caller.lnprior_genomic(ref_index),
                  ref_index,
                  opt.shared_site_error_rate,
                  opt.shared_site_error_strand_bias_fraction,
                  opt.site_somatic_normal_noise_rate,
                  opt.is_site_somatic_normal_noise_rate,
                  priors.normal,
                  priors.somatic_marginal);

        get_prior(pd_caller.lnprior_polymorphic(ref_index),
                  ref_index,
                  opt.shared_site_error_rate,
                  opt.shared_site_error_strand_bias_fraction,
                  opt.site_somatic_normal_noise_rate,
                  opt.is_site_somatic_normal_noise_rate,
                  priors.normal_poly,
                  priors.somatic_marginal_poly);
    }
}
/// Store the indel genotype caller and precompute the somatic
/// match/mismatch log-terms from opt.somatic_indel_rate.
///
/// \param opt        supplies somatic_indel_rate
/// \param in_caller  retained by the object as _in_caller
///
somatic_indel_caller::
somatic_indel_caller(const strelka_options& opt,
                     const indel_digt_caller& in_caller)
    : _in_caller(in_caller)
{
    // the mismatch rate is divided evenly over the (SIZE-1) other states:
    const double n_mismatch_states(static_cast<double>(STAR_DIINDEL::SIZE-1));

    _ln_som_match = log1p_switch(-opt.somatic_indel_rate);
    _ln_som_mismatch = std::log(opt.somatic_indel_rate/n_mismatch_states);
}
/// Initialize the genomic and polymorphic diploid-indel log-prior tables.
///
/// Both tables are indexed by STAR_DIINDEL state and hold natural-log
/// probabilities:
///
///   genomic:      NOINDEL = ln(1 - 3*theta/2), HOM = ln(theta/2), HET = ln(theta)
///   polymorphic:  NOINDEL = ln(0.25),          HOM = ln(0.25),    HET = ln(0.5)
///
/// \param theta  rate parameter of the genomic prior
///               (NOTE(review): presumably the expected indel
///               heterozygosity -- confirm against callers)
///
indel_digt_caller::
indel_digt_caller(const double theta)
{
    _lnprior_genomic[STAR_DIINDEL::NOINDEL]=log1p_switch(-(3.*theta)/2.);
    _lnprior_genomic[STAR_DIINDEL::HOM]=std::log(theta/2.);
    _lnprior_genomic[STAR_DIINDEL::HET]=std::log(theta);

    // These must be stored in log space like the genomic table above;
    // storing the raw probabilities would yield positive "log-priors".
    _lnprior_polymorphic[STAR_DIINDEL::NOINDEL]=std::log(0.25);
    _lnprior_polymorphic[STAR_DIINDEL::HOM]=std::log(0.25);
    _lnprior_polymorphic[STAR_DIINDEL::HET]=std::log(0.5);
}
// compute just the non-strand-bias portion of the normal marginal // prior given p(signal), p(no-strand noise), p(strand-bias noise) // static void get_nostrand_marginal_prior(const blt_float_t* normal_lnprior, const unsigned ref_gt, const blt_float_t sse_rate, const blt_float_t sseb_fraction, std::vector<blt_float_t>& grid_normal_lnprior) { const blt_float_t strand_sse_rate(sse_rate*sseb_fraction); const blt_float_t nostrand_sse_rate(sse_rate-strand_sse_rate); const blt_float_t ln_csse_rate( log1p_switch(-sse_rate) ); // const blt_float_t ln_strand_sse_rate( std::log(strand_sse_rate) ); const blt_float_t ln_nostrand_sse_rate( std::log(nostrand_sse_rate) ); // fill in normal sample prior for canonical diploid allele frequencies: for(unsigned ngt(0); ngt<DIGT::SIZE; ++ngt) { grid_normal_lnprior[ngt] = (normal_lnprior[ngt]+ln_csse_rate); } // nostrand noise prior distributions for each allele combination axis: // // weight the prior by the potential originating genotypes: // if on AB axis, we want P(AA+noiseB)+P(AB+noise)+P(BB+noiseA) // so we have P(AA)*error_prob /3 + P(AB)*error_prob + P(BB)*error_prob/3 // static const unsigned n_het_axes(6); blt_float_t nostrand_axis_prior[n_het_axes]; for(unsigned ngt(N_BASE); ngt<DIGT::SIZE; ++ngt) { const unsigned axis_id(ngt-N_BASE); nostrand_axis_prior[axis_id] = normal_lnprior[ngt]; // get the two associated homs: for(unsigned b(0); b<N_BASE; ++b) { if(DIGT::expect2(b,ngt)<=0) continue; nostrand_axis_prior[axis_id] = log_sum(nostrand_axis_prior[axis_id], normal_lnprior[b]+ln_one_third); } } static const blt_float_t error_mod( -std::log(static_cast<blt_float_t>(DIGT_SGRID::HET_RES*2)) ); // fill in normal sample prior for 'noise' frequencies: for(unsigned ngt(DIGT::SIZE); ngt<DIGT_SGRID::PRESTRAND_SIZE; ++ngt) { // 'ngt2' is the root diploid state corresponding to noise // state 'ngt' const unsigned ngt2(DIGT_SGRID::get_digt_state(ngt,ref_gt)); assert(ngt2>=N_BASE); const unsigned axis_id(ngt2-N_BASE); 
grid_normal_lnprior[ngt] = (nostrand_axis_prior[axis_id]+ln_nostrand_sse_rate+error_mod); // grid_normal_lnprior[ngt] = (normal_lnprior[ngt2]+ln_sse_rate+error_mod); } }
void position_nonref_2allele_test(const snp_pos_info& pi, const blt_options& opt, const bool /*is_always_test*/, nonref_test_call& nrc) { static const bool is_mle_freq(false); if (pi.ref_base=='N') return; // add early escape test here? // 1. Determine the two 'primary' alleles -- Simple test just adds // up qscores to determine which alleles are primary. // nrc.nonref_id=(BASE_ID::ANY); //unsigned nonref2_id(BASE_ID::ANY); // just ignore this value for now.... { double qtot[N_BASE]; for (unsigned i(0); i<N_BASE; ++i) qtot[i] = 0; const unsigned n_calls(pi.calls.size()); for (unsigned i(0); i<n_calls; ++i) { if (pi.calls[i].base_id==BASE_ID::ANY) continue; qtot[pi.calls[i].base_id] += pi.calls[i].get_qscore(); } // get max and max2: unsigned max_id=0; unsigned max2_id=1; for (unsigned b(1); b<N_BASE; ++b) { if (qtot[b] > qtot[max_id]) { max2_id = max_id; max_id = b; } else if (qtot[b] > qtot[max2_id]) { max2_id = b; } } const unsigned ref_id=base_to_id(pi.ref_base); if (ref_id==max_id) { nrc.nonref_id=max2_id; #if 0 } else if (ref_id==max2_id) { nrc.nonref_id=max_id; #endif } else { nrc.nonref_id=max_id; //nonref2_id=max2_id; } } blt_float_t lhood[NR2TEST::SIZE]; lhood[NR2TEST::REF] = calc_pos_nonref_freq_loghood(pi,0.); sparse_function sf; nonref_allele_freq_loghood_sparse_func nlf(pi,nrc.nonref_id,sf); sample_uniform_range(0.,1.,nlf); //sample_uniform_range(min_nonref_freq,1.,nlf); lhood[NR2TEST::NONREF_MF] = integrate_ln_sparsefunc(sf, opt.min_nonref_freq, 1,1,1); lhood[NR2TEST::NONREF_MF_NOISE] = integrate_ln_sparsefunc(sf, 0, opt.nonref_site_error_decay_freq,1,0); static const blt_float_t neginf(-std::numeric_limits<blt_float_t>::infinity()); lhood[NR2TEST::NONREF_OTHER] = neginf; //std::cerr << "WAGART: logh ref/nonef: " << lhood[0] << " " << lhood[1] << "\n"; // TODO: ctor compute this: // this goes in here just in case someone cranks both parameters up near 1: // const double nonref_variant_rate_used = opt.nonref_variant_rate*(1-opt.nonref_site_error_rate); 
blt_float_t prior[NR2TEST::SIZE]; prior[NR2TEST::REF] = log1p_switch(-(nonref_variant_rate_used+opt.nonref_site_error_rate)); prior[NR2TEST::NONREF_MF] = std::log(nonref_variant_rate_used/3); prior[NR2TEST::NONREF_MF_NOISE] = std::log(opt.nonref_site_error_rate); prior[NR2TEST::NONREF_OTHER] = std::log(2*nonref_variant_rate_used/3); double pprob[NR2TEST::SIZE]; for (unsigned i(0); i<NR2TEST::SIZE; ++i) { pprob[i] = lhood[i] + prior[i]; } normalize_ln_distro(pprob,pprob+NR2TEST::SIZE,nrc.max_gt); nrc.snp_qphred=error_prob_to_qphred(pprob[NR2TEST::REF]+pprob[NR2TEST::NONREF_MF_NOISE]); nrc.max_gt_qphred=error_prob_to_qphred(prob_comp(pprob,pprob+NR2TEST::SIZE,nrc.max_gt)); nrc.is_snp=(nrc.snp_qphred != 0); if (! (is_mle_freq && nrc.is_snp)) return; #if 0 const double null_loghood(calc_pos_nonref_freq_loghood(pi,0.)); // heuristic to escape early: static const double p_delta(0.001); const double delta_loghood(calc_pos_nonref_freq_loghood(pi,p_delta)); if (null_loghood > delta_loghood) return; double x_nonref_freq; double x_loghood; position_nonref_freq_loghood_minfunc mf(epi); static const double x1(0.5); static const double x2(0.4); codemin::minimize_1d(x1,x2,mf.val(x1),mf,x_nonref_freq,x_loghood); x_nonref_freq = mf.arg_to_prob(x_nonref_freq); const double log_lrt(-2.*(x_loghood+null_loghood)); // becuase null has the parameter fixed to a boundary value, the // asymmtotic distribution is a 50:50 mixture of csq(0) and chq(1) // -- the same effect as multiplying alpha of csq(1) by 2, dividing // the null prob by 2. (as we do below): boost::math::chi_squared dist(1); const double null_prob((1.-boost::math::cdf(dist,log_lrt))/2.); sc.is_snp=(null_prob<alpha); sc.null_loghood=null_loghood; sc.min_test_loghood=-x_loghood; sc.snp_prob=1.-null_prob; // if it's a snp then get additional information on non-reference // allele frequencies. 
// if (not sc.is_snp) return; static const double line_tol(1e-7); static const double start_ratio(0.05); static const double min_start_dist(1e-6); static const double end_tol(1e-7); static const unsigned max_iter(200); const unsigned ref_base_id(base_to_id(pi.ref_base)); const double ref_freq(1.-x_nonref_freq); const double nonref_freq((x_nonref_freq)/3.); for (unsigned i(0); i<N_BASE; ++i) { if (i==ref_base_id) sc.allele_freq[i] = ref_freq; else sc.allele_freq[i] = nonref_freq; } static const unsigned N_BASE2(N_BASE*N_BASE); double conj_dir[N_BASE2]; std::fill(conj_dir,conj_dir+N_BASE2,0.); for (unsigned i(0); i<N_BASE; ++i) { const double start_dist( std::max(std::fabs(sc.allele_freq[i]*start_ratio),min_start_dist) ); conj_dir[i*(N_BASE+1)] = start_dist; } double start_tol(end_tol); unsigned iter; double x_all_loghood; double final_dlh; position_allele_distro_loghood_minfunc alm(epi); codemin::minimize_conj_direction(sc.allele_freq,conj_dir,alm,start_tol,end_tol,line_tol, x_all_loghood,iter,final_dlh,max_iter); alm.arg_to_prob(sc.allele_freq,sc.allele_freq); sc.min_loghood=-x_all_loghood; #endif }