/// get the indel cigar and ref and indel strings used in the indel /// summary line output /// static void get_vcf_summary_strings(const indel_key& ik, const indel_data& id, const reference_contig_segment& ref, std::string& vcf_indel_seq, std::string& vcf_ref_seq) { if (ik.is_breakpoint()) { if (ik.type == INDEL::BP_LEFT) { copy_ref_subseq(ref,ik.pos-1,ik.pos,vcf_ref_seq); vcf_indel_seq = vcf_ref_seq + id.get_insert_seq() + '.'; } else if(ik.type == INDEL::BP_RIGHT) { copy_ref_subseq(ref,ik.pos,ik.pos+1,vcf_ref_seq); vcf_indel_seq = '.' + id.get_insert_seq() + vcf_ref_seq; } else { assert(0); } } else { copy_ref_subseq(ref,ik.pos-1,ik.pos+ik.delete_length(),vcf_ref_seq); copy_ref_subseq(ref,ik.pos-1,ik.pos,vcf_indel_seq); vcf_indel_seq += id.get_insert_seq(); } }
void indel_digt_caller:: get_indel_digt_lhood(const starling_options& opt, const starling_deriv_options& dopt, const starling_sample_options& sample_opt, const double indel_error_prob, const double ref_error_prob, const indel_key& ik, const indel_data& id, const bool is_het_bias, const double het_bias, const bool is_tier2_pass, const bool is_use_alt_indel, double* const lhood) { static const double loghalf(-std::log(2.)); for (unsigned gt(0); gt<STAR_DIINDEL::SIZE; ++gt) lhood[gt] = 0.; const bool is_breakpoint(ik.is_breakpoint()); const double indel_error_lnp(std::log(indel_error_prob)); const double indel_real_lnp(std::log(1.-indel_error_prob)); const double ref_error_lnp(std::log(ref_error_prob)); const double ref_real_lnp(std::log(1.-ref_error_prob)); // typedef read_path_scores::alt_indel_t::const_iterator aiter; typedef indel_data::score_t::const_iterator siter; siter it(id.read_path_lnp.begin()), it_end(id.read_path_lnp.end()); for (; it!=it_end; ++it) { const read_path_scores& path_lnp(it->second); // optionally skip tier2 data: if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue; // get alt path lnp: double alt_path_lnp(path_lnp.ref); #if 0 if (is_use_alt_indel && path_lnp.is_alt && (path_lnp.alt > alt_path_lnp)) { alt_path_lnp=path_lnp.alt; } #else if (is_use_alt_indel and (not path_lnp.alt_indel.empty()) ) { typedef read_path_scores::alt_indel_t::const_iterator aiter; aiter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end()); for (; j!=j_end; ++j) { if (j->second>alt_path_lnp) alt_path_lnp=j->second; } } #endif const double noindel_lnp(log_sum(alt_path_lnp+ref_real_lnp,path_lnp.indel+indel_error_lnp)); const double hom_lnp(log_sum(alt_path_lnp+ref_error_lnp,path_lnp.indel+indel_real_lnp)); // allele ratio convention is that the indel occurs at the // het_allele ratio and the alternate allele occurs at // (1-het_allele_ratio): double log_ref_prob(loghalf); double log_indel_prob(loghalf); if (not is_breakpoint) { static const double het_allele_ratio(0.5); get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank, ik,het_allele_ratio,log_ref_prob,log_indel_prob); } const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob)); lhood[STAR_DIINDEL::NOINDEL] += integrate_out_sites(dopt,path_lnp.nsite,noindel_lnp,is_tier2_pass); lhood[STAR_DIINDEL::HOM] += integrate_out_sites(dopt,path_lnp.nsite,hom_lnp,is_tier2_pass); lhood[STAR_DIINDEL::HET] += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass); #ifdef DEBUG_INDEL_CALL //log_os << std::setprecision(8); //log_os << "INDEL_CALL i,ref_lnp,indel_lnp,lhood(noindel),lhood(hom),lhood(het): " << i << " " << path_lnp.ref << " " << path_lnp.indel << " " << lhood[STAR_DIINDEL::NOINDEL] << " " << lhood[STAR_DIINDEL::HOM] << " " << lhood[STAR_DIINDEL::HET] << "\n"; #endif } if (is_het_bias) { // loop is currently setup to assume a uniform het ratio subgenotype prior const unsigned n_bias_steps(1+static_cast<unsigned>(het_bias/opt.het_bias_max_ratio_inc)); const double ratio_increment(het_bias/static_cast<double>(n_bias_steps)); for (unsigned step(0); step<n_bias_steps; ++step) { const double het_ratio(0.5+(step+1)*ratio_increment); increment_het_ratio_lhood(opt,dopt,sample_opt, indel_error_lnp,indel_real_lnp, ref_error_lnp,ref_real_lnp, ik,id,het_ratio,is_tier2_pass,is_use_alt_indel,lhood); } const unsigned n_het_subgt(1+2*n_bias_steps); const double subgt_log_prior(std::log(static_cast<double>(n_het_subgt))); lhood[STAR_DIINDEL::HET] -= subgt_log_prior; } }
void indel_digt_caller:: get_high_low_het_ratio_lhood(const starling_options& /*opt*/, const starling_deriv_options& dopt, const starling_sample_options& sample_opt, const double indel_error_lnp, const double indel_real_lnp, const double ref_error_lnp, const double ref_real_lnp, const indel_key& ik, const indel_data& id, const double het_ratio, const bool is_tier2_pass, const bool is_use_alt_indel, double& het_lhood_high, double& het_lhood_low) { // handle het ratio and its complement in one step: const double chet_ratio(1.-het_ratio); const double log_het_ratio(std::log(het_ratio)); const double log_chet_ratio(std::log(chet_ratio)); const bool is_breakpoint(ik.is_breakpoint()); het_lhood_high=0; het_lhood_low=0; // typedef read_path_scores::alt_indel_t::const_iterator aiter; typedef indel_data::score_t::const_iterator siter; siter i(id.read_path_lnp.begin()), i_end(id.read_path_lnp.end()); for (; i!=i_end; ++i) { const read_path_scores& path_lnp(i->second); // optionally skip tier2 data: if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue; // get alt path lnp: double alt_path_lnp(path_lnp.ref); #if 0 if (is_use_alt_indel && path_lnp.is_alt && (path_lnp.alt > alt_path_lnp)) { alt_path_lnp=path_lnp.alt; } #else if (is_use_alt_indel && (! path_lnp.alt_indel.empty()) ) { typedef read_path_scores::alt_indel_t::const_iterator aiter; aiter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end()); for (; j!=j_end; ++j) { if (j->second>alt_path_lnp) alt_path_lnp=j->second; } } #endif const double noindel_lnp(log_sum(alt_path_lnp+ref_real_lnp,path_lnp.indel+indel_error_lnp)); const double hom_lnp(log_sum(alt_path_lnp+ref_error_lnp,path_lnp.indel+indel_real_lnp)); // allele ratio convention is that the indel occurs at the // het_allele ratio and the alternate allele occurs at // (1-het_allele_ratio): { double log_ref_prob(log_chet_ratio); double log_indel_prob(log_het_ratio); if (! is_breakpoint) { get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank, ik,het_ratio,log_ref_prob,log_indel_prob); } const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob)); het_lhood_low += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass); } { double log_ref_prob(log_het_ratio); double log_indel_prob(log_chet_ratio); if (! is_breakpoint) { get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank, ik,chet_ratio,log_ref_prob,log_indel_prob); } const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob)); het_lhood_high += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass); } } }