// total the path likelihoods of ref,indel and alt_indel states // void get_sum_path_pprob(const starling_deriv_options& dopt, const indel_data& id, const bool is_tier2_pass, const bool is_use_alt_indel, read_path_scores& total_pprob, const bool is_init_total) { static const double initval(0); if (is_init_total) { total_pprob.ref=initval; total_pprob.indel=initval; total_pprob.nsite=0; } typedef std::map<indel_key,unsigned> aimap_t; aimap_t alt_indel_index; typedef indel_data::score_t::const_iterator siter; const siter i_start(id.read_path_lnp.begin()), i_end(id.read_path_lnp.end()); for (siter i(i_start); i!=i_end; ++i) { const read_path_scores& path_lnp(i->second); // optionally skip tier2 data: if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue; const read_path_scores path_pprob(indel_lnp_to_pprob(dopt,path_lnp,is_tier2_pass,is_use_alt_indel)); total_pprob.indel += path_pprob.indel; total_pprob.ref += path_pprob.ref; if (! is_use_alt_indel) continue; typedef read_path_scores::alt_indel_t::const_iterator aciter; aciter j(path_pprob.alt_indel.begin()), j_end(path_pprob.alt_indel.end()); for (; j!=j_end; ++j) { aimap_t::iterator tj(alt_indel_index.find(j->first)); if (tj==alt_indel_index.end()) { alt_indel_index[j->first]=total_pprob.alt_indel.size(); total_pprob.alt_indel.push_back(*j); } else { total_pprob.alt_indel[tj->second].second += j->second; } } } }
ordinal_type Stokhos::Sparse3Tensor<ordinal_type, value_type>:: num_entries() const { #ifdef STOKHOS_DEBUG TEUCHOS_TEST_FOR_EXCEPTION(fill_completed == false, std::logic_error, "You must call fillComplete() before calling num_entries()!"); #endif ordinal_type num = 0; for (k_iterator k = k_begin(); k != k_end(); ++k) for (kj_iterator j = j_begin(k); j != j_end(k); ++j) for (kji_iterator i = i_begin(j); i != i_end(j); ++i) ++num; return num; }
/// Print every stored (k,j,i,Cijk) entry of the tensor, one per line.
///
/// \param os output stream written to.
///
/// In debug builds, throws std::logic_error if fillComplete() has not
/// been called (the iteration structure is only valid after fill).
template <typename ordinal_type, typename value_type>
void
Stokhos::Sparse3Tensor<ordinal_type, value_type>::
print(std::ostream& os) const
{
#ifdef STOKHOS_DEBUG
  TEUCHOS_TEST_FOR_EXCEPTION(fill_completed == false, std::logic_error,
     "You must call fillComplete() before calling print()!");
#endif
  for (k_iterator k=k_begin(); k!=k_end(); ++k)
    for (kj_iterator j=j_begin(k); j!=j_end(k); ++j)
      for (kji_iterator i=i_begin(j); i!=i_end(j); ++i)
        os << "k = " << index(k)
           << ", j = " << index(j)
           << ", i = " << index(i)
           << ", Cijk = " << value(i) << std::endl;
}
// Purge all read segments buffered at position 'pos'. Each underlying
// read object is deleted only when the segment found at this position
// is its final segment, so that earlier positions holding other
// segments of the same read remain valid.
void starling_read_buffer::
clear_pos(const starling_options& opt,
          const pos_t pos)
{
    const pos_group_t::iterator i(_pos_group.find(pos));
    if(i == _pos_group.end()) return;

    segment_group_t& seg_group(i->second);
    segment_group_t::const_iterator j(seg_group.begin()),j_end(seg_group.end());
    for(; j!=j_end; ++j) {
        const align_id_t read_id(j->first);
        const seg_id_t seg_id(j->second);

        // read may already have been removed via another path:
        const read_data_t::iterator k(_read_data.find(read_id));
        if(k == _read_data.end()) continue;

        const starling_read* srp(k->second);

        // only remove read from data structure when we find the last
        // segment: -- note this assumes that two segments will not
        // occur at the same position:
        //
        if(seg_id != srp->segment_count()) continue;

        // remove from contigs:
        typedef contig_align_t cat;
        const cat& ca(srp->contig_align());
        cat::const_iterator m(ca.begin()), m_end(ca.end());
        for(; m!=m_end; ++m) {
            const align_id_t contig_id(m->first);
            align_id_group_t::iterator p(_contig_group.find(contig_id));
            if(p==_contig_group.end()) continue;
            p->second.erase(read_id);
            // drop the contig entry entirely once no reads reference it:
            if(p->second.empty()) _contig_group.erase(p);
        }

        // remove from simple lookup structures and delete read itself:
        // NOTE: srp must stay live until after the _read_key lookup below.
        _read_data.erase(k);
        if(! opt.is_ignore_read_names) _read_key.erase(srp->key());
        delete srp;
    }
    _pos_group.erase(i);
}
value_type Stokhos::Sparse3Tensor<ordinal_type, value_type>:: getValue(ordinal_type i, ordinal_type j, ordinal_type k) const { #ifdef STOKHOS_DEBUG TEUCHOS_TEST_FOR_EXCEPTION(fill_completed == false, std::logic_error, "You must call fillComplete() before calling getValue()!"); #endif k_iterator k_it = find_k(k); if (k_it == k_end()) return value_type(0); kj_iterator j_it = find_j(k_it, j); if (j_it == j_end(k_it)) return value_type(0); kji_iterator i_it = find_i(j_it, i); if (i_it == i_end(j_it)) return value_type(0); return i_it.value(); }
void starling_read_buffer:: dump_pos(const pos_t pos, std::ostream& os) const { const pos_group_t::const_iterator i(_pos_group.find(pos)); if(i == _pos_group.end()) return; os << "READ_BUFFER_POSITION: " << pos << " DUMP ON\n"; const segment_group_t& seg_group(i->second); segment_group_t::const_iterator j(seg_group.begin()),j_end(seg_group.end()); for(unsigned r(0); j!=j_end; ++j) { const align_id_t read_id(j->first); const seg_id_t seg_id(j->second); const read_data_t::const_iterator k(_read_data.find(read_id)); if(k == _read_data.end()) continue; const starling_read& sr(*(k->second)); os << "READ_BUFFER_POSITION: " << pos << " read_segment_no: " << ++r << " seg_id: " << seg_id << "\n"; os << sr.get_segment(seg_id); } os << "READ_BUFFER_POSITION: " << pos << " DUMP OFF\n"; }
// Compute the genotype log-likelihoods lhood[NOINDEL/HOM/HET] for a
// diploid indel from the per-read path log-probabilities in 'id'.
//
// Each read contributes via log_sum combinations of its ref/indel path
// scores with the indel error/real rates; the het state additionally
// mixes the two homozygous states at the observed allele ratio. When
// is_het_bias is set, the het likelihood is extended by averaging over
// a grid of biased het ratios (uniform subgenotype prior).
//
// lhood must point to an array of at least STAR_DIINDEL::SIZE doubles.
void indel_digt_caller::
get_indel_digt_lhood(const starling_options& opt,
                     const starling_deriv_options& dopt,
                     const starling_sample_options& sample_opt,
                     const double indel_error_prob,
                     const double ref_error_prob,
                     const indel_key& ik,
                     const indel_data& id,
                     const bool is_het_bias,
                     const double het_bias,
                     const bool is_tier2_pass,
                     const bool is_use_alt_indel,
                     double* const lhood)
{
    static const double loghalf(-std::log(2.));

    for (unsigned gt(0); gt<STAR_DIINDEL::SIZE; ++gt) lhood[gt] = 0.;

    const bool is_breakpoint(ik.is_breakpoint());

    // precompute the log error/real rates used for every read:
    const double indel_error_lnp(std::log(indel_error_prob));
    const double indel_real_lnp(std::log(1.-indel_error_prob));
    const double ref_error_lnp(std::log(ref_error_prob));
    const double ref_real_lnp(std::log(1.-ref_error_prob));

    // typedef read_path_scores::alt_indel_t::const_iterator aiter;
    typedef indel_data::score_t::const_iterator siter;
    siter it(id.read_path_lnp.begin()), it_end(id.read_path_lnp.end());
    for (; it!=it_end; ++it) {
        const read_path_scores& path_lnp(it->second);

        // optionally skip tier2 data:
        if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue;

        // get alt path lnp: the "non-indel" path is the best of the ref
        // path and (optionally) any alternate indel path for this read:
#if 0
        if (is_use_alt_indel && path_lnp.is_alt &&
            (path_lnp.alt > alt_path_lnp)) {
            alt_path_lnp=path_lnp.alt;
        }
#else
        double alt_path_lnp(path_lnp.ref);
        if (is_use_alt_indel and
            (not path_lnp.alt_indel.empty()) ) {
            typedef read_path_scores::alt_indel_t::const_iterator aiter;
            aiter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end());
            for (; j!=j_end; ++j) {
                if (j->second>alt_path_lnp) alt_path_lnp=j->second;
            }
        }
#endif

        // per-read likelihood of each genotype, marginalized over
        // sequencing/assembly error on the indel call:
        const double noindel_lnp(log_sum(alt_path_lnp+ref_real_lnp,path_lnp.indel+indel_error_lnp));
        const double hom_lnp(log_sum(alt_path_lnp+ref_error_lnp,path_lnp.indel+indel_real_lnp));

        // allele ratio convention is that the indel occurs at the
        // het_allele ratio and the alternate allele occurs at
        // (1-het_allele_ratio):
        double log_ref_prob(loghalf);
        double log_indel_prob(loghalf);
        if (not is_breakpoint) {
            // for non-breakpoint indels, correct the 50/50 expectation
            // for read-length/flank observability effects:
            static const double het_allele_ratio(0.5);
            get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank,
                                          ik,het_allele_ratio,log_ref_prob,log_indel_prob);
        }
        const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob));

        lhood[STAR_DIINDEL::NOINDEL] += integrate_out_sites(dopt,path_lnp.nsite,noindel_lnp,is_tier2_pass);
        lhood[STAR_DIINDEL::HOM] += integrate_out_sites(dopt,path_lnp.nsite,hom_lnp,is_tier2_pass);
        lhood[STAR_DIINDEL::HET] += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass);

#ifdef DEBUG_INDEL_CALL
        //log_os << std::setprecision(8);
        //log_os << "INDEL_CALL i,ref_lnp,indel_lnp,lhood(noindel),lhood(hom),lhood(het): " << i << " " << path_lnp.ref << " " << path_lnp.indel << " " << lhood[STAR_DIINDEL::NOINDEL] << " " << lhood[STAR_DIINDEL::HOM] << " " << lhood[STAR_DIINDEL::HET] << "\n";
#endif
    }

    if (is_het_bias) {
        // loop is currently setup to assume a uniform het ratio subgenotype prior
        const unsigned n_bias_steps(1+static_cast<unsigned>(het_bias/opt.het_bias_max_ratio_inc));
        const double ratio_increment(het_bias/static_cast<double>(n_bias_steps));
        for (unsigned step(0); step<n_bias_steps; ++step) {
            const double het_ratio(0.5+(step+1)*ratio_increment);
            increment_het_ratio_lhood(opt,dopt,sample_opt,
                                      indel_error_lnp,indel_real_lnp,
                                      ref_error_lnp,ref_real_lnp,
                                      ik,id,het_ratio,is_tier2_pass,is_use_alt_indel,lhood);
        }
        // normalize the summed het subgenotypes by their (uniform) prior:
        const unsigned n_het_subgt(1+2*n_bias_steps);
        const double subgt_log_prior(std::log(static_cast<double>(n_het_subgt)));
        lhood[STAR_DIINDEL::HET] -= subgt_log_prior;
    }
}
// Compute het-genotype log-likelihoods at a biased allele ratio and at
// its complement in a single pass over the reads:
//   het_lhood_low  — indel allele at frequency 'het_ratio'
//   het_lhood_high — indel allele at frequency (1-het_ratio)
//
// Both outputs are reset to zero before accumulation. The per-read
// math mirrors get_indel_digt_lhood, with the precomputed error/real
// log-rates passed in by the caller.
void indel_digt_caller::
get_high_low_het_ratio_lhood(const starling_options& /*opt*/,
                             const starling_deriv_options& dopt,
                             const starling_sample_options& sample_opt,
                             const double indel_error_lnp,
                             const double indel_real_lnp,
                             const double ref_error_lnp,
                             const double ref_real_lnp,
                             const indel_key& ik,
                             const indel_data& id,
                             const double het_ratio,
                             const bool is_tier2_pass,
                             const bool is_use_alt_indel,
                             double& het_lhood_high,
                             double& het_lhood_low)
{
    // handle het ratio and its complement in one step:
    const double chet_ratio(1.-het_ratio);

    const double log_het_ratio(std::log(het_ratio));
    const double log_chet_ratio(std::log(chet_ratio));

    const bool is_breakpoint(ik.is_breakpoint());

    het_lhood_high=0;
    het_lhood_low=0;

    // typedef read_path_scores::alt_indel_t::const_iterator aiter;
    typedef indel_data::score_t::const_iterator siter;
    siter i(id.read_path_lnp.begin()), i_end(id.read_path_lnp.end());
    for (; i!=i_end; ++i) {
        const read_path_scores& path_lnp(i->second);

        // optionally skip tier2 data:
        if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue;

        // get alt path lnp: best of the ref path and (optionally) any
        // alternate indel path for this read:
        double alt_path_lnp(path_lnp.ref);
#if 0
        if (is_use_alt_indel && path_lnp.is_alt &&
            (path_lnp.alt > alt_path_lnp)) {
            alt_path_lnp=path_lnp.alt;
        }
#else
        if (is_use_alt_indel &&
            (! path_lnp.alt_indel.empty()) ) {
            typedef read_path_scores::alt_indel_t::const_iterator aiter;
            aiter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end());
            for (; j!=j_end; ++j) {
                if (j->second>alt_path_lnp) alt_path_lnp=j->second;
            }
        }
#endif

        const double noindel_lnp(log_sum(alt_path_lnp+ref_real_lnp,path_lnp.indel+indel_error_lnp));
        const double hom_lnp(log_sum(alt_path_lnp+ref_error_lnp,path_lnp.indel+indel_real_lnp));

        // allele ratio convention is that the indel occurs at the
        // het_allele ratio and the alternate allele occurs at
        // (1-het_allele_ratio):
        {
            // low side: indel allele at het_ratio
            double log_ref_prob(log_chet_ratio);
            double log_indel_prob(log_het_ratio);
            if (! is_breakpoint) {
                get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank,
                                              ik,het_ratio,log_ref_prob,log_indel_prob);
            }
            const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob));

            het_lhood_low += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass);
        }

        {
            // high side: indel allele at (1-het_ratio)
            double log_ref_prob(log_het_ratio);
            double log_indel_prob(log_chet_ratio);
            if (! is_breakpoint) {
                get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank,
                                              ik,chet_ratio,log_ref_prob,log_indel_prob);
            }
            const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob));

            het_lhood_high += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass);
        }
    }
}
// Fill the per-sample indel report 'isri' with read classification
// counts (confident ref / indel / alt-indel supporting reads plus
// "other" reads) and the basecall depth at the indel's left boundary.
void get_starling_indel_sample_report_info(const starling_deriv_options& dopt,
                                           const indel_key& ik,
                                           const indel_data& id,
                                           const pos_basecall_buffer& bc_buff,
                                           const bool is_tier2_pass,
                                           const bool is_use_alt_indel,
                                           starling_indel_sample_report_info& isri)
{
    // get read info:
    {
        // a read is counted as confidently supporting a state only when
        // that state's posterior path probability reaches this threshold:
        static const double path_pprob_thresh(0.999);

        unsigned n_subscore_reads(0);

        typedef indel_data::score_t::const_iterator siter;
        siter i(id.read_path_lnp.begin()), i_end(id.read_path_lnp.end());
        for(; i!=i_end; ++i) {
            const read_path_scores& path_lnp(i->second);

            // optionally skip tier2 data:
            if((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue;

            const read_path_scores pprob(indel_lnp_to_pprob(dopt,path_lnp,is_tier2_pass,is_use_alt_indel));

            if (pprob.ref >= path_pprob_thresh) {
                isri.n_q30_ref_reads++;
            } else if(pprob.indel >= path_pprob_thresh) {
                isri.n_q30_indel_reads++;
            } else {
                // check whether any alternate indel state is confident;
                // otherwise the read is ambiguous ("subscore"):
                typedef read_path_scores::alt_indel_t::const_iterator aciter;
                bool is_alt_found(false);
#if 0
                if(pprob.is_alt && (pprob.alt >= path_pprob_thresh)) {
                    isri.n_q30_alt_reads++;
                    is_alt_found=true;
                }
#else
                aciter j(pprob.alt_indel.begin()), j_end(pprob.alt_indel.end());
                for(; j!=j_end; ++j) {
                    if(j->second >= path_pprob_thresh) {
                        isri.n_q30_alt_reads++;
                        is_alt_found=true;
                        break;
                    }
                }
#endif
                if(! is_alt_found) {
                    n_subscore_reads++;
                }
            }
        }

        // total number of reads with non-zero, yet insufficient indel
        // breakpoint overlap
        const unsigned n_suboverlap_tier1_reads(id.suboverlap_tier1_read_ids.size());
        isri.n_other_reads = (n_subscore_reads+n_suboverlap_tier1_reads);

        if(is_tier2_pass) {
            const unsigned n_suboverlap_tier2_reads(id.suboverlap_tier2_read_ids.size());
            isri.n_other_reads += n_suboverlap_tier2_reads;
        }
    }

    {
        // get depth of indel: use the position just left of the indel,
        // except for right-side breakpoints which use the indel position:
        pos_t depth_pos(ik.pos-1);
        if(ik.type==INDEL::BP_RIGHT) depth_pos=ik.pos;
        const snp_pos_info* spi_ptr(bc_buff.get_pos(depth_pos));
        if(NULL==spi_ptr) {
            isri.depth=0;
        } else {
            isri.depth=spi_ptr->calls.size();
        }
    }
}
// Convert a read's path log-likelihoods into normalized posterior
// probabilities over the {nonsite, ref, indel, alt-indel...} states.
//
// \param dopt             derived options supplying site/nonsite priors
// \param path_lnp         per-path log-likelihoods for one read
// \param is_tier2_pass    selects the tier2 nonsite path likelihood
// \param is_use_alt_indel include alternate-indel states in the model
// \return read_path_scores holding posterior probabilities (the nonsite
//         state is marginalized out of the returned structure).
//
// BUGFIX: the allele prior was previously cached in function-local
// 'static const' variables, freezing it at the n_alleles value of the
// FIRST call; calls with a different number of alternate indels then
// used a stale prior. It is now computed fresh on every call.
read_path_scores
indel_lnp_to_pprob(const starling_deriv_options& dopt,
                   const read_path_scores& path_lnp,
                   const bool is_tier2_pass,
                   const bool is_use_alt_indel)
{
    // ref + indel, plus one allele per alternate indel when enabled:
    unsigned n_alleles(2);
    if (is_use_alt_indel) {
        n_alleles += path_lnp.alt_indel.size();
    }

    // uniform prior over the alleles considered for this read:
    const double allele_lnprior(std::log(1./static_cast<double>(n_alleles)));

    read_path_scores pprob;
    read_path_scores::score_t pprob_nonsite = dopt.get_nonsite_path_lnp(is_tier2_pass,path_lnp.nsite) + dopt.nonsite_lnprior;
    pprob.ref = path_lnp.ref + dopt.site_lnprior + allele_lnprior;
    pprob.indel = path_lnp.indel + dopt.site_lnprior + allele_lnprior;

    if (is_use_alt_indel) {
        typedef read_path_scores::alt_indel_t::const_iterator aciter;
        aciter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end());
        for (; j!=j_end; ++j) {
            pprob.alt_indel.push_back(std::make_pair(j->first,(j->second + dopt.site_lnprior + allele_lnprior)));
        }
    }

    // rescale by the maximum component before exponentiation for
    // numerical stability:
    double scale(std::max(pprob_nonsite,std::max(pprob.ref,pprob.indel)));
    if (is_use_alt_indel) {
        typedef read_path_scores::alt_indel_t::const_iterator aciter;
        aciter j(pprob.alt_indel.begin()), j_end(pprob.alt_indel.end());
        for (; j!=j_end; ++j) {
            if (scale < j->second) scale = j->second;
        }
    }

    pprob_nonsite = std::exp(pprob_nonsite-scale);
    pprob.ref = std::exp(pprob.ref-scale);
    pprob.indel = std::exp(pprob.indel-scale);
    if (is_use_alt_indel) {
        typedef read_path_scores::alt_indel_t::iterator aiter;
        aiter j(pprob.alt_indel.begin()), j_end(pprob.alt_indel.end());
        for (; j!=j_end; ++j) {
            j->second = std::exp((j->second)-scale);
        }
    }

    // normalize; the nonsite mass contributes to the denominator but is
    // not part of the returned structure:
    double sum(pprob_nonsite+pprob.ref+pprob.indel);
    if (is_use_alt_indel) {
        typedef read_path_scores::alt_indel_t::const_iterator aciter;
        aciter j(pprob.alt_indel.begin()), j_end(pprob.alt_indel.end());
        for (; j!=j_end; ++j) {
            sum += j->second;
        }
    }

    pprob.ref /= sum;
    pprob.indel /= sum;
    if (is_use_alt_indel) {
        typedef read_path_scores::alt_indel_t::iterator aiter;
        aiter j(pprob.alt_indel.begin()), j_end(pprob.alt_indel.end());
        for (; j!=j_end; ++j) {
            j->second /= sum;
        }
    }

    return pprob;
}