// Compute the log of the expected fraction of reads supporting the
// reference path and the indel path at a heterozygous indel.
//
// For sites and single breakpoints the observed read ratio is expected
// to match the sample allele ratio, but for indels it changes as a
// function of indel length and read length; this routine applies that
// correction.
//
// read_length      - length of the read being scored
// min_overlap      - subtracted twice from (read_length+1) to get the
//                    base read-depth weight (clamped at zero)
// ik               - indel key; must be INSERT, DELETE or SWAP (asserted)
// het_allele_ratio - sample fraction assigned to the indel allele
// log_ref_prob     - [out] log of the ref-supporting read fraction
// log_indel_prob   - [out] log of the indel-supporting read fraction
//
// Both outputs are left untouched when the combined path weight is not
// positive, so callers must pre-load them with a fallback value.
//
// Note this routine does not account for overlapping indels.
//
static
void
get_het_observed_allele_ratio(const unsigned read_length,
                              const unsigned min_overlap,
                              const indel_key& ik,
                              const double het_allele_ratio,
                              double& log_ref_prob,
                              double& log_indel_prob) {

    assert((ik.type==INDEL::INSERT) ||
           (ik.type==INDEL::DELETE) ||
           (ik.type==INDEL::SWAP));

    // expected relative read depth for two breakpoints separated by a
    // distance of 0, clamped so that the unsigned subtraction cannot wrap:
    const unsigned read_span(read_length+1);
    const unsigned flank_span(2*min_overlap);
    unsigned base_depth(0);
    if (read_span>=flank_span) {
        base_depth=read_span-flank_span;
    }

    // Each path gains additional depth proportional to the length of
    // sequence unique to it, capped at base_depth. For a plain insertion
    // the reference path gets no bonus, for a plain deletion the indel
    // path gets none.
    const double ref_depth(base_depth+std::min(ik.delete_length(),base_depth));
    const double indel_depth(base_depth+std::min(ik.insert_length(),base_depth));

    // weight each path by the sample frequency of its allele:
    const double ref_weight((1-het_allele_ratio)*ref_depth);
    const double indel_weight(het_allele_ratio*indel_depth);
    const double weight_sum(ref_weight+indel_weight);

    // nothing to normalize by: keep the caller-provided values
    if (! (weight_sum>0)) return;

    const double indel_fraction(indel_weight/weight_sum);
    log_ref_prob=std::log(1.-indel_fraction);
    log_indel_prob=std::log(indel_fraction);
}
bool
is_indel_conflict(const indel_key& ik1,
                  const indel_key& ik2) {

    // add one to the end_pos of all indels to prevent immediately
    // adjacent indels in the final alignments:
    pos_range pr1(ik1.open_pos_range());
    pr1.end_pos++;
    pos_range pr2(ik2.open_pos_range());
    pr2.end_pos++;

    return pr1.is_range_intersect(pr2);
}
static
void
set_delete_seq(const indel_key& ik,
               const reference_contig_segment& ref,
               std::string& seq)
{
    copy_ref_subseq(ref,ik.pos,ik.right_pos(),seq);
}
bool
is_range_adjacent_indel_breakpoints(const known_pos_range read_pr,
                                    const indel_key& ik) {

    if(read_pr.is_range_intersect(pos_range(ik.pos-1,ik.pos+1))) return true;
    const pos_t rpos(ik.right_pos());
    if(ik.pos==rpos) return false;
    return (read_pr.is_range_intersect(pos_range(rpos-1,rpos+1)));
}
/// get the indel cigar and ref and indel strings used in the indel
/// summary line output
///
static
void
get_vcf_summary_strings(const indel_key& ik,
                        const indel_data& id,
                        const reference_contig_segment& ref,
                        std::string& vcf_indel_seq,
                        std::string& vcf_ref_seq) {

    if       (ik.is_breakpoint()) {
        if       (ik.type == INDEL::BP_LEFT) {
            copy_ref_subseq(ref,ik.pos-1,ik.pos,vcf_ref_seq);
            vcf_indel_seq = vcf_ref_seq + id.get_insert_seq() + '.';
        } else if(ik.type == INDEL::BP_RIGHT) {
            copy_ref_subseq(ref,ik.pos,ik.pos+1,vcf_ref_seq);
            vcf_indel_seq = '.' + id.get_insert_seq() + vcf_ref_seq;
        } else {
            assert(0);
        }
    } else {
        copy_ref_subseq(ref,ik.pos-1,ik.pos+ik.delete_length(),vcf_ref_seq);
        copy_ref_subseq(ref,ik.pos-1,ik.pos,vcf_indel_seq);
        vcf_indel_seq += id.get_insert_seq();
    }
}
void
get_starling_indel_report_info(const indel_key& ik,
                               const indel_data& id,
                               const reference_contig_segment& ref,
                               starling_indel_report_info& iri) {

    // indel summary info
    get_indel_summary_strings(ik,id,ref,iri.desc,iri.indel_seq,iri.ref_seq);
    get_vcf_summary_strings(ik,id,ref,iri.vcf_indel_seq,iri.vcf_ref_seq);

    iri.it=ik.type;

    const pos_t indel_begin_pos(ik.pos);
    const pos_t indel_end_pos(ik.right_pos());

    // reference context:
    {
        static const unsigned INDEL_CONTEXT_SIZE(10);

        if(ik.type != INDEL::BP_RIGHT) {
            iri.ref_upstream.clear();
            for(pos_t i(indel_begin_pos-static_cast<pos_t>(INDEL_CONTEXT_SIZE)); i<indel_begin_pos; ++i) {
                iri.ref_upstream += ref.get_base(i);
            }
        } else {
            iri.ref_upstream = "N/A";
        }
        if(ik.type != INDEL::BP_LEFT) {
            iri.ref_downstream.clear();
            for(pos_t i(indel_end_pos); i<(indel_end_pos+static_cast<pos_t>(INDEL_CONTEXT_SIZE)); ++i) {
                iri.ref_downstream += ref.get_base(i);
            }
        } else {
            iri.ref_downstream = "N/A";
        }
    }

    // repeat analysis:
    set_repeat_info(ik,ref,iri);

    // interupted hpol compuation:
    iri.ihpol=get_interupted_hpol_size(indel_begin_pos-1,ref);
    iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_begin_pos,ref));
    if(indel_begin_pos != indel_end_pos) {
        iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_end_pos-1,ref));
        iri.ihpol=std::max(iri.ihpol,get_interupted_hpol_size(indel_end_pos,ref));
    }
}
void
indel_digt_caller::
get_indel_digt_lhood(const starling_options& opt,
                     const starling_deriv_options& dopt,
                     const starling_sample_options& sample_opt,
                     const double indel_error_prob,
                     const double ref_error_prob,
                     const indel_key& ik,
                     const indel_data& id,
                     const bool is_het_bias,
                     const double het_bias,
                     const bool is_tier2_pass,
                     const bool is_use_alt_indel,
                     double* const lhood) {

    static const double loghalf(-std::log(2.));

    for (unsigned gt(0); gt<STAR_DIINDEL::SIZE; ++gt) lhood[gt] = 0.;

    const bool is_breakpoint(ik.is_breakpoint());

    const double indel_error_lnp(std::log(indel_error_prob));
    const double indel_real_lnp(std::log(1.-indel_error_prob));
    const double ref_error_lnp(std::log(ref_error_prob));
    const double ref_real_lnp(std::log(1.-ref_error_prob));

    //    typedef read_path_scores::alt_indel_t::const_iterator aiter;

    typedef indel_data::score_t::const_iterator siter;
    siter it(id.read_path_lnp.begin()), it_end(id.read_path_lnp.end());
    for (; it!=it_end; ++it) {
        const read_path_scores& path_lnp(it->second);

        // optionally skip tier2 data:
        if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue;

        // get alt path lnp:
        double alt_path_lnp(path_lnp.ref);
#if 0
        if (is_use_alt_indel && path_lnp.is_alt &&
            (path_lnp.alt > alt_path_lnp)) {
            alt_path_lnp=path_lnp.alt;
        }
#else
        if (is_use_alt_indel and (not path_lnp.alt_indel.empty()) ) {
            typedef read_path_scores::alt_indel_t::const_iterator aiter;
            aiter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end());
            for (; j!=j_end; ++j) {
                if (j->second>alt_path_lnp) alt_path_lnp=j->second;
            }
        }
#endif

        const double noindel_lnp(log_sum(alt_path_lnp+ref_real_lnp,path_lnp.indel+indel_error_lnp));
        const double hom_lnp(log_sum(alt_path_lnp+ref_error_lnp,path_lnp.indel+indel_real_lnp));

        // allele ratio convention is that the indel occurs at the
        // het_allele ratio and the alternate allele occurs at
        // (1-het_allele_ratio):

        double log_ref_prob(loghalf);
        double log_indel_prob(loghalf);
        if (not is_breakpoint) {
            static const double het_allele_ratio(0.5);
            get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank,
                                          ik,het_allele_ratio,log_ref_prob,log_indel_prob);
        }
        const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob));

        lhood[STAR_DIINDEL::NOINDEL] += integrate_out_sites(dopt,path_lnp.nsite,noindel_lnp,is_tier2_pass);
        lhood[STAR_DIINDEL::HOM]     += integrate_out_sites(dopt,path_lnp.nsite,hom_lnp,is_tier2_pass);
        lhood[STAR_DIINDEL::HET]     += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass);

#ifdef DEBUG_INDEL_CALL
        //log_os << std::setprecision(8);
        //log_os << "INDEL_CALL i,ref_lnp,indel_lnp,lhood(noindel),lhood(hom),lhood(het): " << i << " " << path_lnp.ref << " " << path_lnp.indel << " " << lhood[STAR_DIINDEL::NOINDEL] << " " << lhood[STAR_DIINDEL::HOM] << " " << lhood[STAR_DIINDEL::HET] << "\n";
#endif
    }


    if (is_het_bias) {
        // loop is currently setup to assume a uniform het ratio subgenotype prior
        const unsigned n_bias_steps(1+static_cast<unsigned>(het_bias/opt.het_bias_max_ratio_inc));
        const double ratio_increment(het_bias/static_cast<double>(n_bias_steps));
        for (unsigned step(0); step<n_bias_steps; ++step) {
            const double het_ratio(0.5+(step+1)*ratio_increment);
            increment_het_ratio_lhood(opt,dopt,sample_opt,
                                      indel_error_lnp,indel_real_lnp,
                                      ref_error_lnp,ref_real_lnp,
                                      ik,id,het_ratio,is_tier2_pass,is_use_alt_indel,lhood);
        }

        const unsigned n_het_subgt(1+2*n_bias_steps);
        const double subgt_log_prior(std::log(static_cast<double>(n_het_subgt)));
        lhood[STAR_DIINDEL::HET] -= subgt_log_prior;
    }
}
void
indel_digt_caller::
get_high_low_het_ratio_lhood(const starling_options& /*opt*/,
                             const starling_deriv_options& dopt,
                             const starling_sample_options& sample_opt,
                             const double indel_error_lnp,
                             const double indel_real_lnp,
                             const double ref_error_lnp,
                             const double ref_real_lnp,
                             const indel_key& ik,
                             const indel_data& id,
                             const double het_ratio,
                             const bool is_tier2_pass,
                             const bool is_use_alt_indel,
                             double& het_lhood_high,
                             double& het_lhood_low) {

    // handle het ratio and its complement in one step:
    const double chet_ratio(1.-het_ratio);

    const double log_het_ratio(std::log(het_ratio));
    const double log_chet_ratio(std::log(chet_ratio));

    const bool is_breakpoint(ik.is_breakpoint());

    het_lhood_high=0;
    het_lhood_low=0;

    //    typedef read_path_scores::alt_indel_t::const_iterator aiter;

    typedef indel_data::score_t::const_iterator siter;
    siter i(id.read_path_lnp.begin()), i_end(id.read_path_lnp.end());
    for (; i!=i_end; ++i) {
        const read_path_scores& path_lnp(i->second);

        // optionally skip tier2 data:
        if ((! is_tier2_pass) && (! path_lnp.is_tier1_read)) continue;

        // get alt path lnp:
        double alt_path_lnp(path_lnp.ref);
#if 0
        if (is_use_alt_indel && path_lnp.is_alt &&
            (path_lnp.alt > alt_path_lnp)) {
            alt_path_lnp=path_lnp.alt;
        }
#else
        if (is_use_alt_indel && (! path_lnp.alt_indel.empty()) ) {
            typedef read_path_scores::alt_indel_t::const_iterator aiter;
            aiter j(path_lnp.alt_indel.begin()), j_end(path_lnp.alt_indel.end());
            for (; j!=j_end; ++j) {
                if (j->second>alt_path_lnp) alt_path_lnp=j->second;
            }
        }
#endif

        const double noindel_lnp(log_sum(alt_path_lnp+ref_real_lnp,path_lnp.indel+indel_error_lnp));
        const double hom_lnp(log_sum(alt_path_lnp+ref_error_lnp,path_lnp.indel+indel_real_lnp));

        // allele ratio convention is that the indel occurs at the
        // het_allele ratio and the alternate allele occurs at
        // (1-het_allele_ratio):
        {
            double log_ref_prob(log_chet_ratio);
            double log_indel_prob(log_het_ratio);
            if (! is_breakpoint) {
                get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank,
                                              ik,het_ratio,log_ref_prob,log_indel_prob);
            }
            const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob));

            het_lhood_low += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass);
        }

        {
            double log_ref_prob(log_het_ratio);
            double log_indel_prob(log_chet_ratio);
            if (! is_breakpoint) {
                get_het_observed_allele_ratio(path_lnp.read_length,sample_opt.min_read_bp_flank,
                                              ik,chet_ratio,log_ref_prob,log_indel_prob);
            }
            const double het_lnp(log_sum(noindel_lnp+log_ref_prob,hom_lnp+log_indel_prob));

            het_lhood_high += integrate_out_sites(dopt,path_lnp.nsite,het_lnp,is_tier2_pass);
        }
    }
}
/// Return true when every base of unit matches the reference bases
/// starting at pos. An empty unit trivially matches.
///
static
bool
is_ref_repeat_unit_at_pos(const reference_contig_segment& ref,
                          const pos_t pos,
                          const std::string& unit)
{
    const int unit_size(static_cast<int>(unit.size()));
    for(int j(0); j<unit_size; ++j) {
        if(ref.get_base(pos+j) != unit[j]) return false;
    }
    return true;
}

/// Fill in the repeat-unit fields of iri for an insertion, deletion or
/// swap.
///
/// Reads iri.it, iri.indel_seq and iri.ref_seq, so those must be set
/// before the call. The repeat fields are first reset to their "no
/// repeat" state (is_repeat_unit=false, repeat_unit="N/A", counts 0) and
/// stay that way when:
///  - the indel type is not INSERT, DELETE or SWAP,
///  - a SWAP has different repeat units on its inserted and deleted
///    sequences, or
///  - the repeat unit is empty.
///
/// Otherwise the unit is counted in the reference immediately upstream
/// and downstream of the indel, and that context count is added to the
/// unit counts of the deleted sequence (ref_repeat_count) and of the
/// inserted sequence (indel_repeat_count).
///
static
void
set_repeat_info(const indel_key& ik,
                const reference_contig_segment& ref,
                starling_indel_report_info& iri)
{
    iri.is_repeat_unit = false;
    iri.repeat_unit = "N/A";
    iri.ref_repeat_count = 0;
    iri.indel_repeat_count = 0;

    if(! ((iri.it == INDEL::INSERT) ||
          (iri.it == INDEL::DELETE) ||
          (iri.it == INDEL::SWAP))) return;

    unsigned insert_repeat_count(0);
    unsigned delete_repeat_count(0);

    if       (iri.it == INDEL::INSERT) {
        get_seq_repeat_unit(iri.indel_seq,iri.repeat_unit,insert_repeat_count);
    } else if(iri.it == INDEL::DELETE) {
        get_seq_repeat_unit(iri.ref_seq,iri.repeat_unit,delete_repeat_count);
    } else if(iri.it == INDEL::SWAP) {
        std::string insert_ru;
        std::string delete_ru;
        get_seq_repeat_unit(iri.indel_seq,insert_ru,insert_repeat_count);
        get_seq_repeat_unit(iri.ref_seq,delete_ru,delete_repeat_count);
        if((insert_ru != delete_ru) || insert_ru.empty()) return;

        iri.repeat_unit=insert_ru;
    } else {
        assert(0);
    }

    // A zero-length repeat unit would leave both context loops below with
    // a step of zero, so they would never terminate. Treat it as "no
    // repeat", exactly as the SWAP branch above already does.
    if(iri.repeat_unit.empty()) {
        iri.repeat_unit = "N/A";
        return;
    }

    // count repeats in contextual sequence:
    unsigned indel_context_repeat_count(0);
    {
        const pos_t indel_begin_pos(ik.pos);
        const pos_t indel_end_pos(ik.right_pos());
        const int repeat_unit_size(static_cast<int>(iri.repeat_unit.size()));

        // count upstream repeats, stepping one unit at a time away from
        // the indel until a mismatch or a negative position:
        for(pos_t i(indel_begin_pos-repeat_unit_size); i>=0; i-=repeat_unit_size) {
            if(! is_ref_repeat_unit_at_pos(ref,i,iri.repeat_unit)) break;
            indel_context_repeat_count += 1;
        }

        // count downstream repeats, only testing units which fit
        // entirely before ref.end():
        const pos_t rs(ref.end());
        for(pos_t i(indel_end_pos); (i+static_cast<pos_t>(repeat_unit_size)-1)<rs; i+=repeat_unit_size) {
            if(! is_ref_repeat_unit_at_pos(ref,i,iri.repeat_unit)) break;
            indel_context_repeat_count += 1;
        }
    }

    iri.is_repeat_unit = true;
    iri.ref_repeat_count = indel_context_repeat_count+delete_repeat_count;
    iri.indel_repeat_count = indel_context_repeat_count+insert_repeat_count;
}
// 99% of this task is taking care of indel normalization
static
bool
convert_indel_to_htype(const indel_key& ik,
                       const indel_data& /*id*/,
                       const read_segment& rseg,
                       const reference_contig_segment& ref,
                       htype_element& he) {

    he.clear();

    // get best alignment:
    const alignment* alptr(rseg.get_best_alignment());

    assert(alptr);
    const alignment& al(*alptr);

    // Check that alignment is compatible with indel. Many
    // cases where this fails will be for 'private'
    // indels. The posterior above is over all candidate
    // indels, so one candidate may be the best for this read,
    // *but* the best alignment contains a private indel
    // instead.
    //
    pos_range read_indel_pr;
    if (! is_indel_in_alignment(al,ik,read_indel_pr)) return false;

    const bam_seq read_seq(rseg.get_bam_read());

    const rc_segment_bam_seq ref_bseq(ref);
    pos_range ref_indel_pr(ik.open_pos_range());

    assert(! read_indel_pr.is_empty());
    assert(! ref_indel_pr.is_empty());

    // normalization function adjusts ranges:
    //    normalize_indel(read_indel_pr,ref_indel_pr,read_seq,ref_bseq,read_indel_pr,ref_indel_pr);

    assert(! read_indel_pr.is_empty());
    assert(! ref_indel_pr.is_empty());

    // build he:
    if (ref_indel_pr.is_complete()) {
        he.delete_length=ref_indel_pr.end_pos-ref_indel_pr.begin_pos;
    }

    if (!  read_indel_pr.is_begin_pos) {
        he.pos=read_indel_pr.end_pos;
    } else {
        he.pos=read_indel_pr.begin_pos;
    }

    if       (! read_indel_pr.is_end_pos) {
        he.open_end=OPEN::RIGHT;
    } else if (! read_indel_pr.is_begin_pos) {
        he.open_end=OPEN::LEFT;
    }

    {   // copy into htype element seq (don't worry about efficiency for now)
        pos_range pr(read_indel_pr);
        if (! pr.is_complete()) {
            const pos_range nonclip_pr(get_nonclip_range(al.path));
            assert(nonclip_pr.is_complete());
            if       (! pr.is_begin_pos) {
                pr.set_begin_pos(nonclip_pr.begin_pos);
            } else {
                pr.set_end_pos(nonclip_pr.end_pos);
            }
        }

        assert(pr.begin_pos<=pr.end_pos && pr.begin_pos>=0);
        for (pos_t i(pr.begin_pos); i<pr.end_pos; ++i) {
            he.seq.push_back(read_seq.get_char(i));
        }
    }

    if ((he.delete_length==0) &&
        (he.insert_length()==0)) {
        he.clear();
        return false;
    }

    return true;
}