// approximately unimodal under most pictures of the world // scan at maximum and build up a picture of the likelihood using log(n)*constant measurements, interpolate to linearize void PosteriorInference::InterpolateFrequencyScan(ShortStack &total_theory, bool update_frequency, int strand_key) { unsigned int num_reads = ResizeToMatch(total_theory); float fnum_reads = (float) num_reads; UpdateMaxFreqFromResponsibility(total_theory, strand_key); int eval_start = (int)(max_freq * num_reads); vector<unsigned int> samples; FibInterval(samples, eval_start, num_reads); unsigned int i_last = 0; eval_at_frequency[i_last] = (float)i_last / fnum_reads; log_posterior_by_frequency[i_last] = total_theory.PosteriorFrequencyLogLikelihood(eval_at_frequency[i_last], data_reliability, strand_key); int bottom = log_posterior_by_frequency[i_last]; int top = bottom; for (unsigned int i_dx = 1; i_dx < samples.size(); i_dx++) { unsigned int i_eval = samples[i_dx]; eval_at_frequency[i_eval] = (float)i_eval / fnum_reads; log_posterior_by_frequency[i_eval] = total_theory.PosteriorFrequencyLogLikelihood(eval_at_frequency[i_eval],data_reliability, strand_key); top = log_posterior_by_frequency[i_eval]; for (unsigned int i_mid = i_last + 1; i_mid < i_eval; i_mid++) { int delta_low = i_mid - i_last; int delta_hi = i_eval - i_last; eval_at_frequency[i_mid] = (float)i_mid / fnum_reads; log_posterior_by_frequency[i_mid] = (top * delta_low + bottom * delta_hi) / (delta_low + delta_hi); } bottom = top; i_last = i_eval; } FindMaxFrequency(update_frequency); scan_done = true; };
// do a hard classification as though the reads were independent // i.e. look more like the data in the BAM file void PosteriorInference::StartAtHardClassify(ShortStack &total_theory, bool update_frequency, float start_frequency) { // just to allocate ResizeToMatch(total_theory); if (update_frequency) { max_freq = start_frequency; max_ll = total_theory.PosteriorFrequencyLogLikelihood(max_freq, data_reliability, ALL_STRAND_KEY); } total_theory.UpdateResponsibility(max_freq, data_reliability); }
// posterior frequency inference given the current data/likelihood pairing:
// evaluate the log-posterior at every candidate frequency i / num_reads
void PosteriorInference::DoPosteriorFrequencyScan(ShortStack &total_theory, bool update_frequency, int strand_key) {
  //cout << "ScanningFrequency" << endl;
  unsigned int num_reads = ResizeToMatch(total_theory);
  float denom = (float) num_reads;

  unsigned int n_points = eval_at_frequency.size();
  for (unsigned int idx = 0; idx < n_points; ++idx) {
    float freq = (float) idx / denom;
    eval_at_frequency[idx] = freq;
    log_posterior_by_frequency[idx] = total_theory.PosteriorFrequencyLogLikelihood(freq, data_reliability, strand_key);
  }

  // if doing monomorphic eval, set frequency to begin with and don't update
  FindMaxFrequency(update_frequency);
  // log_posterior now contains all frequency information inferred from the data
  scan_done = true;
}
// skip time consuming scan and use responsibilities as cluster entry
void PosteriorInference::UpdateMaxFreqFromResponsibility(ShortStack &total_theory, int strand_key) {
  // BUGFIX: assign to the member max_freq. The old "float max_freq = ..." declared
  // a local that shadowed the member, so the member stayed stale while max_ll was
  // computed from the fresh value — callers (e.g. InterpolateFrequencyScan) read
  // the member right after this call and got the old frequency.
  max_freq = total_theory.FrequencyFromResponsibility(strand_key);
  max_ll = total_theory.PosteriorFrequencyLogLikelihood(max_freq, data_reliability, strand_key);
  scan_done = false; // didn't come from scan
}