// Record one observed transition from state character `from` to state
// character `to` by incrementing the matching cell of td.state_transitions.
void add_state_transition(TrainingData& td, char from, char to)
{
    // Translate the state characters into matrix coordinates.
    const int row = statechar2index(from);
    const int col = statechar2index(to);

    // Read the current tally for this (from, to) pair and store it back plus one.
    const int previous = get(td.state_transitions, row, col);
    set(td.state_transitions, row, col, previous + 1);
}
// Bump the counter in training_data.state_transitions for a single observed
// transition state_from -> state_to.
void TransitionParameters::add_transition_observation(char state_from, char state_to)
{
    // Matrix coordinates of the source and destination states.
    const int src = statechar2index(state_from);
    const int dst = statechar2index(state_to);

    // Fetch how many times this transition has been seen, then write back one more.
    const int seen_so_far = get(training_data.state_transitions, src, dst);
    set(training_data.state_transitions, src, dst, seen_so_far + 1);
}
void KHMMParameters::train() { TrainingData& td = training_data; // // Profile HMM transitions // fprintf(stderr, "TRANSITIONS\n"); size_t sum_m_not_k = get(td.state_transitions, statechar2index('M'), statechar2index('M')) + get(td.state_transitions, statechar2index('M'), statechar2index('E')); size_t me = get(td.state_transitions, statechar2index('M'), statechar2index('E')); double p_me_not_k = (double)me / sum_m_not_k; fprintf(stderr, "M->E|not_k: %lf\n", p_me_not_k); size_t sum_e = 0; for(int j = 0; j < td.state_transitions.n_cols; ++j) { sum_e += get(td.state_transitions, statechar2index('E'), j); } size_t ee = get(td.state_transitions, statechar2index('E'), statechar2index('E')); double p_ee = (double)ee / sum_e; fprintf(stderr, "E->E: %lf\n", p_ee); for(int i = 0; i < td.state_transitions.n_rows; ++i) { fprintf(stderr, "\t%c: ", "MEK"[i]); for(int j = 0; j < td.state_transitions.n_cols; ++j) { fprintf(stderr, "%d ", get(td.state_transitions, i, j)); } fprintf(stderr, "\n"); } if(sum_e == 0 || sum_m_not_k == 0) { // insufficient data to train, use defaults return; } trans_m_to_e_not_k = p_me_not_k; trans_e_to_e = p_ee; // // Signal-dependent skip probability // // Initialize observations with pseudocounts from the current model size_t num_bins = skip_probabilities.size(); uint32_t pseudocount = 100; std::vector<double> total_observations(num_bins, 0.0f); std::vector<double> skip_observations(num_bins, 0.0f); for(size_t bin = 0; bin < num_bins; bin++) { skip_observations[bin] = skip_probabilities[bin] * pseudocount; total_observations[bin] = pseudocount; } for(size_t oi = 0; oi < td.kmer_transitions.size(); ++oi) { const KmerTransitionObservation& to = td.kmer_transitions[oi]; bool is_skip = to.state == 'K'; size_t bin = get_bin(*this, to.level_1, to.level_2); skip_observations[bin] += is_skip; total_observations[bin] += 1; } // Update probabilities for(size_t bin = 0; bin < num_bins; bin++) { skip_probabilities[bin] = skip_observations[bin] / 
total_observations[bin]; fprintf(stderr, "SKIPLEARN -- bin[%zu] %.3lf %.3lf %.3lf\n", bin, skip_observations[bin], total_observations[bin], skip_probabilities[bin]); } }