/**
 * Returns the number of entries of the emission lookup table of one state.
 *
 * The alphabet is defined over single sequences, so the largest lookup index
 * is the pair index of the last symbol with itself; the table needs one more
 * entry than that index.
 *
 * @param mo           pair HMM
 * @param state_index  index of the state in mo->s
 * @return largest emission index of the state plus one
 */
int ghmm_dpmodel_emission_table_size(ghmm_dpmodel* mo, int state_index) {
  int n_symbols = mo->size_of_alphabet[mo->s[state_index].alphabet];
  int last_symbol = n_symbols - 1;
  int max_index = ghmm_dpmodel_pair(last_symbol, last_symbol, n_symbols,
                                    mo->s[state_index].offset_x,
                                    mo->s[state_index].offset_y);

  return max_index + 1;
}
/*============================================================================*/ static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, cell * start, cell * stop, double * log_p, plocal_propagate_store_t * pv) { #define CUR_PROC "pviterbi_step" /* printf("---- propagate step -----\n"); */ int u, v, j, i; double value, max_value, previous_prob; /* int len_path = mo->N*len; the length of the path is not known apriori */ int start_x, start_y, stop_x, stop_y; double log_b_i, log_in_a_ij; cell * middle = NULL; int middle_x; double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int); log_in_a = &sget_log_in_a_prop; init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y); middle_x = start_x + (stop_x - start_x) / 2; /* if (mo->model_type & kSilentStates && */ /* mo->silent != NULL && */ /* mo->topo_order == NULL) { */ /* ghmm_dmodel_topo_order( mo ); */ /* } */ init_phi_prop(pv, X, Y, start, stop); #ifdef DEBUG if (start != NULL && mo->s[start->state].offset_y == 0) { for (u = 0; u<=mo->max_offset_x; u++) { printf("row %i of phi\n", u); for (v = start_y - 1; v < stop_y; v++) { printf("phi(0, %i, %i): %f, ", v, start->state, get_phi_prop(pv, 0, v, 0, 0, start->state)); } printf("\n\n"); } } #endif /* u, v > 0 */ /** THIS IS THE MAIN RECURRENCE **/ /* printf("Main loop x from %i to %i and y from %i to %i\n", start_x + mo->max_offset_x + 1, stop_x, start_y, stop_y);*/ for (u = start_x + mo->max_offset_x + 1; u < stop_x; u++) { for (v = start_y - mo->max_offset_y; v < stop_y; v++) { for (j = 0; j < mo->N; j++) { /** initialization of phi (lookback matrix) **/ set_phi_prop(pv, u, v, j, +1); set_end_of_first(pv, 0, v, j, NULL); } for (i = 0; i < mo->N; i++) { /* Determine the maximum */ /* max_phi = phi[i] + log_in_a[j][i] ... 
*/ if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i]) { max_value = -DBL_MAX; set_end_of_first(pv, 0, v, i, NULL); for (j = 0; j < mo->s[i].in_states; j++) { /* look back in the phi matrix at the offsets */ previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]); log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v); if ( previous_prob != +1 && log_in_a_ij != +1) { value = previous_prob + log_in_a_ij; if (value > max_value) { max_value = value; /* Critical point for the propagate algorithm if we are at the middle point of sequence X store this at the end point of the first alignment */ if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) { cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), v - (mo->s[i].offset_y - 1), i, mo->s[i].in_id[j], previous_prob, log_in_a_ij); if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) { cell * old = get_end_of_first(pv, u, v, 0, 0, i); m_free(old); } set_end_of_first(pv, u, v, i, end_of_first); } else { /* at all other points simply propagate the values on */ set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j])); } } } else {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */ } #ifdef DEBUG int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y); if (emission > ghmm_dpmodel_emission_table_size(mo, i)){ printf("State %i\n", i); ghmm_dpmodel_state_print(&(mo->s[i])); printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission); } #endif log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); /* No maximum found (that is, state never reached) or the output O[t] = 0.0: */ if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */ log_b_i == +1 ) { set_phi_prop(pv, u, v, i, 1); } else set_phi_prop(pv, u, v, i, max_value + log_b_i); } } /* complete time step for emitting states */ /* last_osc = osc; */ /* save last transition class */ /*if (mo->model_type & kSilentStates) { p__viterbi_silent( mo, t, v ); }*/ /* complete time step for silent states */ /************** for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for (i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); } ****************/ } /* End for v in Y */ /* Next character in X */ /* push back the old phi values */ push_back_phi_prop(pv, Y->length); } /* End for u in X */ /* Termination */ max_value = -DBL_MAX; /* for the last segment search for the maximum probability at the end of the two sequences */ if (stop == NULL){ for (j = 0; j < mo->N; j++){ #ifdef DEBUG /* printf("phi(len_x)(len_y)(%i)=%f\n", j, pv->phi[0][stop_y-1][j]); ghmm_dpmodel_print_cell(pv->end_of_first[0][stop_y - 1][j]); */ #endif if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) != +1 && get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) > max_value) { max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j); middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, j); } } } /* traceback for the interior segments have to start with the previous state of the middle beacuse the path has to be connected */ else { if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state) != +1 ) { max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state); middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state); } 
} if (max_value == -DBL_MAX) { /* Sequence can't be generated from the model! */ *log_p = +1; } else { *log_p = max_value; } return middle; #undef CUR_PROC }
/*============================================================================*/
/**
 * Initialises the propagate store for one segment and computes the first
 * rows (u = -1 .. max_offset_x) of the recurrence.
 *
 * This is in principle the same as the main recurrence in
 * pviterbi_propagate_step, but it additionally assigns the initial
 * probability log_pi to states at the cell where they can start.
 * The segment start coordinates are stored in pv->start_x / pv->start_y.
 * The value +1 marks an impossible log probability.
 *
 * @param pv     propagate store (phi and end_of_first are reset)
 * @param X      first sequence
 * @param Y      second sequence
 * @param start  start cell of the segment (may be NULL)
 * @param stop   stop cell of the segment (may be NULL)
 */
static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X,
                           ghmm_dpseq * Y, cell * start, cell * stop) {
#define CUR_PROC "init_phi_prop"
  int u, v, j, i, off_x, y;
  double value, max_value, previous_prob, log_b_i, log_in_a_ij ;
  int start_x, start_y, stop_x, stop_y, middle_x;
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*,
                     ghmm_dpseq*, int, int);

  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  pv->start_x = start_x;
  pv->start_y = start_y;
  middle_x = start_x + (stop_x - start_x) / 2;

  /* to be sure that we do not look up something out of the bounds set the
     whole lookback matrix to +1 */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
        pv->phi[off_x][y][j] = +1;
      }

  /* Initialize the end_of_first matrix. The stored cells are only dropped,
     not freed (the free was disabled in the original code).
     NOTE(review): confirm who owns these cells. */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++)
        if (pv->end_of_first[off_x][y][j]) {
          pv->end_of_first[off_x][y][j] = NULL;
        }

  if (mo->model_type & GHMM_kSilentStates) {
    /* could go into silent state at t=0; not implemented */
  }

  /* u, v <= max offsets; here u is relative to start_x (characters of X are
     read at u + start_x).
     NOTE(review): u is compared against the absolute middle_x below without
     adding start_x -- confirm that this is intended for start_x > 0. */
  for (u = -1; u <= mo->max_offset_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) {
        /** initialization of phi (lookback matrix) **/
        set_phi_prop(pv, u, v, j, 1);
        /** traceback for the propagate algorithm **/
        set_end_of_first(pv, u, v, j, NULL);
      }

      for (i = 0; i < mo->N; i++) {
        /* only emitting states are handled; silent states are skipped */
        if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
          max_value = -DBL_MAX;
          set_end_of_first(pv, u, v, i, NULL);

          /* maximise phi[predecessor] + log_in_a over all predecessors */
          for (j = 0; j < mo->s[i].in_states; j++) {
            /* look back in the phi matrix at the offsets */
            previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x,
                                         mo->s[i].offset_y, mo->s[i].in_id[j]);
            log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
            if ( previous_prob != +1 && log_in_a_ij != +1) {
              value = previous_prob + log_in_a_ij;
              if (value > max_value) {
                max_value = value;
                /* Critical point for the propagate algorithm: if we are at
                   the middle point of sequence X store this cell as the end
                   point of the first alignment */
                if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
                  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1),
                                                  v - (mo->s[i].offset_y - 1),
                                                  i, mo->s[i].in_id[j],
                                                  previous_prob, log_in_a_ij);
                  /* a cell created for a worse predecessor is replaced */
                  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
                    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
                    m_free(old);
                  }
                  set_end_of_first(pv, u, v, i, end_of_first);
                }
                else {
                  /* at all other points simply propagate the values on */
                  set_end_of_first(pv, u, v, i,
                                   get_end_of_first(pv, u, v,
                                                    mo->s[i].offset_x,
                                                    mo->s[i].offset_y,
                                                    mo->s[i].in_id[j]));
                }
              }
            }
          }

#ifdef DEBUG
          int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
                                           ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                                           mo->size_of_alphabet[mo->s[i].alphabet],
                                           mo->s[i].offset_x, mo->s[i].offset_y);
          /* valid indices are 0 .. table size - 1 */
          if (emission >= ghmm_dpmodel_emission_table_size(mo, i)){
            printf("State %i\n", i);
            ghmm_dpmodel_state_print(&(mo->s[i]));
            /* report the X character that was actually used (u + start_x) */
            printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
                   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
                   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                   mo->size_of_alphabet[mo->s[i].alphabet],
                   ghmm_dpmodel_emission_table_size(mo, i), emission);
          }
#endif
          log_b_i = log_b_prop(pv, i,
                               ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
                                                 ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                                                 mo->size_of_alphabet[mo->s[i].alphabet],
                                                 mo->s[i].offset_x,
                                                 mo->s[i].offset_y));

          /* this is the difference from the main loop: check whether this
             state could be an initial state and add the initial probability */
          if (log_b_i == +1 ) {
            set_phi_prop(pv, u, v, i, +1);
          }
          else {
            if (max_value == -DBL_MAX)
              set_phi_prop(pv, u, v, i, +1);
            else
              set_phi_prop(pv, u, v, i, max_value);

            /* a state can start exactly at the cell reached by consuming its
               own offsets from the segment start */
            if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u &&
                mo->s[i].offset_y - 1 + start_y == v){
              set_phi_prop(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
              printf("Initial log prob state %i at (%i, %i) = %f\n", i,
                     start_x + u, v, get_phi_prop(pv, u, v, 0, 0, i));
              printf("Characters emitted X: %i, Y: %i\n",
                     ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
                     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
            }
            /* add the emission only if the cell is reachable */
            if (get_phi_prop(pv, u, v, 0, 0, i) != 1) {
              set_phi_prop(pv, u, v, i,
                           get_phi_prop(pv, u, v, 0, 0, i) + log_b_i);
            }
          }
        }
      } /* complete time step for emitting states */
    } /* End for v in Y */

    /* Next character in X: push back the old phi values */
    push_back_phi_prop(pv, Y->length);

#ifdef DEBUG
    if (start != NULL && mo->s[start->state].offset_y == 0) {
      int x;
      max_value = -DBL_MAX;
      i = -1;
      y = -1;
      off_x = -1;
      for (x = 0; x<=mo->max_offset_x; x++)
        for (v = - mo->max_offset_y; v<Y->length; v++) {
          for (j=0; j<mo->N; j++) {
            if (get_phi_prop(pv, x, v, x, 0, j) >= max_value &&
                get_phi_prop(pv, x, v, x, 0, j) < 1 - PROP_EPS) {
              max_value = get_phi_prop(pv, x, v, x, 0, j);
              i = j;
              off_x = x;
              y = v;
            }
          }
        }
      printf("u = %i start_x = %i off_x = %i ", u, start_x, off_x);
      printf("max log prob state %i at (%i, %i) = %f after pushback\n", i,
             start_x + u - (off_x - 1), y, get_phi_prop(pv, u, y, off_x, 0, i));
    }
#endif
  } /* End for u in X */
#undef CUR_PROC
}
/*============================================================================*/
/**
 * Computes the log probability of a given state path for the sequence pair
 * (X, Y).
 *
 * Starting with log_pi of the first state, the path is followed state by
 * state; each state advances the positions in X and Y by its offsets and
 * contributes its transition and emission log probabilities.
 *
 * @param mo             pair HMM
 * @param X              first sequence
 * @param Y              second sequence
 * @param state_seq      state indices of the path
 * @param state_seq_len  number of entries in state_seq
 * @return the log probability of the path, or 1.0 (the marker for "not
 *         possible" used in this file) if the path is empty, contains an
 *         invalid state, uses an impossible start/transition/emission, runs
 *         past the end of a sequence or does not consume both sequences
 *         completely. A diagnostic is written to stderr in these cases.
 */
double ghmm_dpmodel_viterbi_logp(ghmm_dpmodel *mo, ghmm_dpseq * X,
                                 ghmm_dpseq * Y, int *state_seq,
                                 int state_seq_len) {
#define CUR_PROC "ghmm_dpmodel_viterbi_logp"
  int s, t, i, j, u, v;
  int emission;
  double log_p = 0.0;
  double log_b_i = 1.0;
  double log_in_a = 1.0;
  double result = 1.0;          /* stays 1.0 on every failure path */
  plocal_store_t *pv;

  /* Allocate the store and precompute log(a_ij) and log(b_j(o_t)) */
  pv = pviterbi_alloc(mo, 0, 0);
  if (!pv) {
    fprintf(stderr, "could not allocate the viterbi store\n");
    return result;
  }
  pviterbi_precompute(mo, pv);

  /* initial state */
  t = 0;
  u = -1;
  v = -1;
  if (state_seq_len <= t) {
    /* there is no path.. */
    fprintf(stderr, "No path given!\n");
    goto STOP;
  }
  i = state_seq[t];
  if (i < 0 || i >= mo->N) {
    fprintf(stderr, "invalid state %i in path (t=%i)\n", i, t);
    goto STOP;
  }
  /* initial state probability */
  log_p += mo->s[i].log_pi;
  if (log_p == 1.0) {
    fprintf(stderr, "the initial probability of state %i is zero\n", i);
    goto STOP;
  }
  /* consume the first characters of the sequences */
  u += mo->s[i].offset_x;
  v += mo->s[i].offset_y;
  if (u >= X->length || v >= Y->length) {
    fprintf(stderr, "path consumes too many chars\n");
    goto STOP;
  }
  /* get the emission probability */
  emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                               ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                               mo->size_of_alphabet[mo->s[i].alphabet],
                               mo->s[i].offset_x, mo->s[i].offset_y);
  log_b_i = log_b(pv, i, emission);
  if (log_b_i == 1.0) { /* chars cant be emitted */
    fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n",
            ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
            ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t);
    goto STOP;
  }
  log_p += log_b_i;

  /* rest of the sequence */
  for (t=1; t<state_seq_len; t++) {
    j = i;              /* predecessor state */
    i = state_seq[t];   /* current state */
    if (i < 0 || i >= mo->N) {
      fprintf(stderr, "invalid state %i in path (t=%i)\n", i, t);
      goto STOP;
    }

    /* consume the next characters */
    u += mo->s[i].offset_x;
    v += mo->s[i].offset_y;
    if (u >= X->length || v >= Y->length) { /* path consumes too many chars */
      fprintf(stderr, "path consumes too many chars\n");
      goto STOP;
    }

    /* transition prob state j -> i */
    log_in_a = 1.0;
    for (s=0; s<mo->s[i].in_states; s++) {
      if (mo->s[i].in_id[s] == j) {
        log_in_a = sget_log_in_a(pv, i, s, X, Y, u, v);
        break;
      }
    }
    if (log_in_a == 1.0) { /* transition not possible */
      fprintf(stderr, "transition (%i -> %i) at t=%i not possible\n", j, i,t);
      goto STOP;
    }

    /* emission probability */
    emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                                 ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                                 mo->size_of_alphabet[mo->s[i].alphabet],
                                 mo->s[i].offset_x, mo->s[i].offset_y);
    log_b_i = log_b(pv, i, emission);
    if (log_b_i == 1.0) { /* characters cant be emitted */
      fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n",
              ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
              ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t);
      goto STOP;
    }
    log_p += log_in_a + log_b_i;
  }

  /* check if all of the sequences has been consumed: the path is incomplete
     as soon as either X or Y has characters left */
  if (u != X->length - 1 || v != Y->length - 1) {
    fprintf(stderr, "path consumes not all characters (%i of %i, %i of %i)\n",
            u + 1, X->length, v + 1, Y->length);
    goto STOP;
  }
  result = log_p;

STOP:
  /* single exit: the store is released on success and on every failure */
  pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
  return result;
#undef CUR_PROC
} /* ghmm_dpmodel_viterbi_logp */
/*============================================================================*/
/**
 * Pair Viterbi algorithm with a selectable traceback start.
 *
 * Fills the lookback matrix phi and the traceback matrix psi for the
 * sequence pair (X, Y) and traces the best state path back from the end of
 * both sequences. The value +1 marks an impossible log probability.
 *
 * @param mo                    pair HMM
 * @param X                     first sequence
 * @param Y                     second sequence
 * @param log_p                 out: log probability of the best path, +1 if
 *                              the sequences cannot be generated
 * @param path_length           out: number of entries in the returned array
 * @param start_traceback_with  -1 to start the traceback at the most likely
 *                              end state, otherwise the state to start with
 *                              (special mode of the d & c algorithm)
 * @return array built from the state list (a single -1 entry if no path was
 *         found), or NULL if the store could not be allocated
 */
int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X,
                                      ghmm_dpseq * Y, double *log_p,
                                      int *path_length,
                                      int start_traceback_with) {
#define CUR_PROC "ghmm_dpmodel_viterbi"
  int u, v, j, i, off_x, off_y, current_state_index;
  double value, max_value, previous_prob, end_phi;
  plocal_store_t *pv;
  int *state_seq = NULL;
  int emission;
  double log_b_i, log_in_a_ij;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*,
                     int, int);
  i_list * state_list;

  /* the length of the path is not known a priori, so it is collected in a
     list */
  state_list = ighmm_list_init_list();
  log_in_a = &sget_log_in_a;

  /* Allocate the matrices log_in_a, log_b, phi and psi */
  pv = pviterbi_alloc(mo, X->length, Y->length);
  if (!pv) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }

  /* Precomputing the log(a_ij) and log(bj(ot)) */
  pviterbi_precompute(mo, pv);
  /* Initialize the lookback matrix for the first rows */
  init_phi(pv, X, Y);

  /* u > max_offset_x, v starts at -max_offset_y to allow states with
     offset_x == 0, which corresponds to a series of gap states before
     reading the first character of x */
  /** THIS IS THE MAIN RECURRENCE **/
  for (u = mo->max_offset_x + 1; u < X->length; u++) {
    for (v = -mo->max_offset_y; v < Y->length; v++) {
      for (j = 0; j < mo->N; j++) {
        /** initialization of phi (lookback matrix), psi (traceback) **/
        set_phi(pv, u, v, j, +1);
        set_psi(pv, u, v, j, -1);
      }

      for (i = 0; i < mo->N; i++) {
        /* only emitting states are handled; silent states are skipped */
        if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
          max_value = -DBL_MAX;
          set_psi(pv, u, v, i, -1);

          /* maximise phi[predecessor] + log_in_a over all predecessors */
          for (j = 0; j < mo->s[i].in_states; j++) {
            /* look back in the phi matrix at the offsets */
            previous_prob = get_phi(pv, u, v, mo->s[i].offset_x,
                                    mo->s[i].offset_y, mo->s[i].in_id[j]);
            log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
            if ( previous_prob != +1 && log_in_a_ij != +1) {
              value = previous_prob + log_in_a_ij;
              if (value > max_value) {
                max_value = value;
                set_psi(pv, u, v, i, mo->s[i].in_id[j]);
              }
            }
          }

          /* index of the emitted character pair in the emission table */
          emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                                       ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                                       mo->size_of_alphabet[mo->s[i].alphabet],
                                       mo->s[i].offset_x, mo->s[i].offset_y);
#ifdef DEBUG
          /* valid indices are 0 .. table size - 1 */
          if (emission >= ghmm_dpmodel_emission_table_size(mo, i)){
            printf("State %i\n", i);
            ghmm_dpmodel_state_print(&(mo->s[i]));
            printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
                   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                   mo->size_of_alphabet[mo->s[i].alphabet],
                   ghmm_dpmodel_emission_table_size(mo, i), emission);
          }
#endif
          log_b_i = log_b(pv, i, emission);

          /* No maximum found (that is, state never reached)
             or the output cannot be emitted: */
          if (max_value == -DBL_MAX || log_b_i == +1 ) {
            set_phi(pv, u, v, i, +1);
          }
          else
            set_phi(pv, u, v, i, max_value + log_b_i);
        }
      } /* complete time step for emitting states */
    } /* End for v in Y */

    /* Next character in X */
    push_back_phi(pv, Y->length);
  } /* End for u in X */

  /* Termination: the list starts with the placeholder -1, which is replaced
     by the end state once one is found */
  max_value = -DBL_MAX;
  ighmm_list_append(state_list, -1);

  if (start_traceback_with == -1) {
    /* default: search for the most likely state at the end of both
       sequences */
    for (j = 0; j < mo->N; j++){
      end_phi = get_phi(pv, X->length-1, Y->length-1, 0, 0, j);
#ifdef DEBUG
      printf("phi(len_x)(len_y)(%i)=%f\n", j, end_phi);
#endif
      if ( end_phi != +1 && end_phi > max_value) {
        max_value = end_phi;
        state_list->last->val = j;
      }
    }
  }
  else {
    /* this is the special traceback mode for the d & c algorithm that also
       connects the traceback to the first state of the rest of the path */
#ifdef DEBUG
    printf("D & C traceback from state %i!\n", start_traceback_with);
    printf("Last characters emitted X: %i, Y: %i\n",
           ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet,
                               X->length-1),
           ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet,
                               Y->length-1));
    for (j = 0; j < mo->N; j++){
      printf("phi(len_x)(len_y)(%i)=%f\n", j,
             get_phi(pv, X->length-1, Y->length-1, 0, 0, j));
    }
#endif
    max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0,
                        start_traceback_with);
    if (max_value != 1 && max_value > -DBL_MAX)
      state_list->last->val = start_traceback_with;
  }

  if (state_list->last->val == -1) {
    /* Sequence can't be generated from the model (no reachable end state):
       there is nothing to trace back, the list keeps its single -1 */
    *log_p = +1;
  }
  else {
    *log_p = max_value;
    /* start trace back at the end of both sequences */
    u = X->length - 1;
    v = Y->length - 1;
    current_state_index = state_list->first->val;
    /* the offsets are only read while the state index is valid, so mo->s is
       never indexed with -1 */
    while (current_state_index != -1) {
      off_x = mo->s[current_state_index].offset_x;
      off_y = mo->s[current_state_index].offset_y;
      if (u - off_x < -1 || v - off_y < -1)
        break;
      /* look up the preceding state and save it in the first position of
         the state list */
      current_state_index = get_psi(pv, u, v, current_state_index);
      if (current_state_index != -1)
        ighmm_list_insert(state_list, current_state_index);
      /* move in the alignment matrix */
      u -= off_x;
      v -= off_y;
    }
  }

  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x,
                mo->max_offset_y);
  state_seq = ighmm_list_to_array(state_list);
  *path_length = state_list->length;
  /* the path now lives in state_seq; release the temporary list */
  ighmm_list_free(state_list);
  return (state_seq);

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* Free the memory space; state_seq is still NULL here, so only the store
     and the list have to be released */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x,
                mo->max_offset_y);
  ighmm_list_free(state_list);
  return NULL;
#undef CUR_PROC
} /* viterbi */
/*============================================================================*/
/**
 * Initialises the lookback matrix phi and computes the first rows
 * (u = -1 .. max_offset_x) of the Viterbi recurrence.
 *
 * This is in principle the same as the main recurrence, but it additionally
 * assigns the initial probability log_pi to states at the cell they reach
 * by consuming their own offsets; states with offsets greater than one
 * cannot be inhabited at u=0, v=0. The value +1 marks an impossible log
 * probability.
 *
 * @param pv  viterbi store; pv->mo is the model, phi and psi are written
 * @param X   first sequence
 * @param Y   second sequence
 */
static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y) {
  int emission;
  int u, v, j, i, off_x, y;
  double log_in_a_ij;
  double value, max_value, previous_prob, log_b_i;
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*,
                     int, int);

  log_in_a = &sget_log_in_a;

  /* Initialize the whole lookback matrix with +1 */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
        pv->phi[off_x][y][j] = +1;
      }

  if ( mo->model_type & GHMM_kSilentStates ) {
    /* could go into silent state at t=0; not implemented */
  }

  /* iteration start is u=-1 and v=-max_offset_y to allow states with
     offset_x == 0, which corresponds to a series of gap states before
     reading the first character of x, or equally for offset_y == 0 */
  /* u, v <= max offsets */
  for (u = -1; u <= mo->max_offset_x; u++) {
    for (v = -mo->max_offset_y; v < Y->length; v++) {
      for (j = 0; j < mo->N; j++) {
        /** initialization of phi (lookback matrix), psi (traceback) **/
        set_phi(pv, u, v, j, +1);
        set_psi(pv, u, v, j, -1);
      }

      /* for each state i */
      for (i = 0; i < mo->N; i++) {
        /* only emitting states are handled; silent states are skipped */
        if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
          max_value = -DBL_MAX;
          set_psi(pv, u, v, i, -1);

          /* maximise phi[predecessor] + log_in_a over all predecessors */
          for (j = 0; j < mo->s[i].in_states; j++) {
            /* look back in the phi matrix at the offsets */
            previous_prob = get_phi(pv, u, v, mo->s[i].offset_x,
                                    mo->s[i].offset_y, mo->s[i].in_id[j]);
            log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
            if ( previous_prob != +1 && log_in_a_ij != +1) {
              value = previous_prob + log_in_a_ij;
              if (value > max_value) {
                max_value = value;
                set_psi(pv, u, v, i, mo->s[i].in_id[j]);
              }
            }
          }

          /* index of the emitted character pair in the emission table */
          emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                                       ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                                       mo->size_of_alphabet[mo->s[i].alphabet],
                                       mo->s[i].offset_x, mo->s[i].offset_y);
#ifdef DEBUG
          /* valid indices are 0 .. table size - 1 */
          if (emission >= ghmm_dpmodel_emission_table_size(mo, i)){
            printf("State %i\n", i);
            ghmm_dpmodel_state_print(&(mo->s[i]));
            printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
                   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                   mo->size_of_alphabet[mo->s[i].alphabet],
                   ghmm_dpmodel_emission_table_size(mo, i), emission);
          }
#endif
          log_b_i = log_b(pv, i, emission);

          /* this is the difference from the main loop: check whether this
             state could be an initial state and add the initial probability */
          if (log_b_i == +1 ) {
            set_phi(pv, u, v, i, +1);
          }
          else {
            if (max_value == -DBL_MAX)
              set_phi(pv, u, v, i, +1);
            else
              set_phi(pv, u, v, i, max_value);

            /* a state can start exactly at the cell reached by consuming
               its own offsets */
            if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u &&
                mo->s[i].offset_y - 1 == v) {
              set_phi(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
              printf("Initial log prob state %i at (%i, %i) = %f\n", i, u, v,
                     get_phi(pv, u, v, 0, 0, i));
              printf("Characters emitted X: %i, Y: %i\n",
                     ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
            }
            /* add the emission only if the cell is reachable */
            if (get_phi(pv, u, v, 0, 0, i) != 1)
              set_phi(pv, u, v, i, get_phi(pv, u, v, 0, 0, i) + log_b_i);
          }
        }
      } /* complete time step for emitting states */
    } /* End for v in Y */

    /* Next character in X: push back the old phi values */
    push_back_phi(pv, Y->length);
  } /* End for u in X */
}