/*============================================================================*/
/**
 * Look up a precomputed log emission value in the propagate store.
 *
 * @param pv        store whose log_b table is read
 * @param state     index of the emitting state (first index of log_b)
 * @param emission  emission index (second index of log_b)
 * @return pv->log_b[state][emission]; the lookup itself is unchecked
 *
 * When DEBUG is defined, indices outside the table are reported on stderr
 * before the lookup is made. The emission index equal to the table size is
 * accepted, because the precompute step stores a marker for invalid
 * emissions in that extra last field.
 */
static double log_b_prop(plocal_propagate_store_t * pv, int state, int emission) {
#ifdef DEBUG
  if (state < 0 || state >= pv->mo->N) {
    fprintf(stderr, "log_b_prop: State index %i out of bounds [0, %i)\n",
            state, pv->mo->N);
  }
  else {
    /* only query the table size for a state index known to be valid */
    int table_size = ghmm_dpmodel_emission_table_size(pv->mo, state);
    if (emission < 0 || emission > table_size) {
      fprintf(stderr,
              "log_b_prop: Emission index %i out of bounds [0, %i] for state %i\n",
              emission, table_size, state);
    }
  }
#endif
  return pv->log_b[state][emission];
}
/*============================================================================*/
/**
 * Fill the log tables of the propagate store from the model parameters.
 *
 * For every target state j, incoming transition i and transition class
 * t_class, pv->log_in_a[j][i][t_class] receives log(in_a); for every state j
 * and emission index, pv->log_b[j][emission] receives log(b). A probability
 * of exactly 0.0 is stored as +1 instead of taking the logarithm; the rest of
 * the file tests for that value to skip impossible transitions/emissions.
 * The field after the last emission of each state is set to 1 as well
 * (slot for invalid emissions).
 *
 * @param mo  model providing in_a, b and the table sizes
 * @param pv  store whose log_in_a and log_b tables are written
 */
static void pviterbi_prop_precompute (ghmm_dpmodel *mo, plocal_propagate_store_t *pv) {
#define CUR_PROC "pviterbi_prop_precompute"
  int i, j, emission, t_class;

  /* Precomputing the log(a_ij) */
  for (j = 0; j < mo->N; j++) {
    for (i = 0; i < mo->s[j].in_states; i++) {
      /* the number of classes is a property of the SOURCE state */
      const int n_classes = mo->s[mo->s[j].in_id[i]].kclasses;
      for (t_class = 0; t_class < n_classes; t_class++) {
        const double a = mo->s[j].in_a[i][t_class];
        pv->log_in_a[j][i][t_class] = (a == 0.0) ? +1 : log(a);
      }
    }
  }

  /* Precomputing the log emission probabilities for each state */
  for (j = 0; j < mo->N; j++) {
    const int table_size = ghmm_dpmodel_emission_table_size(mo, j);
    for (emission = 0; emission < table_size; emission++) {
      const double b = mo->s[j].b[emission];
      pv->log_b[j][emission] = (b == 0.0) ? +1 : log(b);
    }
    /* emission now indexes the field after the last entry:
       the last field is for invalid emissions */
    pv->log_b[j][emission] = 1;
  }
#undef CUR_PROC
} /* pviterbi_prop_precompute */
/*============================================================================*/
/**
 * Print the contents of a local Viterbi store to stdout.
 *
 * For every state the number of incoming states is printed, followed by one
 * placeholder message per incoming state (the transition values themselves
 * are not printed yet), and then every entry of the state's log_b row.
 *
 * @param pv  store to print; pv->mo supplies the state and table counts
 */
static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv) {
  ghmm_dpmodel * model;
  int state, idx;

  printf("Local store for pair HMM viterbi algorithm\n");
  printf("Log in a:\n");
  model = pv->mo;

  state = 0;
  while (state < model->N) {
    printf("state %i in states %i\n", state, model->s[state].in_states);
    idx = 0;
    while (idx < model->s[state].in_states) {
      /* placeholder: the per-class transition values are not printed */
      printf("FIXME: log_in_a has three dimensions!");
      idx++;
    }
    state++;
  }

  printf("Log b:\n");
  state = 0;
  while (state < model->N) {
    const int n_chars = ghmm_dpmodel_emission_table_size(model, state);
    printf("state %i #chars: %i\n", state, n_chars);
    for (idx = 0; idx < n_chars; idx++) {
      printf("Emission prob char: %i %f\n", idx, pv->log_b[state][idx]);
    }
    state++;
  }
}
/*============================================================================*/
/**
 * Allocate a propagate store for model mo and a second sequence of length
 * len_y.
 *
 * Allocated members:
 *  - log_in_a[j][i][c]: N target states x in_states of j x kclasses of the
 *    source state in_id[i]
 *  - log_b[j][e]: N states x (emission table size + 1); the extra field is
 *    the slot for invalid emissions
 *  - phi: (max_offset_x + 1) x (len_y + max_offset_y + 1) x N
 *  - phi_new: N entries
 *  - end_of_first: same dimensions as phi, every entry NULL
 *  - topo_order: N entries, topo_order_length = 0
 *
 * @param mo     model that fixes all dimensions; stored in the result
 * @param len_y  length of the Y sequence; stored in the result
 * @return the new store, or NULL after an allocation failure (whatever was
 *         allocated so far is handed to pviterbi_propagate_free)
 */
static plocal_propagate_store_t * pviterbi_propagate_alloc (ghmm_dpmodel *mo, int len_y) {
#define CUR_PROC "pviterbi_propagate_alloc"
  plocal_propagate_store_t* v = NULL;
  int i, j, k;
  /* dimensions shared by phi and end_of_first */
  const int rows = mo->max_offset_x + 1;
  const int cols = len_y + mo->max_offset_y + 1;

  ARRAY_CALLOC (v, 1);
  v->mo = mo;
  v->len_y = len_y;

  /* Allocate the log_in_a's -> individual lengths */
  ARRAY_CALLOC (v->log_in_a, mo->N);
  /* first index of log_in_a: target state */
  for (j = 0; j < mo->N; j++) {
    /* second index: source state */
    ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states);
    for (i = 0; i < mo->s[j].in_states; i++) {
      /* third index: transition classes of source state */
      ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses);
    }
  }

  /* ARRAY_CALLOC jumps to STOP on failure, so no separate NULL test of
     v->log_b is needed after the rows have been allocated through it */
  ARRAY_CALLOC (v->log_b, mo->N);
  for (j = 0; j < mo->N; j++) {
    ARRAY_CALLOC (v->log_b[j], ghmm_dpmodel_emission_table_size(mo, j) + 1);
  }

  v->phi = ighmm_cmatrix_3d_alloc(rows, cols, mo->N);
  if (!(v->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
  ARRAY_CALLOC (v->phi_new, mo->N);

  ARRAY_CALLOC (v->end_of_first, rows);
  for (j = 0; j < rows; j++) {
    ARRAY_CALLOC (v->end_of_first[j], cols);
    for (i = 0; i < cols; i++) {
      ARRAY_CALLOC (v->end_of_first[j][i], mo->N);
      /* explicit NULL: cells are created lazily during the recurrence */
      for (k = 0; k < mo->N; k++) {
        v->end_of_first[j][i][k] = NULL;
      }
    }
  }

  v->topo_order_length = 0;
  ARRAY_CALLOC (v->topo_order, mo->N);
  return(v);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  pviterbi_propagate_free(&v, mo->N, mo->max_offset_x, mo->max_offset_y, len_y);
  return(NULL);
#undef CUR_PROC
}
/*============================================================================*/
/**
 * One pass of the propagate variant of the pair-HMM Viterbi recurrence over
 * the region described by start and stop.
 *
 * init_start_stop() turns start/stop into the coordinate ranges
 * [start_x, stop_x) and [start_y, stop_y). init_phi_prop() handles the first
 * columns; this function then runs the recurrence for
 * u = start_x + max_offset_x + 1 .. stop_x - 1. While doing so it keeps, per
 * (v, state), an "end_of_first" cell: a new cell is created when u lies
 * within offset_x of the middle column middle_x, otherwise the predecessor's
 * cell pointer is carried forward.
 *
 * Throughout, a value of +1 is treated as "not reachable / not possible"
 * and is never added into a sum.
 *
 * @param mo     model
 * @param X      first sequence
 * @param Y      second sequence
 * @param start  start cell handed to init_start_stop (may be NULL in the
 *               DEBUG dump, which checks for it)
 * @param stop   stop cell; NULL selects the best state at
 *               (stop_x - 1, stop_y - 1), otherwise only
 *               stop->previous_state is considered
 * @param log_p  out: best value found, or +1 if none was found
 * @param pv     propagate store holding phi and end_of_first
 * @return the end_of_first cell recorded for the chosen end state, or NULL
 *         if no state qualified
 */
static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, cell * start, cell * stop, double * log_p, plocal_propagate_store_t * pv) {
#define CUR_PROC "pviterbi_step"
  int u, v, j, i;
  double value, max_value, previous_prob;
  /* the length of the path is not known a priori */
  int start_x, start_y, stop_x, stop_y;
  double log_b_i, log_in_a_ij;
  cell * middle = NULL;
  int middle_x;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int);
  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  /* column at which the end of the first half of the alignment is recorded */
  middle_x = start_x + (stop_x - start_x) / 2;
  /* handles columns u = -1 .. max_offset_x and resets the store */
  init_phi_prop(pv, X, Y, start, stop);
#ifdef DEBUG
  /* dump of phi values for the start state after initialisation */
  if (start != NULL && mo->s[start->state].offset_y == 0) {
    for (u = 0; u<=mo->max_offset_x; u++) {
      printf("row %i of phi\n", u);
      for (v = start_y - 1; v < stop_y; v++) {
        printf("phi(0, %i, %i): %f, ", v, start->state, get_phi_prop(pv, 0, v, 0, 0, start->state));
      }
      printf("\n\n");
    }
  }
#endif
  /* u, v > 0 */
  /** THIS IS THE MAIN RECURRENCE **/
  for (u = start_x + mo->max_offset_x + 1; u < stop_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) {
        /** initialization of phi (lookback matrix) **/
        set_phi_prop(pv, u, v, j, +1);
        /* NOTE(review): x is passed as 0 here but as u in init_phi_prop;
           presumably set_end_of_first ignores x - confirm */
        set_end_of_first(pv, 0, v, j, NULL);
      }
      for (i = 0; i < mo->N; i++) {
        /* Determine the maximum: max over j of phi[j] + log_in_a[i][j].
           Silent states are skipped when the model declares them. */
        if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i]) {
          max_value = -DBL_MAX;
          set_end_of_first(pv, 0, v, i, NULL);
          for (j = 0; j < mo->s[i].in_states; j++) {
            /* look back in the phi matrix at the offsets */
            previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
            log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
            /* +1 marks an unreachable predecessor / impossible transition */
            if ( previous_prob != +1 && log_in_a_ij != +1) {
              value = previous_prob + log_in_a_ij;
              if (value > max_value) {
                max_value = value;
                /* Critical point for the propagate algorithm: if we are at
                   the middle point of sequence X, store this as the end
                   point of the first alignment */
                if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
                  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), v - (mo->s[i].offset_y - 1), i, mo->s[i].in_id[j], previous_prob, log_in_a_ij);
                  /* release a cell stored earlier for this (v, i) by a
                     worse predecessor before replacing it */
                  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
                    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
                    m_free(old);
                  }
                  set_end_of_first(pv, u, v, i, end_of_first);
                }
                else {
                  /* at all other points simply propagate the values on */
                  set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]));
                }
              }
            }
            else {;}
          }
#ifdef DEBUG
          /* report emission indices beyond the state's emission table */
          int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y);
          if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
            printf("State %i\n", i);
            ghmm_dpmodel_state_print(&(mo->s[i]));
            printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission);
          }
#endif
          log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y));
          /* No maximum found (that is, state never reached) or the emission
             is marked impossible: keep the state unreachable */
          if (max_value == -DBL_MAX || log_b_i == +1 ) {
            set_phi_prop(pv, u, v, i, 1);
          }
          else
            set_phi_prop(pv, u, v, i, max_value + log_b_i);
        }
      } /* complete time step for emitting states */
      /* silent states are not processed in this implementation */
    } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
    push_back_phi_prop(pv, Y->length);
  } /* End for u in X */
  /* Termination */
  max_value = -DBL_MAX;
  /* for the last segment search for the maximum probability at the end of
     the two sequences */
  if (stop == NULL){
    for (j = 0; j < mo->N; j++){
#ifdef DEBUG
#endif
      if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) != +1 && get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) > max_value) {
        max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j);
        middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, j);
      }
    }
  }
  /* traceback for the interior segments has to start with the previous
     state of the middle because the path has to be connected */
  else {
    if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state) != +1 ) {
      max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
      middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
    }
  }
  if (max_value == -DBL_MAX) {
    /* Sequence can't be generated from the model! */
    *log_p = +1;
  } else {
    *log_p = max_value;
  }
  return middle;
#undef CUR_PROC
}
/*============================================================================*/
/**
 * Reset the propagate store and run the recurrence for the first columns
 * (u = -1 .. max_offset_x) of the region described by start and stop.
 *
 * Steps, in order:
 *  1. init_start_stop() yields the coordinate ranges; start_x/start_y are
 *     saved in pv.
 *  2. Every phi entry is set to +1 (the "not reachable" marker) and every
 *     end_of_first entry to NULL.
 *  3. For each (u, v) the same maximisation as in the main recurrence is
 *     performed; in addition a state whose log_pi is not 1 and whose
 *     offsets place it exactly at (u, v) gets log_pi as its phi value.
 *     The emission value is added afterwards unless phi is still the
 *     marker.
 *  4. After each column, push_back_phi_prop() shifts the phi values.
 *
 * In this function u is relative to start_x: the X character is fetched at
 * u + start_x.
 *
 * @param pv     propagate store to initialise (pv->mo is the model)
 * @param X      first sequence
 * @param Y      second sequence
 * @param start  start cell handed to init_start_stop
 * @param stop   stop cell handed to init_start_stop
 */
static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y, cell * start, cell * stop) {
#define CUR_PROC "init_phi_prop"
  int u, v, j, i, off_x, y;
  double value, max_value, previous_prob, log_b_i, log_in_a_ij ;
  int start_x, start_y, stop_x, stop_y, middle_x;
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int);
  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  pv->start_x = start_x;
  pv->start_y = start_y;
  /* column at which the end of the first half of the alignment is recorded */
  middle_x = start_x + (stop_x - start_x) / 2;
  /* to be sure that we do not look up something out of the bounds set the
     whole matrix to 1 */
  /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]) */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
        pv->phi[off_x][y][j] = +1;
      }
  /* Initialize the end_of_first matrix */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++)
        if (pv->end_of_first[off_x][y][j]) {
          /* NOTE(review): the pointer is dropped without being freed (the
             m_free call was disabled); whether that leaks depends on who
             else references the cell - confirm */
          pv->end_of_first[off_x][y][j] = NULL;
        }
  if (mo->model_type & GHMM_kSilentStates) {
    /* could go into silent state at t=0; not handled here */
  }
  /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
  /* this is in principle the same as the main recurrence but adds initial
     probabilities to states that cannot be inhabited at u=0, v=0 because
     of offsets greater than one */
  /* u, v <= max offsets */
  for (u = -1; u <= mo->max_offset_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) {
        /** initialization of phi (lookback matrix) **/
        set_phi_prop(pv, u, v, j, 1);
        /** traceback for the propagate algorithm **/
        set_end_of_first(pv, u, v, j, NULL);
      }
      for (i = 0; i < mo->N; i++) {
        /* Determine the maximum: max over j of phi[j] + log_in_a[i][j].
           Silent states are skipped when the model declares them. */
        if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
          max_value = -DBL_MAX;
          set_end_of_first(pv, u, v, i, NULL);
          for (j = 0; j < mo->s[i].in_states; j++) {
            /* look back in the phi matrix at the offsets */
            previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
            log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
            /* +1 marks an unreachable predecessor / impossible transition */
            if ( previous_prob != +1 && log_in_a_ij != +1) {
              value = previous_prob + log_in_a_ij;
              if (value > max_value) {
                max_value = value;
                /* Critical point for the propagate algorithm: if we are at
                   the middle point of sequence X, store this as the end
                   point of the first alignment */
                if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
                  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), v - (mo->s[i].offset_y - 1), i, mo->s[i].in_id[j], previous_prob, log_in_a_ij);
                  /* release a cell stored earlier for this (v, i) by a
                     worse predecessor before replacing it */
                  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
                    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
                    m_free(old);
                  }
                  set_end_of_first(pv, u, v, i, end_of_first);
                }
                else {
                  /* at all other points simply propagate the values on */
                  set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]));
                }
              }
            }
            else {;}
          }
#ifdef DEBUG
          /* report emission indices beyond the state's emission table.
             NOTE(review): the index is computed from X at u + start_x but
             the message prints the character at u - confirm which is meant */
          int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y);
          if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
            printf("State %i\n", i);
            ghmm_dpmodel_state_print(&(mo->s[i]));
            printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission);
          }
#endif
          log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y));
          /* this is the difference from the main loop: check whether this
             state could be an initial state and add the initial probability */
          if (log_b_i == +1 ) {
            /* emission marked impossible: state stays unreachable */
            set_phi_prop(pv, u, v, i, +1);
          }
          else {
            if (max_value == -DBL_MAX)
              set_phi_prop(pv, u, v, i, +1);
            else
              set_phi_prop(pv, u, v, i, max_value);
            /* a state sitting exactly at its own offsets from the start
               is an initial state: its phi becomes log_pi, replacing any
               value found above */
            if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && mo->s[i].offset_y - 1 + start_y == v){
              set_phi_prop(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
              printf("Initial log prob state %i at (%i, %i) = %f\n", i, start_x + u, v, get_phi_prop(pv, u, v, 0, 0, i));
              printf("Characters emitted X: %i, Y: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
            }
            /* add the emission only if the state is reachable */
            if (get_phi_prop(pv, u, v, 0, 0, i) != 1) {
              set_phi_prop(pv, u, v, i, get_phi_prop(pv, u, v, 0, 0, i) + log_b_i);
            }
          }
        }
      } /* complete time step for emitting states */
      /* silent states are not processed in this implementation */
    } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
    push_back_phi_prop(pv, Y->length);
#ifdef DEBUG
    /* after the push back, locate and print the largest phi value that is
       below 1 - PROP_EPS */
    if (start != NULL && mo->s[start->state].offset_y == 0) {
      max_value = -DBL_MAX;
      i = -1;
      y = -1;
      off_x = -1;
      int x;
      for (x = 0; x<=mo->max_offset_x; x++)
        for (v = - mo->max_offset_y; v<Y->length; v++) {
          for (j=0; j<mo->N; j++) {
            if (get_phi_prop(pv, x, v, x, 0, j) >= max_value && get_phi_prop(pv, x, v, x, 0, j) < 1 - PROP_EPS) {
              max_value = get_phi_prop(pv, x, v, x, 0, j);
              i = j;
              off_x = x;
              y = v;
            }
          }
        }
      printf("u = %i start_x = %i off_x = %i ", u, start_x, off_x);
      /* NOTE(review): i, y and off_x are still -1 here if no entry
         qualified - confirm the lookup below tolerates that */
      printf("max log prob state %i at (%i, %i) = %f after pushback\n", i, start_x + u - (off_x - 1), y, get_phi_prop(pv, u, y, off_x, 0, i));
    }
#endif
  } /* End for u in X */
#undef CUR_PROC
}
/*============================================================================*/
/**
 * Pair-HMM Viterbi with a selectable traceback start state.
 *
 * Allocates a local store, precomputes the log tables, lets init_phi()
 * handle the first columns, runs the main recurrence over the remaining
 * (u, v) positions and finally walks the psi (predecessor) entries back from
 * (X->length - 1, Y->length - 1).
 *
 * Throughout, a value of +1 is treated as "not reachable / not possible"
 * and is never added into a sum.
 *
 * @param mo                    model
 * @param X                     first sequence
 * @param Y                     second sequence
 * @param log_p                 out: value of the best path, or +1 if no
 *                              end state was reachable
 * @param path_length           out: number of entries in the returned array
 * @param start_traceback_with  -1 to start the traceback at the best end
 *                              state; otherwise the state index the
 *                              traceback must start with (divide & conquer
 *                              mode)
 * @return array built from the state list (its last entry stays -1 when no
 *         end state qualified), or NULL if the list or the store could not
 *         be allocated. Ownership of the array presumably passes to the
 *         caller - confirm.
 */
int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length, int start_traceback_with) {
#define CUR_PROC "ghmm_dpmodel_viterbi"
  int u, v, j, i, off_x, off_y, current_state_index;
  double value, max_value, previous_prob;
  double end_phi;
  plocal_store_t *pv;
  int *state_seq = NULL;
  int emission;
  double log_b_i, log_in_a_ij;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int);
  i_list * state_list;

  state_list = ighmm_list_init_list();
  if (!state_list) {
    /* nothing has been allocated yet, so there is nothing to clean up */
    GHMM_LOG_QUEUED(LCONVERTED);
    return NULL;
  }
  log_in_a = &sget_log_in_a;

  /* Allocate the matrices log_in_a, log_b, phi, phi_new and psi */
  pv = pviterbi_alloc(mo, X->length, Y->length);
  if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }

  /* Precomputing the log(a_ij) and log(bj(ot)) */
  pviterbi_precompute(mo, pv);
  /* Initialize the lookback matrix (for positions [-offsetX,0], [-1, len_y]) */
  init_phi(pv, X, Y);

  /* u > max_offset_x, v starts below 0 to allow states with offset_x == 0,
     which corresponds to a series of gap states before reading the first
     character of x at position x=0, y=v */
  /** THIS IS THE MAIN RECURRENCE **/
  for (u = mo->max_offset_x + 1; u < X->length; u++) {
    for (v = -mo->max_offset_y; v < Y->length; v++) {
      for (j = 0; j < mo->N; j++) {
        /** initialization of phi (lookback matrix), psi (traceback) **/
        set_phi(pv, u, v, j, +1);
        set_psi(pv, u, v, j, -1);
      }
      for (i = 0; i < mo->N; i++) {
        /* silent states are not processed in this implementation */
        if ((mo->model_type & GHMM_kSilentStates) && mo->silent[i])
          continue;

        /* Determine the maximum: max over j of phi[j] + log_in_a[i][j] */
        max_value = -DBL_MAX;
        set_psi(pv, u, v, i, -1);
        for (j = 0; j < mo->s[i].in_states; j++) {
          /* look back in the phi matrix at the offsets */
          previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
          log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
          /* +1 marks an unreachable predecessor / impossible transition */
          if (previous_prob != +1 && log_in_a_ij != +1) {
            value = previous_prob + log_in_a_ij;
            if (value > max_value) {
              max_value = value;
              set_psi(pv, u, v, i, mo->s[i].in_id[j]);
            }
          }
        }

        emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                                     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                                     mo->size_of_alphabet[mo->s[i].alphabet],
                                     mo->s[i].offset_x, mo->s[i].offset_y);
#ifdef DEBUG
        /* report emission indices beyond the state's emission table */
        if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
          printf("State %i\n", i);
          ghmm_dpmodel_state_print(&(mo->s[i]));
          printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
                 ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
                 ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
                 mo->size_of_alphabet[mo->s[i].alphabet],
                 ghmm_dpmodel_emission_table_size(mo, i), emission);
        }
#endif
        /* reuse the index computed above instead of deriving it again */
        log_b_i = log_b(pv, i, emission);

        /* No maximum found (that is, state never reached) or the emission
           is marked impossible: keep the state unreachable */
        if (max_value == -DBL_MAX || log_b_i == +1) {
          set_phi(pv, u, v, i, +1);
        }
        else {
          set_phi(pv, u, v, i, max_value + log_b_i);
        }
      } /* complete time step for emitting states */
    } /* End for v in Y */
    /* Next character in X */
    push_back_phi(pv, Y->length);
  } /* End for u in X */

  /* Termination */
  max_value = -DBL_MAX;
  ighmm_list_append(state_list, -1);
  /* if start_traceback_with is -1 (it is by default) search for the most
     likely state at the end of both sequences */
  if (start_traceback_with == -1) {
    for (j = 0; j < mo->N; j++) {
      /* read the end position once, with the same coordinates for the
         test and for the stored maximum */
      end_phi = get_phi(pv, X->length - 1, Y->length - 1, 0, 0, j);
#ifdef DEBUG
      printf("phi(len_x)(len_y)(%i)=%f\n", j, end_phi);
#endif
      if (end_phi != +1 && end_phi > max_value) {
        max_value = end_phi;
        state_list->last->val = j;
      }
    }
  }
  /* this is the special traceback mode for the d & c algorithm that also
     connects the traceback to the first state of the rest of the path */
  else {
#ifdef DEBUG
    printf("D & C traceback from state %i!\n", start_traceback_with);
    printf("Last characters emitted X: %i, Y: %i\n",
           ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet, X->length-1),
           ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet, Y->length-1));
    for (j = 0; j < mo->N; j++){
      printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, X->length-1, Y->length-1, 0, 0, j));
    }
#endif
    max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, start_traceback_with);
    if (max_value != 1 && max_value > -DBL_MAX)
      state_list->last->val = start_traceback_with;
  }

  if (max_value == -DBL_MAX) {
    /* Sequence can't be generated from the model! */
    *log_p = +1;
  }
  else {
    /* Backtracing */
    *log_p = max_value;
    /* start trace back at the end of both sequences */
    u = X->length - 1;
    v = Y->length - 1;
    current_state_index = state_list->first->val;
    /* -1 means no end state was selected (possible in d & c mode when the
       requested state is unreachable); mo->s[-1] must not be read */
    if (current_state_index != -1) {
      off_x = mo->s[current_state_index].offset_x;
      off_y = mo->s[current_state_index].offset_y;
      while (u - off_x >= -1 && v - off_y >= -1) {
        /* look up the preceding state and save it in the first position
           of the state list */
        current_state_index = get_psi(pv, u, v, current_state_index);
        if (current_state_index == -1)
          break;  /* start of the path reached */
        ighmm_list_insert(state_list, current_state_index);
        /* move in the alignment matrix */
        u -= off_x;
        v -= off_y;
        /* get the next offsets */
        off_x = mo->s[current_state_index].offset_x;
        off_y = mo->s[current_state_index].offset_y;
      }
    }
  }

  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x,
                mo->max_offset_y);
  state_seq = ighmm_list_to_array(state_list);
  *path_length = state_list->length;
  /* the path now lives in state_seq; release the temporary list */
  ighmm_list_free(state_list);
  return (state_seq);
STOP:     /* reached only when pviterbi_alloc failed */
  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x,
                mo->max_offset_y);
  /* state_seq is still NULL on this path, so there is nothing to release */
  ighmm_list_free(state_list);
  return NULL;
#undef CUR_PROC
} /* viterbi */
/*============================================================================*/
/**
 * Initialise the phi (lookback) and psi (traceback) entries of a local
 * Viterbi store for the first columns u = -1 .. max_offset_x.
 *
 * All phi entries are first set to +1, the value used as "not reachable".
 * The loop that follows is the main recurrence plus one extra rule: a state
 * whose log_pi is not 1 and whose offsets place it exactly at (u, v) gets
 * log_pi as its phi value. Iteration starts at u = -1 and
 * v = -max_offset_y so that states with offset 0 in one dimension (a run of
 * gap states before the first character) can be entered.
 *
 * @param pv  store to initialise (pv->mo is the model)
 * @param X   first sequence
 * @param Y   second sequence
 */
static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y) {
  ghmm_dpmodel * mo = pv->mo;
  double (*trans_log)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int) = &sget_log_in_a;
  int row, col, st, pred, u, v, symbol;
  double best, candidate, prev_phi, trans, emit;

  /* mark the whole lookback matrix as unreachable */
  for (row = 0; row <= mo->max_offset_x; row++) {
    for (col = 0; col <= Y->length + mo->max_offset_y; col++) {
      for (st = 0; st < mo->N; st++) {
        pv->phi[row][col][st] = +1;
      }
    }
  }

  /* silent states are not processed in this implementation */

  for (u = -1; u <= mo->max_offset_x; u++) {
    for (v = -mo->max_offset_y; v < Y->length; v++) {

      /* reset phi and psi of every state at this position */
      for (st = 0; st < mo->N; st++) {
        set_phi(pv, u, v, st, +1);
        set_psi(pv, u, v, st, -1);
      }

      for (st = 0; st < mo->N; st++) {
        int dx, dy, alpha;

        if ((mo->model_type & GHMM_kSilentStates) && mo->silent[st])
          continue;

        dx = mo->s[st].offset_x;
        dy = mo->s[st].offset_y;
        alpha = mo->s[st].alphabet;

        /* best predecessor: max over incoming states of phi + log a */
        best = -DBL_MAX;
        set_psi(pv, u, v, st, -1);
        for (pred = 0; pred < mo->s[st].in_states; pred++) {
          prev_phi = get_phi(pv, u, v, dx, dy, mo->s[st].in_id[pred]);
          trans = (*trans_log)(pv, st, pred, X, Y, u, v);
          if (prev_phi == +1 || trans == +1)
            continue;   /* unreachable predecessor or impossible transition */
          candidate = prev_phi + trans;
          if (candidate > best) {
            best = candidate;
            set_psi(pv, u, v, st, mo->s[st].in_id[pred]);
          }
        }

        symbol = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, alpha, u),
                                   ghmm_dpseq_get_char(Y, alpha, v),
                                   mo->size_of_alphabet[alpha], dx, dy);
#ifdef DEBUG
        /* report emission indices beyond the state's emission table */
        if (symbol > ghmm_dpmodel_emission_table_size(mo, st)) {
          printf("State %i\n", st);
          ghmm_dpmodel_state_print(&(mo->s[st]));
          printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n",
                 ghmm_dpseq_get_char(X, alpha, u),
                 ghmm_dpseq_get_char(Y, alpha, v),
                 mo->size_of_alphabet[alpha],
                 ghmm_dpmodel_emission_table_size(mo, st), symbol);
        }
#endif
        emit = log_b(pv, st, symbol);

        if (emit == +1) {
          /* emission marked impossible: state stays unreachable */
          set_phi(pv, u, v, st, +1);
          continue;
        }

        if (best == -DBL_MAX) {
          set_phi(pv, u, v, st, +1);
        }
        else {
          set_phi(pv, u, v, st, best);
        }

        /* difference from the main loop: a state located exactly at its
           own offsets is an initial state and starts from log_pi */
        if (mo->s[st].log_pi != 1 && dx - 1 == u && dy - 1 == v) {
          set_phi(pv, u, v, st, mo->s[st].log_pi);
#ifdef DEBUG
          printf("Initial log prob state %i at (%i, %i) = %f\n", st, u, v,
                 get_phi(pv, u, v, 0, 0, st));
          printf("Characters emitted X: %i, Y: %i\n",
                 ghmm_dpseq_get_char(X, alpha, u),
                 ghmm_dpseq_get_char(Y, alpha, v));
#endif
        }

        /* add the emission only if the state is reachable */
        if (get_phi(pv, u, v, 0, 0, st) != 1) {
          set_phi(pv, u, v, st, get_phi(pv, u, v, 0, 0, st) + emit);
        }
      } /* all emitting states done for (u, v) */
    } /* end of column v loop */

    /* next character in X: push back the old phi values */
    push_back_phi(pv, Y->length);
  } /* end of u loop */
}