i_list * ighmm_list_init_list() { #define CUR_PROC "ighmm_list_init_list" i_list * list; ARRAY_CALLOC (list, 1); list->first = NULL; list->last = NULL; list->length = 0; return list; STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ighmm_list_free(list); return NULL; #undef CUR_PROC }
/*============================================================================*/ int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y, double *log_p, int *path_length, int start_traceback_with) { #define CUR_PROC "ghmm_dpmodel_viterbi" int u, v, j, i, off_x, off_y, current_state_index; double value, max_value, previous_prob; plocal_store_t *pv; int *state_seq = NULL; int emission; double log_b_i, log_in_a_ij; double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int); /* printf("---- viterbi -----\n"); */ i_list * state_list; state_list = ighmm_list_init_list(); log_in_a = &sget_log_in_a; /* int len_path = mo->N*len; the length of the path is not known apriori */ /* if (mo->model_type & kSilentStates && */ /* mo->silent != NULL && */ /* mo->topo_order == NULL) { */ /* ghmm_dmodel_topo_order( mo ); */ /* } */ /* Allocate the matrices log_in_a, log_b,Vektor phi, phi_new, Matrix psi */ pv = pviterbi_alloc(mo, X->length, Y->length); if (!pv) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* Precomputing the log(a_ij) and log(bj(ot)) */ pviterbi_precompute(mo, pv); /* Initialize the lookback matrix (for positions [-offsetX,0], [-1, len_y]*/ init_phi(pv, X, Y); /* u > max_offset_x , v starts -1 to allow states with offset_x == 0 which corresponds to a series of gap states before reading the first character of x at position x=0, y=v */ /** THIS IS THE MAIN RECURRENCE **/ for (u = mo->max_offset_x + 1; u < X->length; u++) { for (v = -mo->max_offset_y; v < Y->length; v++) { for (j = 0; j < mo->N; j++) { /** initialization of phi (lookback matrix), psi (traceback) **/ set_phi(pv, u, v, j, +1); set_psi(pv, u, v, j, -1); } for (i = 0; i < mo->N; i++) { /* Determine the maximum */ /* max_phi = phi[i] + log_in_a[j][i] ... */ if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) { max_value = -DBL_MAX; set_psi(pv, u, v, i, -1); for (j = 0; j < mo->s[i].in_states; j++) { /* look back in the phi matrix at the offsets */ previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]); log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v); if ( previous_prob != +1 && log_in_a_ij != +1) { value = previous_prob + log_in_a_ij; if (value > max_value) { max_value = value; set_psi(pv, u, v, i, mo->s[i].in_id[j]); } } else {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */ } emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y); #ifdef DEBUG if (emission > ghmm_dpmodel_emission_table_size(mo, i)){ printf("State %i\n", i); ghmm_dpmodel_state_print(&(mo->s[i])); printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], ghmm_dpmodel_emission_table_size(mo, i), emission); } #endif log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), mo->size_of_alphabet[mo->s[i].alphabet], mo->s[i].offset_x, mo->s[i].offset_y)); /* No maximum found (that is, state never reached) or the output O[t] = 0.0: */ if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */ log_b_i == +1 ) { set_phi(pv, u, v, i, +1); } else set_phi(pv, u, v, i, max_value + log_b_i); } } /* complete time step for emitting states */ /* last_osc = osc; */ /* save last transition class */ /*if (mo->model_type & kSilentStates) { p__viterbi_silent( mo, t, v ); }*/ /* complete time step for silent states */ /************** for (j = 0; j < mo->N; j++) { printf("\npsi[%d],in:%d, phi=%f\n", t, v->psi[t][j], v->phi[j]); } for (i = 0; i < mo->N; i++){ printf("%d\t", former_matchcount[i]); } for (i = 0; i < mo->N; i++){ printf("%d\t", recent_matchcount[i]); } ****************/ } /* End for v in Y */ /* Next character in X */ push_back_phi(pv, Y->length); } /* End for u in X */ /* Termination */ max_value = -DBL_MAX; ighmm_list_append(state_list, -1); /* if start_traceback_with is -1 (it is by default) search for the most likely state at the end of both sequences */ if (start_traceback_with == -1) { for (j = 0; j < mo->N; j++){ #ifdef DEBUG printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, u, Y->length-1, 0, 0, j)); #endif if ( get_phi(pv, u, Y->length-1, 0, 0, j) != +1 && get_phi(pv, u, Y->length-1, 0, 0, j) > max_value) { max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, j); state_list->last->val = j; } } } /* this is the special traceback mode for the d & c algorithm that also connects the traceback to the first state of the rest of the path */ else { #ifdef DEBUG printf("D & C traceback from state %i!\n", start_traceback_with); printf("Last characters emitted X: %i, Y: %i\n", ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet, X->length-1), ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet, Y->length-1)); for (j = 0; j < mo->N; j++){ printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, X->length-1, Y->length-1, 0, 0, j)); } #endif max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, start_traceback_with); if (max_value != 1 && max_value > -DBL_MAX) state_list->last->val = start_traceback_with; } if (max_value == -DBL_MAX) { /* Sequence can't be generated from the model! */ *log_p = +1; /* Backtracing doesn't work, because state_seq[*] allocated with -1 */ /* for (t = len - 2; t >= 0; t--) state_list->last->val = -1; */ } else { /* Backtracing, should put DEL path nicely */ *log_p = max_value; /* removed the handling of silent states here */ /* start trace back at the end of both sequences */ u = X->length - 1; v = Y->length - 1; current_state_index = state_list->first->val; off_x = mo->s[current_state_index].offset_x; off_y = mo->s[current_state_index].offset_y; while (u - off_x >= -1 && v - off_y >= -1 && current_state_index != -1) { /* while (u > 0 && v > 0) { */ /* look up the preceding state and save it in the first position of the state list */ /* printf("Current state %i at (%i,%i) -> preceding state %i\n", current_state_index, u, v, get_psi(pv, u, v, current_state_index)); */ /* update the current state */ current_state_index = get_psi(pv, u, v, current_state_index); if (current_state_index != -1) ighmm_list_insert(state_list, current_state_index); /* move in the alignment matrix */ u -= off_x; v -= off_y; /* get the next offsets */ off_x = mo->s[current_state_index].offset_x; off_y = mo->s[current_state_index].offset_y; } } /* Free the memory space */ pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x , mo->max_offset_y); /* printf("After traceback: last state = %i\n", state_list->last->val); */ state_seq = ighmm_list_to_array(state_list); *path_length = state_list->length; /* PRINT PATH */ /* fprintf(stderr, "Viterbi path: " ); */ /* int t; */ /* for(t=0; t < *path_length; t++) */ /* if (state_seq[t] >= 0) fprintf(stderr, " %d ", state_seq[t]); */ /* fprintf(stderr, "\n Freeing ... \n"); */ return (state_seq); STOP: /* Label STOP from ARRAY_[CM]ALLOC */ /* Free the memory space */ pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x, mo->max_offset_y); m_free(state_seq); ighmm_list_free(state_list); return NULL; #undef CUR_PROC } /* viterbi */