Example #1
0
/* Size of the emission lookup table of state `state_index`.

   The table is indexed by the value ghmm_dpmodel_pair() produces for the
   characters a state reads.  The index obtained for the last symbol of the
   state's alphabet on both sequences is taken as the largest index, so the
   table needs one entry more than that.

   mo           the pair HMM
   state_index  index of the state in mo->s
   returns      largest pair index for this state plus one */
int ghmm_dpmodel_emission_table_size(ghmm_dpmodel* mo, int state_index) {
  int n_symbols;
  int last_symbol;
  int largest_index;

  n_symbols = mo->size_of_alphabet[mo->s[state_index].alphabet];
  last_symbol = n_symbols - 1;
  largest_index = ghmm_dpmodel_pair(last_symbol, last_symbol, n_symbols,
                                    mo->s[state_index].offset_x,
                                    mo->s[state_index].offset_y);
  return largest_index + 1;
}
Example #2
0
/*============================================================================*/
/* Runs the Viterbi recurrence over one segment of the alignment of X and Y
   and, alongside the log probabilities, propagates for every cell the cell
   at which its best path crossed the middle column of X ("end of first").

   mo     the pair HMM
   X, Y   the two sequences
   start  start cell of the segment; handed to init_start_stop() to derive
          start_x / start_y
   stop   stop cell of the segment; handed to init_start_stop() to derive
          stop_x / stop_y.  NULL selects the termination used for the last
          segment (search over all states), otherwise the termination looks
          only at stop->previous_state
   log_p  out: best log probability found at (stop_x - 1, stop_y - 1), or +1
          if no state is reachable there
   pv     working store holding phi and the end_of_first cells

   Returns the end_of_first cell stored for the best final state; this is
   NULL if no state was reachable or no such cell had been recorded.

   Throughout, the value +1 in phi / log_in_a / log_b marks "impossible". */
static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
				       cell * start, cell * stop,
				       double * log_p,
				       plocal_propagate_store_t * pv) {
#define CUR_PROC "pviterbi_step"
  int u, v, j, i;
  double value, max_value, previous_prob;  
  /* the length of the path is not known a priori */
  int start_x, start_y, stop_x, stop_y;
  double log_b_i, log_in_a_ij;
  cell * middle = NULL;
  int middle_x;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, 
		     ghmm_dpseq*, int, int);
  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  /* column of X at which the segment is split */
  middle_x = start_x + (stop_x - start_x) / 2;
  /* silent states are not handled here (topological ordering is disabled) */
  /* reset the store and compute the leading rows (see init_phi_prop) */
  init_phi_prop(pv, X, Y, start, stop);
#ifdef DEBUG
  /* dump the phi values of the start state after initialisation */
  if (start != NULL && mo->s[start->state].offset_y == 0) {
    for (u = 0; u<=mo->max_offset_x; u++) {
      printf("row %i of phi\n", u);
      for (v = start_y - 1; v < stop_y; v++) {
	printf("phi(0, %i, %i): %f, ", v, start->state, get_phi_prop(pv, 0, v, 0, 0, start->state));
      }
      printf("\n\n");
    }
  }
#endif
  /* u, v > 0 */
  /** THIS IS THE MAIN RECURRENCE **/
  /* rows up to start_x + max_offset_x are left to init_phi_prop() */
  for (u = start_x + mo->max_offset_x + 1; u < stop_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) 
	{
	  /** initialization of phi (lookback matrix) **/
	  set_phi_prop(pv, u, v, j, +1);
	  /* NOTE(review): x is passed as 0 here but as u further down --
	     confirm that set_end_of_first() does not depend on x */
	  set_end_of_first(pv, 0, v, j, NULL);
	}
      for (i = 0; i < mo->N; i++) {
	/* Determine the maximum over all predecessors of state i:
	   max_j ( phi[pred_j] + log_in_a[i][j] ) */
	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i]) {
	  max_value = -DBL_MAX;
	  /* NOTE(review): same x = 0 argument as above */
	  set_end_of_first(pv, 0, v, i, NULL);
	  for (j = 0; j < mo->s[i].in_states; j++) {
	    /* look back in the phi matrix at the offsets */
	    previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x,
					   mo->s[i].offset_y, mo->s[i].in_id[j]);
	    log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	    /* skip predecessors that are unreachable or not connected */
	    if ( previous_prob != +1 && 
		 log_in_a_ij != +1) {
	      value = previous_prob + log_in_a_ij;
	      if (value > max_value) {
		max_value = value;
		/* Critical point for the propagate algorithm if we are at the
		   middle point of sequence X store this at the end point of
		   the first alignment */	      
		if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
		  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1),
						  v - (mo->s[i].offset_y - 1), 
						  i, mo->s[i].in_id[j],
						  previous_prob, 
						  log_in_a_ij);
		  /* a better predecessor replaces the cell recorded earlier
		     for this state.
		     NOTE(review): this assumes the stored cell is owned by
		     this entry -- confirm it cannot be a pointer that was
		     propagated from another entry */
		  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
		    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
		    m_free(old);
		  }
		  set_end_of_first(pv, u, v, i, end_of_first);
		}
		else {
		  /* at all other points simply propagate the values on:
		     inherit the cell of the best predecessor */
		  set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, 
							     mo->s[i].offset_x,
							     mo->s[i].offset_y,
							     mo->s[i].in_id[j]));
		}
	      }
	    }
	    else
	      {;} /* predecessor contributes nothing */
	  }
#ifdef DEBUG
	  /* sanity check of the emission index against the table size.
	     NOTE(review): an index equal to the table size passes this
	     check; `>=` may be intended -- confirm */
	  int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
			      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
			      mo->size_of_alphabet[mo->s[i].alphabet],
			      mo->s[i].offset_x, mo->s[i].offset_y);
	  if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	    printf("State %i\n", i);
	    ghmm_dpmodel_state_print(&(mo->s[i]));
	    printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		   mo->size_of_alphabet[mo->s[i].alphabet],
		   ghmm_dpmodel_emission_table_size(mo, i), emission);
	  }
#endif
	  /* log emission probability of the characters read at (u, v) */
	  log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
					   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
					   mo->size_of_alphabet[mo->s[i].alphabet],
					   mo->s[i].offset_x, mo->s[i].offset_y));
	  /* No maximum found (that is, state never reached)
	     or the characters cannot be emitted by state i: */
	  if (max_value == -DBL_MAX ||
	      log_b_i == +1 ) {
	    set_phi_prop(pv, u, v, i, 1);
	  }
	  else
	    set_phi_prop(pv, u, v, i, max_value + log_b_i);
	}
      } /* complete time step for emitting states */
    
      /* silent states are not processed here */

    } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
    push_back_phi_prop(pv, Y->length);
  } /* End for u in X */
  /* Termination */
  max_value = -DBL_MAX;
  /* for the last segment search for the maximum probability at the end of the
     two sequences */
  if (stop == NULL){
    for (j = 0; j < mo->N; j++){
#ifdef DEBUG
    /* (per-state debug output disabled) */
#endif
      if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) != +1 && 
	   get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) > max_value) { 
	max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j);
	middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, j);
      }
    }
  }
  /* traceback for the interior segments has to start with the previous state
     of the stop cell because the path has to be connected */
  else {
    if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state) != +1 ) {
      max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
      middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
    }
  }
  if (max_value == -DBL_MAX) {
    /* Sequence can't be generated from the model! */
    *log_p = +1;
  }
  else {
    *log_p = max_value;
  }
  return middle;
#undef CUR_PROC
}
Example #3
0
/*============================================================================*/
/* Prepares the propagate store for a new segment and computes the leading
   rows of the recurrence.

   The function
     - records start_x / start_y (derived by init_start_stop()) in pv,
     - sets every phi entry to +1 ("impossible") and every end_of_first
       entry to NULL,
     - runs the recurrence for u = -1 .. max_offset_x.  This is the same
       recurrence as in the main loop of pviterbi_propagate_step(), except
       that a state may also be entered with its initial log probability
       at the cell it reaches by consuming its own offsets.

   pv     working store; pv->mo is the model
   X, Y   the two sequences
   start  start cell of the segment (handed to init_start_stop())
   stop   stop cell of the segment (handed to init_start_stop()) */
static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X,
			   ghmm_dpseq * Y, cell * start, cell * stop) {
#define CUR_PROC "init_phi_prop"
  int u, v, j, i, off_x, y;
  double value, max_value, previous_prob, log_b_i, log_in_a_ij ;
  int start_x, start_y, stop_x, stop_y, middle_x;
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, 
		     ghmm_dpseq*, int, int);
  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  pv->start_x = start_x;
  pv->start_y = start_y;
  /* column of X at which the segment is split */
  middle_x = start_x + (stop_x - start_x) / 2;
  /* to be sure that we do not look up something out of the bounds set the
     whole matrix to 1 */
  /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
	pv->phi[off_x][y][j] = +1;
      }
  /* Initialize the end_of_first matrix */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) 
	if (pv->end_of_first[off_x][y][j]) {
	  /* the pointer is only dropped, not freed (the free is disabled).
	     NOTE(review): confirm that the cells are released elsewhere */
	  pv->end_of_first[off_x][y][j] = NULL;
	}
  if (mo->model_type & GHMM_kSilentStates) { /* could go into silent state at t=0 */
    /* silent states are not handled here */
  }
  
  /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
  /* this is in principle the same as the main recurrence but adds initial
     probabilities to states that cannot be inhabited at u=0, v=0 because
     of greater offsets than one */
  /* u, v <= max offsets */
  for (u = -1; u <= mo->max_offset_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) 
	{
	  /** initialization of phi (lookback matrix) **/
	  set_phi_prop(pv, u, v, j, 1);
	  /** traceback for the propagate algorithm **/
	  set_end_of_first(pv, u, v, j, NULL);
	}
      for (i = 0; i < mo->N; i++) {
	/* Determine the maximum over all predecessors of state i:
	   max_j ( phi[pred_j] + log_in_a[i][j] ) */
	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
	  max_value = -DBL_MAX;
	  set_end_of_first(pv, u, v, i, NULL);
	  for (j = 0; j < mo->s[i].in_states; j++) {
	    /* look back in the phi matrix at the offsets */
	    previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, 
					 mo->s[i].offset_y, mo->s[i].in_id[j]);
	    log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	    /* skip predecessors that are unreachable or not connected */
	    if ( previous_prob != +1 && log_in_a_ij != +1) {
	      value = previous_prob + log_in_a_ij;
	      if (value > max_value) {
		max_value = value;
		/* Critical point for the propagate algorithm if we are at the
		   middle point of sequence X store this at the end point of
		   the first alignment */
		if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
		  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), 
						  v - (mo->s[i].offset_y - 1), 
						  i, mo->s[i].in_id[j],
						  previous_prob, 
						  log_in_a_ij);
		  /* a better predecessor replaces the cell recorded earlier
		     for this state.
		     NOTE(review): this assumes the stored cell is owned by
		     this entry -- confirm it cannot be a pointer that was
		     propagated from another entry */
		  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
		    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
		    m_free(old);
		  }
		  set_end_of_first(pv, u, v, i, end_of_first);
		}
		else {
		  /* at all other points simply propagate the values on:
		     inherit the cell of the best predecessor */
		  set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, 
							     mo->s[i].offset_x,
							     mo->s[i].offset_y,
							     mo->s[i].in_id[j]));
		}
	      }
	    }
	    else
	      {;} /* predecessor contributes nothing */
	  }
#ifdef DEBUG
	  /* sanity check of the emission index against the table size.
	     NOTE(review): an index equal to the table size passes this
	     check; `>=` may be intended -- confirm.
	     NOTE(review): the printout below reads X at u while the index
	     is computed from u + start_x -- confirm which is meant */
	  int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), 
			      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
			      mo->size_of_alphabet[mo->s[i].alphabet],
			      mo->s[i].offset_x, mo->s[i].offset_y);
	  if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	    printf("State %i\n", i);
	    ghmm_dpmodel_state_print(&(mo->s[i]));
	    printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		   mo->size_of_alphabet[mo->s[i].alphabet],
		   ghmm_dpmodel_emission_table_size(mo, i), emission);
	  }
#endif
	  /* log emission probability; u is relative to the segment here, so
	     the character of X is read at u + start_x */
	  log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, 
							       mo->s[i].alphabet,
							       u + start_x),
					   ghmm_dpseq_get_char(Y, 
							       mo->s[i].alphabet,
							       v),
					   mo->size_of_alphabet[mo->s[i].alphabet],
					   mo->s[i].offset_x, mo->s[i].offset_y));

	  /* this is the difference from the main loop:
	     check whether this state could be an initial state and add the
	     initial probability */
	  if (log_b_i == +1 ) {
	    /* characters cannot be emitted by state i */
	    set_phi_prop(pv, u, v, i, +1);
	  }
	  else {
	    /* first store the best value reached through a predecessor
	       (without the emission term) ... */
	    if (max_value == -DBL_MAX)
	      set_phi_prop(pv, u, v, i, +1);
	    else
	      set_phi_prop(pv, u, v, i, max_value);
	    /* ... then let the initial log probability override it at the
	       one cell the state reaches by consuming its own offsets from
	       (-1, start_y - 1) */
	    if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && 
		mo->s[i].offset_y - 1 + start_y == v){
	      set_phi_prop(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG	     
	      printf("Initial log prob state %i at (%i, %i) = %f\n", i, start_x + u, v, get_phi_prop(pv, u, v, 0, 0, i));
	      printf("Characters emitted X: %i, Y: %i\n", 
		     ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
		     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
	    }
	    /* ... and finally add the emission term if the state is
	       reachable at all */
	    if (get_phi_prop(pv, u, v, 0, 0, i) != 1) {
	      set_phi_prop(pv, u, v, i, get_phi_prop(pv, u, v, 0, 0, i) + log_b_i);
	    }
	  }
	}
      } /* complete time step for emitting states */
    
      /* silent states are not processed here */

    } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
    push_back_phi_prop(pv, Y->length);
#ifdef DEBUG
    /* report the best entry of the store after the push back */
    if (start != NULL && mo->s[start->state].offset_y == 0) {
      max_value = -DBL_MAX;
      i = -1;     
      y = -1;
      off_x = -1;
      int x;
      for (x = 0; x<=mo->max_offset_x; x++)
	for (v = - mo->max_offset_y; v<Y->length; v++) {
	  for (j=0; j<mo->N; j++) {
	    if (get_phi_prop(pv, x, v, x, 0, j) >= max_value && 
		get_phi_prop(pv, x, v, x, 0, j) < 1 - PROP_EPS) {
	      max_value = get_phi_prop(pv, x, v, x, 0, j);
	      i = j;
	      off_x = x;
	      y = v;
	    }
	  }
	}
      printf("u = %i start_x = %i off_x = %i ", u, start_x, off_x);
      printf("max log prob state %i at (%i, %i) = %f after pushback\n",
	     i, start_x + u - (off_x - 1), y, get_phi_prop(pv, u, y, off_x, 0, i));
    }
#endif
  } /* End for u in X */
#undef CUR_PROC
}
Example #4
0
/*============================================================================*/
/* Scores a given state path of a pair HMM against two sequences.

   The path is walked from its first to its last state.  The result is the
   sum of the initial log probability of the first state, the log
   transition probability of every step and the log emission probability of
   the characters each state reads.  Every state advances the position in X
   by its offset_x and the position in Y by its offset_y.

   mo             the pair HMM
   X, Y           the two sequences
   state_seq      the path as an array of state indices
   state_seq_len  number of entries in state_seq

   Returns the log probability of the path.  The value 1.0 is returned (and
   a diagnostic is written to stderr) if
     - no path is given,
     - the lookup tables cannot be allocated,
     - the first state has no initial probability,
     - a transition of the path does not exist in the model,
     - a state cannot emit the characters at its position,
     - the path runs past the end of X or Y, or
     - the path ends before the end of X or of Y. */
double ghmm_dpmodel_viterbi_logp(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
			    int *state_seq, int state_seq_len) {
#define CUR_PROC "ghmm_dpmodel_viterbi_logp"
  int s, t, i, j, u, v;
  double log_p = 0.0;
  double log_b_i = 1.0;
  double log_in_a = 1.0;
  double result = 1.0; /* stays "impossible" until the whole path is accepted */
  plocal_store_t *pv;

  if (state_seq == NULL || state_seq_len <= 0) { /* there is no path.. */
    fprintf(stderr, "No path given!\n");
    return 1.0;
  }

  /* Allocate and precompute the lookup tables for log_in_a and log_b */
  pv = pviterbi_alloc(mo, 0, 0);
  if (!pv) {
    fprintf(stderr, "could not allocate the lookup tables\n");
    return 1.0;
  }
  pviterbi_precompute(mo, pv);

  /* initial state; positions start one before the first character */
  t = 0; u = -1; v = -1;
  i = state_seq[t];
  /* initial state probability */
  log_p += mo->s[i].log_pi;
  if (log_p == 1.0) { /* the initial prob is zero */
    fprintf(stderr, "the initial probability of state %i is zero\n", i);
    goto STOP;
  }
  /* consume the first characters of the sequences */
  u += mo->s[i].offset_x;
  v += mo->s[i].offset_y;
  /* same guard as for all later states: never read past a sequence end */
  if (u >= X->length || v >= Y->length) {
    fprintf(stderr, "path consumes too many chars\n");
    goto STOP;
  }
  /* get the emission probability */
  log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
				ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
				mo->size_of_alphabet[mo->s[i].alphabet],
				mo->s[i].offset_x, mo->s[i].offset_y));
  if (log_b_i == 1.0) { /* chars cant be emitted */
    fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n",
	    ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
	    ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t);
    goto STOP;
  }
  log_p += log_b_i;

  /* rest of the sequence */
  for (t=1; t<state_seq_len; t++) {
    j = i; /* predecessor state */
    i = state_seq[t]; /* current state */
    /* consume the next characters */
    u += mo->s[i].offset_x;
    v += mo->s[i].offset_y;
    if (u >= X->length || v >= Y->length) { /* path consumes too many chars */
      fprintf(stderr, "path consumes too many chars\n");
      goto STOP;
    }
    /* transition prob state j -> i: find j among the predecessors of i */
    log_in_a = 1.0;
    for (s=0; s<mo->s[i].in_states; s++) {
      if (mo->s[i].in_id[s] == j) {
	log_in_a = sget_log_in_a(pv, i, s, X, Y, u, v);
	break;
      }
    }
    if (log_in_a == 1.0) { /* transition not possible */
      fprintf(stderr, "transition (%i -> %i) at t=%i not possible\n", j, i,t); 
      goto STOP;
    }
    /* emission probability */
    log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
				ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
				mo->size_of_alphabet[mo->s[i].alphabet],
				mo->s[i].offset_x, mo->s[i].offset_y));
    if (log_b_i == 1.0) { /* characters cant be emitted */
      fprintf(stderr, "characters (%i, %i) at position (%i, %i) cannot be emitted by state %i (t=%i)\n",
	      ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
	      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v), u, v, i, t);
      goto STOP;
    }
    log_p += log_in_a + log_b_i;
  }

  /* check if all of both sequences has been consumed: the path is only
     complete if it ends on the last character of X and of Y */
  if (u != X->length - 1 || v != Y->length - 1) {
    fprintf(stderr, "path consumes not all characters (%i of %i, %i of %i)\n",
	    u + 1, X->length, v + 1, Y->length);
    goto STOP;
  }
  result = log_p;

STOP:
  /* single exit: the lookup tables are released on every path */
  pviterbi_free(&pv, mo->N, 0, 0, mo->max_offset_x, mo->max_offset_y);
  return result;

#undef CUR_PROC
} /* ghmm_dpmodel_viterbi_logp */
Example #5
0
/*============================================================================*/
/* Viterbi algorithm for pair HMMs with a selectable traceback start.

   Fills the phi (log probability) and psi (traceback) tables for the
   alignment of X and Y and then follows psi backwards from the end of both
   sequences to recover the most probable state path.

   mo                    the pair HMM
   X, Y                  the two sequences
   log_p                 out: log probability of the returned path, or +1
                         if the sequences cannot be generated by the model
   path_length           out: number of entries in the returned array
   start_traceback_with  -1: start the traceback in the most probable state
                         at the end of both sequences; otherwise: index of
                         the state the traceback has to start in (used by
                         the divide and conquer variant to connect partial
                         paths)

   Returns the state path as an array built by ighmm_list_to_array().  If
   no path exists the array holds the single entry -1.  NULL is returned if
   the lookup tables cannot be allocated; *log_p and *path_length are not
   written in that case.

   Throughout, the value +1 in phi / log_in_a / log_b marks "impossible". */
int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
				 double *log_p, int *path_length,
				 int start_traceback_with) {
#define CUR_PROC "ghmm_dpmodel_viterbi"
  int u, v, j, i, off_x, off_y, current_state_index;
  double value, max_value, previous_prob;
  double end_phi;
  plocal_store_t *pv;
  int *state_seq = NULL;
  int emission;
  double log_b_i, log_in_a_ij;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int);
  i_list * state_list;

  state_list = ighmm_list_init_list();
  log_in_a = &sget_log_in_a;
  /* the length of the path is not known a priori */

  /* Allocate the lookup tables and the matrices phi and psi */
  pv = pviterbi_alloc(mo, X->length, Y->length);
  if (!pv)                        { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }

  /* Precomputing the log(a_ij) and log(bj(ot)) */
  pviterbi_precompute(mo, pv);
  /* Initialize the lookback matrix and compute the leading rows */
  init_phi(pv, X, Y);

  /* u > max_offset_x , v starts below 0 to allow states with offset_x == 0
     which corresponds to a series of gap states before reading the first
     character of x at position x=0, y=v */
  /** THIS IS THE MAIN RECURRENCE **/
  for (u = mo->max_offset_x + 1; u < X->length; u++) {
    for (v = -mo->max_offset_y; v < Y->length; v++) {
      for (j = 0; j < mo->N; j++) {
	/** initialization of phi (lookback matrix), psi (traceback) **/
	set_phi(pv, u, v, j, +1);
	set_psi(pv, u, v, j, -1);
      }

      for (i = 0; i < mo->N; i++) {
	/* silent states are skipped */
	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
	  /* Determine the maximum over all predecessors of state i:
	     max_j ( phi[pred_j] + log_in_a[i][j] ) */
	  max_value = -DBL_MAX;
	  set_psi(pv, u, v, i, -1);
	  for (j = 0; j < mo->s[i].in_states; j++) {
	    /* look back in the phi matrix at the offsets */
	    previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
	    log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	    /* skip predecessors that are unreachable or not connected */
	    if ( previous_prob != +1 && log_in_a_ij != +1) {
	      value = previous_prob + log_in_a_ij;
	      if (value > max_value) {
		max_value = value;
		set_psi(pv, u, v, i, mo->s[i].in_id[j]);
	      }
	    }
	  }

	  /* index of the characters read at (u, v) in the emission table */
	  emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
			      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
			      mo->size_of_alphabet[mo->s[i].alphabet],
			      mo->s[i].offset_x, mo->s[i].offset_y);
#ifdef DEBUG
	  /* valid indices are 0 .. table size - 1 */
	  if (emission >= ghmm_dpmodel_emission_table_size(mo, i)){
	    printf("State %i\n", i);
	    ghmm_dpmodel_state_print(&(mo->s[i]));
	    printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		   mo->size_of_alphabet[mo->s[i].alphabet],
		   ghmm_dpmodel_emission_table_size(mo, i), emission);
	  }
#endif
	  log_b_i = log_b(pv, i, emission);

	  /* No maximum found (that is, state never reached)
	     or the characters cannot be emitted by state i: */
	  if (max_value == -DBL_MAX || log_b_i == +1 ) {
	    set_phi(pv, u, v, i, +1);
	  }
	  else
	    set_phi(pv, u, v, i, max_value + log_b_i);
	}
      } /* complete time step for emitting states */
      /* silent states are not processed here */
    } /* End for v in Y */
    /* Next character in X */
    push_back_phi(pv, Y->length);
  } /* End for u in X */

  /* Termination: the list starts with the placeholder -1 for the end state */
  max_value = -DBL_MAX;
  ighmm_list_append(state_list, -1);
  /* if start_traceback_with is -1 (it is by default) search for the most
     likely state at the end of both sequences */
  if (start_traceback_with == -1) {
    for (j = 0; j < mo->N; j++){
      /* always address the last row explicitly; u is one past the end of X
	 once the main loop has run */
      end_phi = get_phi(pv, X->length-1, Y->length-1, 0, 0, j);
#ifdef DEBUG
      printf("phi(len_x)(len_y)(%i)=%f\n", j, end_phi);
#endif
      if ( end_phi != +1 && end_phi > max_value) {
	max_value = end_phi;
	state_list->last->val = j;
      }
    }
  }
  /* this is the special traceback mode for the d & c algorithm that also
     connects the traceback to the first state of the rest of the path */
  else {
#ifdef DEBUG
    printf("D & C traceback from state %i!\n", start_traceback_with);
    printf("Last characters emitted X: %i, Y: %i\n", 
	   ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet, 
			       X->length-1),
	   ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet, 
			       Y->length-1));
    for (j = 0; j < mo->N; j++){
      printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, X->length-1, Y->length-1, 0, 0, j)); 
    }
#endif
    max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, start_traceback_with);
    if (max_value != 1 && max_value > -DBL_MAX)
      state_list->last->val = start_traceback_with;
    else
      /* the requested end state is unreachable: report "no path" instead
	 of starting a traceback from the placeholder -1 */
      max_value = -DBL_MAX;
  }

  if (max_value == -DBL_MAX) {
    /* Sequence can't be generated from the model!  The list keeps its
       single placeholder entry -1. */
    *log_p = +1;
  }
  else {
    /* Backtracing */
    *log_p = max_value;
    /* start trace back at the end of both sequences */
    u = X->length - 1;
    v = Y->length - 1;
    current_state_index = state_list->first->val;
    while (current_state_index != -1) {
      /* the offsets are only looked up for a valid state index */
      off_x = mo->s[current_state_index].offset_x;
      off_y = mo->s[current_state_index].offset_y;
      if (u - off_x < -1 || v - off_y < -1)
	break;
      /* look up the preceding state and save it in the first position of
	 the state list */
      current_state_index = get_psi(pv, u, v, current_state_index);
      if (current_state_index != -1)
	ighmm_list_insert(state_list, current_state_index);
      /* move in the alignment matrix */
      u -= off_x;
      v -= off_y;
    }
  }

  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x , 
		mo->max_offset_y);
  /* hand the path over as an array; the list itself is no longer needed */
  state_seq = ighmm_list_to_array(state_list);
  *path_length = state_list->length;
  ighmm_list_free(state_list);
  return (state_seq);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x, 
		mo->max_offset_y);
  m_free(state_seq);
  ighmm_list_free(state_list);
  return NULL;
#undef CUR_PROC
} /* viterbi */
Example #6
0
/*============================================================================*/
/* Prepares the phi / psi tables of the plain Viterbi algorithm and computes
   the leading rows of the recurrence.

   The function sets every phi entry to +1 ("impossible") and then runs the
   recurrence for u = -1 .. max_offset_x.  This is the same recurrence as in
   the main loop of ghmm_dpmodel_viterbi_variable_tb(), except that a state
   may also be entered with its initial log probability at the cell it
   reaches by consuming its own offsets.

   pv    working store; pv->mo is the model
   X, Y  the two sequences */
static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y) {
#ifdef DEBUG
  int emission;
#endif
  int u, v, j, i, off_x, y;
  double log_in_a_ij;
  double value, max_value, previous_prob, log_b_i;  
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, 
		     int, int);
  log_in_a = &sget_log_in_a;

  /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
	pv->phi[off_x][y][j] = +1;
      }
    if ( mo->model_type & GHMM_kSilentStates ) { /* could go into silent state at t=0 */

    /* silent states are not handled here */
  }
  
  /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
  /* this is in principle the same as the main recurrence but adds initial
     probabilities to states that cannot be inhabited at u=0, v=0 because
     of greater offsets than one 
     iteration start is u=-1 v=-1 to allow states with offset_x == 0 
     which corresponds to a series of gap states before reading the first 
     character of x at position x=0, y=v or equally for offset_y == 0 */
  /* u, v <= max offsets */
    for (u = -1; u <= mo->max_offset_x; u++) {
      for (v = -mo->max_offset_y; v < Y->length; v++) {
	for (j = 0; j < mo->N; j++) 
	  {
	    /** initialization of phi (lookback matrix), psi (traceback) **/
	    set_phi(pv, u, v, j, +1);
	    set_psi(pv, u, v, j, -1);
	  }
	/* for each state i */
	for (i = 0; i < mo->N; i++) {
	/* Determine the maximum over all predecessors of state i:
	   max_j ( phi[pred_j] + log_in_a[i][j] ) */
	  if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
	    max_value = -DBL_MAX;
	    set_psi(pv, u, v, i, -1);
	    for (j = 0; j < mo->s[i].in_states; j++) {
	      /* look back in the phi matrix at the offsets */
	      previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, 
				      mo->s[i].offset_y, mo->s[i].in_id[j]);
	      log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	      /* skip predecessors that are unreachable or not connected */
	      if ( previous_prob != +1 && log_in_a_ij != +1) {
		value = previous_prob + log_in_a_ij;
		if (value > max_value) {
		  max_value = value;
		  /* remember the best predecessor for the traceback */
		  set_psi(pv, u, v, i, mo->s[i].in_id[j]);
		}
	      }
	      else
		{;} /* predecessor contributes nothing */
	    }
#ifdef DEBUG
	    /* sanity check of the emission index against the table size.
	       NOTE(review): an index equal to the table size passes this
	       check; `>=` may be intended -- confirm */
	    emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
				ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
				mo->size_of_alphabet[mo->s[i].alphabet],
				mo->s[i].offset_x, mo->s[i].offset_y);
	    if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	      printf("State %i\n", i);
	      ghmm_dpmodel_state_print(&(mo->s[i]));
	      printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		     ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		     mo->size_of_alphabet[mo->s[i].alphabet],
		     ghmm_dpmodel_emission_table_size(mo, i), emission);
	    }
#endif
	    /* log emission probability of the characters read at (u, v) */
	    log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
					ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
					mo->size_of_alphabet[mo->s[i].alphabet],
					mo->s[i].offset_x, mo->s[i].offset_y));
	    
	    /* this is the difference from the main loop:
	       check whether this state could be an initial state and add the
	       initial probability */
	    if (log_b_i == +1 ) {
	      /* characters cannot be emitted by state i */
	      set_phi(pv, u, v, i, +1);
	    }
	    else {
	      /* first store the best value reached through a predecessor
		 (without the emission term) ... */
	      if (max_value == -DBL_MAX)
		set_phi(pv, u, v, i, +1);
	      else
		set_phi(pv, u, v, i, max_value);
	      /* ... then let the initial log probability override it at the
		 one cell the state reaches by consuming its own offsets
		 from (-1, -1) */
	      if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && 
		  mo->s[i].offset_y - 1 == v) {
		set_phi(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
		printf("Initial log prob state %i at (%i, %i) = %f\n", i, u, v, get_phi(pv, u, v, 0, 0, i));
		printf("Characters emitted X: %i, Y: %i\n", 
		       ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		       ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
	      }
	      /* ... and finally add the emission term if the state is
		 reachable at all */
	      if (get_phi(pv, u, v, 0, 0, i) != 1)
		set_phi(pv, u, v, i, get_phi(pv, u, v, 0, 0, i) + log_b_i);
	    }
	  }
	} /* complete time step for emitting states */
	
	/* silent states are not processed here */
      } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
      push_back_phi(pv, Y->length);
    } /* End for u in X */
}