示例#1
0
/*============================================================================*/
/**
 * Look up a precomputed log emission probability in the propagate store.
 *
 * @param pv       store whose log_b table is read
 * @param state    row of pv->log_b (state index)
 * @param emission column of pv->log_b (emission index)
 * @return pv->log_b[state][emission]
 *
 * When DEBUG is defined, suspicious indices are reported on stderr. The
 * lookup itself is still performed afterwards, exactly as without DEBUG.
 */
static double log_b_prop(plocal_propagate_store_t * pv, int state, int emission) {
#ifdef DEBUG
  /* Valid state indices are 0 .. N-1, so state == N is already out of range. */
  if (state < 0 || state >= pv->mo->N)
    fprintf(stderr, "log_b_prop: State index out of bounds %i (number of states %i)\n",
	    state, pv->mo->N);
  /* Only query the emission table size for a state that actually exists.
     An index equal to the table size is not reported: '>' is deliberate. */
  else if (emission < 0 ||
	   emission > ghmm_dpmodel_emission_table_size(pv->mo, state))
    fprintf(stderr, "log_b_prop: Emission index out of bounds %i > %i for state %i\n",
	    emission, ghmm_dpmodel_emission_table_size(pv->mo, state), state);
#endif
  return pv->log_b[state][emission];
}
示例#2
0
/*============================================================================*/
/**
 * Fill the log-space lookup tables of a propagate store from the model.
 *
 * For every state j the incoming transition probabilities in_a[i][t_class]
 * and the emission probabilities b[emission] are converted with log().
 * A probability of exactly 0.0 is stored as +1 instead.
 *
 * @param mo model providing the probabilities
 * @param pv store whose log_in_a and log_b tables are written; both must
 *           already be allocated, with one extra trailing slot per log_b row
 */
static void pviterbi_prop_precompute (ghmm_dpmodel *mo, plocal_propagate_store_t *pv)
{
#define CUR_PROC "pviterbi_precompute"
  int i, j, emission, t_class, table_size;
  double prob;

  /* Precomputing the log(a_ij); the number of transition classes is taken
     from the source state in_id[i] of each incoming transition */
  for (j = 0; j < mo->N; j++) {
    for (i = 0; i < mo->s[j].in_states; i++) {
      for (t_class = 0; t_class < mo->s[mo->s[j].in_id[i]].kclasses; t_class++) {
	prob = mo->s[j].in_a[i][t_class];
	pv->log_in_a[j][i][t_class] = (prob == 0.0) ? +1 : log(prob);
      }
    }
  }

  /* Precomputing the log emission probabilities for each state */
  for (j = 0; j < mo->N; j++) {
    table_size = ghmm_dpmodel_emission_table_size(mo, j);
    for (emission = 0; emission < table_size; emission++) {
      prob = mo->s[j].b[emission];   /* DBL_EPSILON ? */
      pv->log_b[j][emission] = (prob == 0.0) ? +1 : log(prob);
    }
    /* the last field is for invalid emissions */
    pv->log_b[j][table_size] = 1;
  }
#undef CUR_PROC
}/* viterbi_precompute */
示例#3
0
/*============================================================================*/
/**
 * Dump a pair-HMM Viterbi local store to stdout.
 *
 * Prints the number of incoming states per state (the transition values
 * themselves are not printed, only a FIXME marker per incoming state) and
 * then every entry of the log_b table up to the emission table size.
 *
 * @param pv store to print; pv->mo supplies the model dimensions
 */
static void ghmm_dpmodel_print_viterbi_store(plocal_store_t * pv) {
  ghmm_dpmodel * model = pv->mo;
  int state;
  int idx;

  printf("Local store for pair HMM viterbi algorithm\n");
  printf("Log in a:\n");
  state = 0;
  while (state < model->N) {
    printf("state %i in states %i\n", state, model->s[state].in_states);
    idx = 0;
    while (idx < model->s[state].in_states) {
      /* the 3-dimensional log_in_a table is not printed yet */
      printf("FIXME: log_in_a has three dimensions!");
      idx++;
    }
    state++;
  }

  printf("Log b:\n");
  state = 0;
  while (state < model->N) {
    printf("state %i #chars: %i\n", state, ghmm_dpmodel_emission_table_size(model, state));
    idx = 0;
    while (idx < ghmm_dpmodel_emission_table_size(model, state)) {
      printf("Emission prob char: %i %f\n", idx, pv->log_b[state][idx]);
      idx++;
    }
    state++;
  }
}
示例#4
0
/*============================================================================*/
/**
 * Allocate a propagate store sized for the given model and Y length.
 *
 * Allocated members:
 *  - log_in_a:     [N][in_states of target][kclasses of source state]
 *  - log_b:        [N][emission table size + 1]
 *  - phi:          3d matrix [max_offset_x + 1][len_y + max_offset_y + 1][N]
 *  - phi_new:      [N]
 *  - end_of_first: [max_offset_x + 1][len_y + max_offset_y + 1][N], all NULL
 *  - topo_order:   [N], with topo_order_length set to 0
 *
 * @param mo    model that determines all dimensions; stored in the result
 * @param len_y length of the Y sequence; stored in the result
 * @return the new store, or NULL after jumping to the STOP label, where the
 *         partially built store is handed to pviterbi_propagate_free()
 */
static plocal_propagate_store_t * pviterbi_propagate_alloc (ghmm_dpmodel *mo, int len_y) {
#define CUR_PROC "pviterbi_propagate_alloc"
  plocal_propagate_store_t* store = NULL;
  int target, source, state, row, col;

  ARRAY_CALLOC (store, 1);
  store->mo = mo;
  store->len_y = len_y;

  /* log_in_a rows have individual lengths: first index is the target state */
  ARRAY_CALLOC (store->log_in_a, mo->N);
  for (target = 0; target < mo->N; target++) {
    /* second index: incoming (source) state */
    ARRAY_CALLOC (store->log_in_a[target], mo->s[target].in_states);
    for (source = 0; source < mo->s[target].in_states; source++) {
      /* third index: transition classes of the source state */
      ARRAY_CALLOC (store->log_in_a[target][source],
		    mo->s[mo->s[target].in_id[source]].kclasses);
    }
  }

  /* one extra slot per state beyond the emission table size */
  ARRAY_CALLOC (store->log_b, mo->N);
  for (state = 0; state < mo->N; state++) {
    ARRAY_CALLOC (store->log_b[state], ghmm_dpmodel_emission_table_size(mo, state) + 1);
  }
  if (!(store->log_b)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}

  store->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1, len_y + mo->max_offset_y + 1, mo->N);
  if (!(store->phi)) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
  ARRAY_CALLOC (store->phi_new, mo->N);

  /* table of cell pointers, same shape as phi, explicitly reset to NULL */
  ARRAY_CALLOC (store->end_of_first, mo->max_offset_x + 1);
  for (row = 0; row < mo->max_offset_x + 1; row++) {
    ARRAY_CALLOC (store->end_of_first[row], len_y + mo->max_offset_y + 1);
    for (col = 0; col < len_y + mo->max_offset_y + 1; col++) {
      ARRAY_CALLOC (store->end_of_first[row][col], mo->N);
      for (state = 0; state < mo->N; state++) {
	store->end_of_first[row][col][state] = NULL;
      }
    }
  }

  store->topo_order_length = 0;
  ARRAY_CALLOC (store->topo_order, mo->N);
  return(store);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  pviterbi_propagate_free(&store, mo->N, mo->max_offset_x, mo->max_offset_y, len_y);
  return(NULL);
#undef CUR_PROC
}
示例#5
0
/*============================================================================*/
/**
 * Run one Viterbi pass of the propagate variant over the window delimited
 * by start and stop and return the cell recorded for the best end state.
 *
 * The window bounds come from init_start_stop(); middle_x is the row halfway
 * between start_x and stop_x. init_phi_prop() initialises the store; the
 * loop below then covers rows start_x + max_offset_x + 1 .. stop_x - 1 of X
 * and columns start_y - max_offset_y .. stop_y - 1 of Y.
 *
 * For each non-silent state i the best incoming transition is selected. If
 * row u lies at or after middle_x by less than the state's offset_x, a new
 * cell is created with init_cell() and stored for (u, v, i); otherwise the
 * cell pointer stored for the chosen predecessor is copied forward.
 *
 * The value +1 is used throughout as the marker for "not reachable".
 *
 * @param mo    model
 * @param X     first sequence
 * @param Y     second sequence
 * @param start cell passed on to init_start_stop() and init_phi_prop()
 * @param stop  if NULL, all states are searched for the best final value;
 *              otherwise only stop->previous_state is considered
 * @param log_p output: best value at (stop_x - 1, stop_y - 1), or +1 if no
 *              admissible state was found
 * @param pv    propagate store used as working memory
 * @return the cell stored for the selected end state, or NULL if none was
 *         selected or none was stored
 */
static cell * pviterbi_propagate_step (ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
				       cell * start, cell * stop,
				       double * log_p,
				       plocal_propagate_store_t * pv) {
#define CUR_PROC "pviterbi_step"
  /* printf("---- propagate step -----\n"); */
  int u, v, j, i;
  double value, max_value, previous_prob;  
  /* int len_path  = mo->N*len; the length of the path is not known a priori */
  int start_x, start_y, stop_x, stop_y;
  double log_b_i, log_in_a_ij;
  cell * middle = NULL;
  int middle_x;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, 
		     ghmm_dpseq*, int, int);
  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  middle_x = start_x + (stop_x - start_x) / 2;
/*   if (mo->model_type & kSilentStates &&  */
/*       mo->silent != NULL &&  */
/*       mo->topo_order == NULL) { */
/*     ghmm_dmodel_topo_order( mo );  */
/*   } */
  init_phi_prop(pv, X, Y, start, stop);
#ifdef DEBUG
  if (start != NULL && mo->s[start->state].offset_y == 0) {
    for (u = 0; u<=mo->max_offset_x; u++) {
      printf("row %i of phi\n", u);
      for (v = start_y - 1; v < stop_y; v++) {
	printf("phi(0, %i, %i): %f, ", v, start->state, get_phi_prop(pv, 0, v, 0, 0, start->state));
      }
      printf("\n\n");
    }
  }
#endif
  /* u, v > 0 */
  /** THIS IS THE MAIN RECURRENCE **/
  /* printf("Main loop x from %i to %i and y from %i to %i\n", 
     start_x + mo->max_offset_x + 1, stop_x, start_y, stop_y);*/
  for (u = start_x + mo->max_offset_x + 1; u < stop_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) 
	{
	  /** initialization of phi (lookback matrix) **/
	  set_phi_prop(pv, u, v, j, +1);
	  /* NOTE(review): x is passed as 0 here and a few lines below, while
	     the later calls pass u; confirm set_end_of_first ignores x */
	  set_end_of_first(pv, 0, v, j, NULL);
	}
      for (i = 0; i < mo->N; i++) {
	/* Determine the maximum */
	/* max_phi = phi[i] + log_in_a[j][i] ... */
	/* states flagged silent are skipped; their phi keeps the value +1 */
	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i]) {
	  max_value = -DBL_MAX;
	  set_end_of_first(pv, 0, v, i, NULL);
	  for (j = 0; j < mo->s[i].in_states; j++) {
	    /* look back in the phi matrix at the offsets */
	    previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x,
					   mo->s[i].offset_y, mo->s[i].in_id[j]);
	    log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	    /* +1 marks an unreachable predecessor or a zero transition */
	    if ( previous_prob != +1 && 
		 log_in_a_ij != +1) {
	      value = previous_prob + log_in_a_ij;
	      if (value > max_value) {
		max_value = value;
		/* Critical point for the propagate algorithm if we are at the
		   middle point of sequence X store this at the end point of
		   the first alignment */	      
		if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
		  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1),
						  v - (mo->s[i].offset_y - 1), 
						  i, mo->s[i].in_id[j],
						  previous_prob, 
						  log_in_a_ij);
		  /* a better predecessor replaces the cell created earlier
		     for this (u, v, i) */
		  /* NOTE(review): cell pointers are also copied between
		     entries in the else-branch below; confirm the freed cell
		     cannot still be referenced from another entry */
		  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
		    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
		    m_free(old);
		  }
		  set_end_of_first(pv, u, v, i, end_of_first);
		}
		else {
		  /* at all other points simply propagate the values on */
		  set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, 
							     mo->s[i].offset_x,
							     mo->s[i].offset_y,
							     mo->s[i].in_id[j]));
		}
	      }
	    }
	    else
	      {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
	  }
#ifdef DEBUG
	  int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
			      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
			      mo->size_of_alphabet[mo->s[i].alphabet],
			      mo->s[i].offset_x, mo->s[i].offset_y);
	  if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	    printf("State %i\n", i);
	    ghmm_dpmodel_state_print(&(mo->s[i]));
	    printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		   mo->size_of_alphabet[mo->s[i].alphabet],
		   ghmm_dpmodel_emission_table_size(mo, i), emission);
	  }
#endif
	  /* log emission probability of the character pair at (u, v) */
	  log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
					   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
					   mo->size_of_alphabet[mo->s[i].alphabet],
					   mo->s[i].offset_x, mo->s[i].offset_y));
	  /* No maximum found (that is, state never reached)
	     or the output O[t] = 0.0: */
	  if (max_value == -DBL_MAX ||/* and then also: (v->psi[t][j] == -1) */
	      log_b_i == +1 ) {
	    set_phi_prop(pv, u, v, i, 1);
	  }
	  else
	    set_phi_prop(pv, u, v, i, max_value + log_b_i);
	}
      } /* complete time step for emitting states */
    
	/* last_osc = osc; */ /* save last transition class */

      /* NOTE: silent states are not processed; the former call is disabled:
	 if (mo->model_type & kSilentStates) p__viterbi_silent( mo, t, v ); */

    } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
    push_back_phi_prop(pv, Y->length);
  } /* End for u in X */
  /* Termination */
  max_value = -DBL_MAX;
  /* for the last segment search for the maximum probability at the end of the
     two sequences */
  if (stop == NULL){
    for (j = 0; j < mo->N; j++){
#ifdef DEBUG
    /* printf("phi(len_x)(len_y)(%i)=%f\n", j, pv->phi[0][stop_y-1][j]);
       ghmm_dpmodel_print_cell(pv->end_of_first[0][stop_y - 1][j]); */
#endif
      if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) != +1 && 
	   get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j) > max_value) { 
	max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, j);
	middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, j);
      }
    }
  }
  /* traceback for the interior segments has to start with the previous state
     of the middle because the path has to be connected */
  else {
    if ( get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state) != +1 ) {
      max_value = get_phi_prop(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
      middle = get_end_of_first(pv, stop_x - 1, stop_y - 1, 0, 0, stop->previous_state);
    }
  }
  if (max_value == -DBL_MAX) {
    /* Sequence can't be generated from the model! */
    *log_p = +1;
  }
  else {
    *log_p = max_value;
  }
  return middle;
#undef CUR_PROC
}
示例#6
0
/*============================================================================*/
/**
 * Initialise the propagate store for a window and compute its first rows.
 *
 * Steps performed:
 *  1. window bounds are taken from init_start_stop(); start_x / start_y are
 *     saved in pv;
 *  2. every entry of pv->phi is set to +1 (the "not reachable" marker) and
 *     every entry of pv->end_of_first is set to NULL;
 *  3. for u = -1 .. max_offset_x and v = start_y - max_offset_y .. stop_y - 1
 *     the same recurrence as in the main loop is evaluated. In addition, a
 *     state whose log_pi is not 1 is seeded with log_pi at the position
 *     (offset_x - 1, offset_y - 1 + start_y);
 *  4. after each u, push_back_phi_prop() is called.
 *
 * In this function u is relative to start_x: sequence X is read at
 * u + start_x.
 *
 * @param pv    store to initialise; pv->mo supplies the model
 * @param X     first sequence
 * @param Y     second sequence
 * @param start passed to init_start_stop(); also read by the DEBUG output
 * @param stop  passed to init_start_stop()
 */
static void init_phi_prop (plocal_propagate_store_t * pv, ghmm_dpseq * X,
			   ghmm_dpseq * Y, cell * start, cell * stop) {
#define CUR_PROC "init_phi_prop"
  int u, v, j, i, off_x, y;
  double value, max_value, previous_prob, log_b_i, log_in_a_ij ;
  int start_x, start_y, stop_x, stop_y, middle_x;
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_propagate_store_t*, int, int, ghmm_dpseq*, 
		     ghmm_dpseq*, int, int);
  log_in_a = &sget_log_in_a_prop;
  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
  pv->start_x = start_x;
  pv->start_y = start_y;
  middle_x = start_x + (stop_x - start_x) / 2;
  /* to be sure that we do not look up something out of the bounds set the
     whole matrix to 1 */
  /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
	pv->phi[off_x][y][j] = +1;
      }
  /* Initialize the end_of_first matrix */
  /* NOTE(review): existing cells are dropped without being released (the
     m_free call is disabled); confirm they are owned and freed elsewhere */
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) 
	if (pv->end_of_first[off_x][y][j]) {
	  /* m_free(pv->end_of_first[off_x][y][j]); */
	  pv->end_of_first[off_x][y][j] = NULL;
	}
  /* placeholder: nothing is done for silent states at this point */
  if (mo->model_type & GHMM_kSilentStates) { /* could go into silent state at t=0 */
    /*p__viterbi_silent( mo, t=0, v);*/
  }
  
  /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
  /* this is in principle the same as the main recurrence but adds initial
     probabilities to states that cannot be occupied at u=0, v=0 because
     of greater offsets than one */
  /* u, v <= max offsets */
  for (u = -1; u <= mo->max_offset_x; u++) {
    for (v = start_y - mo->max_offset_y; v < stop_y; v++) {
      for (j = 0; j < mo->N; j++) 
	{
	  /** initialization of phi (lookback matrix) **/
	  set_phi_prop(pv, u, v, j, 1);
	  /** traceback for the propagate algorithm **/
	  set_end_of_first(pv, u, v, j, NULL);
	}
      for (i = 0; i < mo->N; i++) {
	/* Determine the maximum */
	/* max_phi = phi[i] + log_in_a[j][i] ... */
	/* states flagged silent are skipped; their phi keeps the value 1 */
	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
	  max_value = -DBL_MAX;
	  set_end_of_first(pv, u, v, i, NULL);
	  for (j = 0; j < mo->s[i].in_states; j++) {
	    /* look back in the phi matrix at the offsets */
	    previous_prob = get_phi_prop(pv, u, v, mo->s[i].offset_x, 
					 mo->s[i].offset_y, mo->s[i].in_id[j]);
	    log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	    /* +1 marks an unreachable predecessor or a zero transition */
	    if ( previous_prob != +1 && log_in_a_ij != +1) {
	      value = previous_prob + log_in_a_ij;
	      if (value > max_value) {
		max_value = value;
		/* Critical point for the propagate algorithm if we are at the
		   middle point of sequence X store this at the end point of
		   the first alignment */
		if (u - middle_x < mo->s[i].offset_x && u - middle_x >= 0) {
		  cell * end_of_first = init_cell(u - (mo->s[i].offset_x - 1), 
						  v - (mo->s[i].offset_y - 1), 
						  i, mo->s[i].in_id[j],
						  previous_prob, 
						  log_in_a_ij);
		  /* a better predecessor replaces the cell created earlier
		     for this (u, v, i) */
		  if (get_end_of_first(pv, u, v, 0, 0, i) != NULL) {
		    cell * old = get_end_of_first(pv, u, v, 0, 0, i);
		    m_free(old);
		  }
		  set_end_of_first(pv, u, v, i, end_of_first);
		}
		else {
		  /* at all other points simply propagate the values on */
		  set_end_of_first(pv, u, v, i, get_end_of_first(pv, u, v, 
							     mo->s[i].offset_x,
							     mo->s[i].offset_y,
							     mo->s[i].in_id[j]));
		}
	      }
	    }
	    else
	      {;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
	  }
#ifdef DEBUG
	  int emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x), 
			      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
			      mo->size_of_alphabet[mo->s[i].alphabet],
			      mo->s[i].offset_x, mo->s[i].offset_y);
	  if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	    printf("State %i\n", i);
	    ghmm_dpmodel_state_print(&(mo->s[i]));
	    /* NOTE(review): charX is read at u here, but at u + start_x
	       everywhere else in this function */
	    printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		   mo->size_of_alphabet[mo->s[i].alphabet],
		   ghmm_dpmodel_emission_table_size(mo, i), emission);
	  }
#endif
	  /* log emission probability of the character pair (u + start_x, v) */
	  log_b_i = log_b_prop(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, 
							       mo->s[i].alphabet,
							       u + start_x),
					   ghmm_dpseq_get_char(Y, 
							       mo->s[i].alphabet,
							       v),
					   mo->size_of_alphabet[mo->s[i].alphabet],
					   mo->s[i].offset_x, mo->s[i].offset_y));

	  /* this is the difference from the main loop:
	     check whether this state could be an initial state and add the
	     initial probability */
	  if (log_b_i == +1 ) {
	    set_phi_prop(pv, u, v, i, +1);
	  }
	  else {
	    if (max_value == -DBL_MAX)
	      set_phi_prop(pv, u, v, i, +1);
	    else
	      set_phi_prop(pv, u, v, i, max_value);
	    /* if (mo->s[i].pi != 0 && mo->s[i].offset_x - 1 == u && 
	       mo->s[i].offset_y - 1 + start_y == v) { */
	    /* the initial log probability overrides the value set just above */
	    if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && 
		mo->s[i].offset_y - 1 + start_y == v){
	      set_phi_prop(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG	     
	      printf("Initial log prob state %i at (%i, %i) = %f\n", i, start_x + u, v, get_phi_prop(pv, u, v, 0, 0, i));
	      printf("Characters emitted X: %i, Y: %i\n", 
		     ghmm_dpseq_get_char(X, mo->s[i].alphabet, u + start_x),
		     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
	    }
	    /* add the emission term only if the state is reachable */
	    if (get_phi_prop(pv, u, v, 0, 0, i) != 1) {
	      set_phi_prop(pv, u, v, i, get_phi_prop(pv, u, v, 0, 0, i) + log_b_i);
	    }
	  }
	}
      } /* complete time step for emitting states */
    
	/* last_osc = osc; */ /* save last transition class */

      /* NOTE: silent states are not processed; the former call is disabled:
	 if (mo->model_type & kSilentStates) p__viterbi_silent( mo, t, v ); */

    } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
    push_back_phi_prop(pv, Y->length);
#ifdef DEBUG
    /* report the largest reachable value found after the push back */
    if (start != NULL && mo->s[start->state].offset_y == 0) {
      max_value = -DBL_MAX;
      i = -1;     
      y = -1;
      off_x = -1;
      int x;
      for (x = 0; x<=mo->max_offset_x; x++)
	for (v = - mo->max_offset_y; v<Y->length; v++) {
	  for (j=0; j<mo->N; j++) {
	    if (get_phi_prop(pv, x, v, x, 0, j) >= max_value && 
		get_phi_prop(pv, x, v, x, 0, j) < 1 - PROP_EPS) {
	      max_value = get_phi_prop(pv, x, v, x, 0, j);
	      i = j;
	      off_x = x;
	      y = v;
	    }
	  }
	}
      printf("u = %i start_x = %i off_x = %i ", u, start_x, off_x);
      printf("max log prob state %i at (%i, %i) = %f after pushback\n",
	     i, start_x + u - (off_x - 1), y, get_phi_prop(pv, u, y, off_x, 0, i));
    }
#endif
  } /* End for u in X */
#undef CUR_PROC
}
示例#7
0
/*============================================================================*/
/**
 * Compute a Viterbi path of the pair HMM mo for the sequences X and Y,
 * using a full traceback table (psi).
 *
 * The value +1 is used as the marker for "not reachable / probability zero".
 *
 * @param mo                   model
 * @param X                    first sequence
 * @param Y                    second sequence
 * @param log_p                output: value of the chosen end state, or +1
 *                             if no end state is reachable
 * @param path_length          output: number of entries in the returned array
 * @param start_traceback_with -1 to start the traceback at the best final
 *                             state; otherwise the state index to start from
 *                             (divide & conquer mode)
 * @return array of state indices produced by ighmm_list_to_array(); it
 *         contains the single entry -1 when no admissible end state was
 *         found. NULL is returned if the local store cannot be allocated;
 *         log_p and path_length are then left untouched.
 *         Presumably the caller has to release the array - confirm.
 */
int *ghmm_dpmodel_viterbi_variable_tb(ghmm_dpmodel *mo, ghmm_dpseq * X, ghmm_dpseq * Y,
				 double *log_p, int *path_length,
				 int start_traceback_with) {
#define CUR_PROC "ghmm_dpmodel_viterbi"
  int u, v, j, i, off_x, off_y, current_state_index;
  double value, max_value, previous_prob;
  double phi_end;
  plocal_store_t *pv = NULL;
  int *state_seq = NULL;
  int emission;
  double log_b_i, log_in_a_ij;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, int, int);
  i_list * state_list;

  /* the length of the path is not known a priori, so collect it in a list */
  state_list = ighmm_list_init_list();
  log_in_a = &sget_log_in_a;

  /* Allocate the matrices log_in_a, log_b, phi, phi_new and psi */
  pv = pviterbi_alloc(mo, X->length, Y->length);
  if (!pv)                        { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; }

  /* Precomputing the log(a_ij) and log(bj(ot)) */
  pviterbi_precompute(mo, pv);
  /* Initialize the lookback matrix (for positions [-offsetX,0], [-1, len_y] */
  init_phi(pv, X, Y);

  /* u > max_offset_x , v starts at -max_offset_y to allow states with
     offset_x == 0, which corresponds to a series of gap states before
     reading the first character of x at position x=0, y=v */
  /** THIS IS THE MAIN RECURRENCE **/
  for (u = mo->max_offset_x + 1; u < X->length; u++) {
    for (v = -mo->max_offset_y; v < Y->length; v++) {
      for (j = 0; j < mo->N; j++)
	{
	  /** initialization of phi (lookback matrix), psi (traceback) **/
	  set_phi(pv, u, v, j, +1);
	  set_psi(pv, u, v, j, -1);
	}

      for (i = 0; i < mo->N; i++) {
	/* states flagged silent are skipped; their phi keeps the value +1 */
	if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
	  /* Determine the maximum over all incoming transitions */
	  max_value = -DBL_MAX;
	  set_psi(pv, u, v, i, -1);
	  for (j = 0; j < mo->s[i].in_states; j++) {
	    /* look back in the phi matrix at the offsets */
	    previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, mo->s[i].offset_y, mo->s[i].in_id[j]);
	    log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	    if ( previous_prob != +1 && log_in_a_ij != +1) {
	      value = previous_prob + log_in_a_ij;
	      if (value > max_value) {
		max_value = value;
		set_psi(pv, u, v, i, mo->s[i].in_id[j]);
	      }
	    }
	  }

	  /* emission index of the character pair at (u, v); computed once
	     and shared by the DEBUG check and the table lookup */
	  emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
			      ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
			      mo->size_of_alphabet[mo->s[i].alphabet],
			      mo->s[i].offset_x, mo->s[i].offset_y);
#ifdef DEBUG
	  if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	    printf("State %i\n", i);
	    ghmm_dpmodel_state_print(&(mo->s[i]));
	    printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		   ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		   ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		   mo->size_of_alphabet[mo->s[i].alphabet],
		   ghmm_dpmodel_emission_table_size(mo, i), emission);
	  }
#endif
	  log_b_i = log_b(pv, i, emission);

	  /* No maximum found (that is, state never reached)
	     or the output O[t] = 0.0: */
	  if (max_value == -DBL_MAX || log_b_i == +1 ) {
	    set_phi(pv, u, v, i, +1);
	  }
	  else
	    set_phi(pv, u, v, i, max_value + log_b_i);
	}
      } /* complete time step for emitting states */

      /* NOTE: silent states are not processed here */
    } /* End for v in Y */
    /* Next character in X */
    push_back_phi(pv, Y->length);
  } /* End for u in X */

  /* Termination */
  max_value = -DBL_MAX;
  /* placeholder for the final state; stays -1 if no end state is found */
  ighmm_list_append(state_list, -1);
  /* if start_traceback_with is -1 (it is by default) search for the most 
     likely state at the end of both sequences */
  if (start_traceback_with == -1) {
    for (j = 0; j < mo->N; j++){
      /* read the end position once, with the same coordinates that the
	 divide & conquer branch below uses */
      phi_end = get_phi(pv, X->length-1, Y->length-1, 0, 0, j);
#ifdef DEBUG
      printf("phi(len_x)(len_y)(%i)=%f\n", j, phi_end);
#endif
      if ( phi_end != +1 && phi_end > max_value) {
	max_value = phi_end;
	state_list->last->val = j;
      }
    }
  }
  /* this is the special traceback mode for the d & c algorithm that also 
     connects the traceback to the first state of the rest of the path */
  else {
#ifdef DEBUG
    printf("D & C traceback from state %i!\n", start_traceback_with);
    printf("Last characters emitted X: %i, Y: %i\n", 
	   ghmm_dpseq_get_char(X, mo->s[start_traceback_with].alphabet, 
			       X->length-1),
	   ghmm_dpseq_get_char(Y, mo->s[start_traceback_with].alphabet, 
			       Y->length-1));
    for (j = 0; j < mo->N; j++){
      printf("phi(len_x)(len_y)(%i)=%f\n", j, get_phi(pv, X->length-1, Y->length-1, 0, 0, j)); 
    }
#endif
    max_value = get_phi(pv, X->length-1, Y->length-1, 0, 0, start_traceback_with);
    if (max_value != 1 && max_value > -DBL_MAX)
      state_list->last->val = start_traceback_with;
  }
  if (max_value == -DBL_MAX) {
    /* Sequence can't be generated from the model! */
    *log_p = +1;
  }
  else {
    /* Backtracing, should put DEL path nicely */
    *log_p = max_value;
    /* start trace back at the end of both sequences */
    u = X->length - 1;
    v = Y->length - 1;
    current_state_index = state_list->first->val;
    /* The state index is checked before it is used as a subscript: it is
       -1 when the forced start state is unreachable and becomes -1 once
       psi has no predecessor, and mo->s[-1] must never be read. */
    while (current_state_index != -1) {
      off_x = mo->s[current_state_index].offset_x;
      off_y = mo->s[current_state_index].offset_y;
      if (u - off_x < -1 || v - off_y < -1)
	break;
      /* look up the preceding state and save it in the first position of
	 the state list */
      current_state_index = get_psi(pv, u, v, current_state_index);
      if (current_state_index != -1)
	ighmm_list_insert(state_list, current_state_index);
      /* move in the alignment matrix */
      u -= off_x;
      v -= off_y;
    }
  }

  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x , 
		mo->max_offset_y);
  state_seq = ighmm_list_to_array(state_list);
  *path_length = state_list->length;
  /* the list was only a temporary container; release it as the error path
     below does */
  ighmm_list_free(state_list);
  return (state_seq);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* Free the memory space */
  pviterbi_free(&pv, mo->N, X->length, Y->length, mo->max_offset_x, 
		mo->max_offset_y);
  m_free(state_seq);
  ighmm_list_free(state_list);
  return NULL;
#undef CUR_PROC
} /* viterbi */
示例#8
0
/*============================================================================*/
/**
 * Initialise the lookback matrix phi of a Viterbi store and compute the
 * first rows of the recurrence.
 *
 * Every entry of pv->phi is first set to +1, the marker for "not reachable".
 * Then, for u = -1 .. max_offset_x and v = -max_offset_y .. Y->length - 1,
 * the same recurrence as in the main loop is evaluated. In addition, a state
 * whose log_pi is not 1 is seeded with log_pi at the position
 * (offset_x - 1, offset_y - 1). After each u, push_back_phi() is called.
 *
 * @param pv store to initialise; pv->mo supplies the model
 * @param X  first sequence
 * @param Y  second sequence
 */
static void init_phi(plocal_store_t * pv, ghmm_dpseq * X, ghmm_dpseq * Y) {
#ifdef DEBUG
  int emission;
#endif
  int u, v, j, i, off_x, y;
  double log_in_a_ij;
  double value, max_value, previous_prob, log_b_i;  
  /* printf("ghmm_dpmodel_viterbi init\n"); */
  ghmm_dpmodel * mo = pv->mo;
  double (*log_in_a)(plocal_store_t*, int, int, ghmm_dpseq*, ghmm_dpseq*, 
		     int, int);
  log_in_a = &sget_log_in_a;

  /* Initialize the lookback matrix (for positions [-offsetX,0], [0, len_y]*/
  for (off_x=0; off_x<mo->max_offset_x + 1; off_x++)
    for (y=0; y<Y->length + mo->max_offset_y + 1; y++)
      for (j=0; j<mo->N; j++) {
	pv->phi[off_x][y][j] = +1;
      }
    /* placeholder: nothing is done for silent states at this point */
    if ( mo->model_type & GHMM_kSilentStates ) { /* could go into silent state at t=0 */

    /*p__viterbi_silent( mo, t=0, v);*/
  }
  
  /* initialize for offsets > 1 (u < max_offset_x, v < max_offset_y) */
  /* this is in principle the same as the main recurrence but adds initial
     probabilities to states that cannot be occupied at u=0, v=0 because
     of greater offsets than one 
     iteration start is u=-1 v=-1 to allow states with offset_x == 0 
     which corresponds to a series of gap states before reading the first 
     character of x at position x=0, y=v or equally for offset_y == 0 */
  /* u, v <= max offsets */
    for (u = -1; u <= mo->max_offset_x; u++) {
      for (v = -mo->max_offset_y; v < Y->length; v++) {
	for (j = 0; j < mo->N; j++) 
	  {
	    /** initialization of phi (lookback matrix), psi (traceback) **/
	    set_phi(pv, u, v, j, +1);
	    set_psi(pv, u, v, j, -1);
	  }
	/* for each state i */
	for (i = 0; i < mo->N; i++) {
	/* Determine the maximum */
	/* max_phi = phi[i] + log_in_a[j][i] ... */
	  /* states flagged silent are skipped; their phi keeps the value +1 */
	  if (!(mo->model_type & GHMM_kSilentStates) || !mo->silent[i] ) {
	    max_value = -DBL_MAX;
	    set_psi(pv, u, v, i, -1);
	    for (j = 0; j < mo->s[i].in_states; j++) {
	      /* look back in the phi matrix at the offsets */
	      previous_prob = get_phi(pv, u, v, mo->s[i].offset_x, 
				      mo->s[i].offset_y, mo->s[i].in_id[j]);
	      log_in_a_ij = (*log_in_a)(pv, i, j, X, Y, u, v);
	      /* +1 marks an unreachable predecessor or a zero transition */
	      if ( previous_prob != +1 && log_in_a_ij != +1) {
		value = previous_prob + log_in_a_ij;
		if (value > max_value) {
		  max_value = value;
		  /* remember the best predecessor for the traceback */
		  set_psi(pv, u, v, i, mo->s[i].in_id[j]);
		}
	      }
	      else
		{;} /* fprintf(stderr, " %d --> %d = %f, \n", i,i,v->log_in_a[i][i]); */
	    }
#ifdef DEBUG
	    emission = ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
				ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
				mo->size_of_alphabet[mo->s[i].alphabet],
				mo->s[i].offset_x, mo->s[i].offset_y);
	    if (emission > ghmm_dpmodel_emission_table_size(mo, i)){
	      printf("State %i\n", i);
	      ghmm_dpmodel_state_print(&(mo->s[i]));
	      printf("charX: %i charY: %i alphabet size: %i emission table: %i emission index: %i\n", 
		     ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		     ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
		     mo->size_of_alphabet[mo->s[i].alphabet],
		     ghmm_dpmodel_emission_table_size(mo, i), emission);
	    }
#endif
	    /* log emission probability of the character pair at (u, v) */
	    log_b_i = log_b(pv, i, ghmm_dpmodel_pair(ghmm_dpseq_get_char(X, mo->s[i].alphabet, u), 
					ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v),
					mo->size_of_alphabet[mo->s[i].alphabet],
					mo->s[i].offset_x, mo->s[i].offset_y));
	    
	    /* this is the difference from the main loop:
	       check whether this state could be an initial state and add the
	       initial probability */
	    if (log_b_i == +1 ) {
	      set_phi(pv, u, v, i, +1);
	    }
	    else {
	      if (max_value == -DBL_MAX)
		set_phi(pv, u, v, i, +1);
	      else
		set_phi(pv, u, v, i, max_value);
	      /* if (mo->s[i].pi != 0 && mo->s[i].offset_x - 1 == u && 
		 mo->s[i].offset_y - 1 == v) { */
	      /* the initial log probability overrides the value set just above */
	      if (mo->s[i].log_pi != 1 && mo->s[i].offset_x - 1 == u && 
		  mo->s[i].offset_y - 1 == v) {
		set_phi(pv, u, v, i, mo->s[i].log_pi);
#ifdef DEBUG
		printf("Initial log prob state %i at (%i, %i) = %f\n", i, u, v, get_phi(pv, u, v, 0, 0, i));
		printf("Characters emitted X: %i, Y: %i\n", 
		       ghmm_dpseq_get_char(X, mo->s[i].alphabet, u),
		       ghmm_dpseq_get_char(Y, mo->s[i].alphabet, v));
#endif
	      }
	      /* add the emission term only if the state is reachable */
	      if (get_phi(pv, u, v, 0, 0, i) != 1)
		set_phi(pv, u, v, i, get_phi(pv, u, v, 0, 0, i) + log_b_i);
	    }
	  }
	  /* if (v == 0) {
	     printf"(%i, %i, %i) preceding %i\n", u, v, i, pv->psi[u][v][i]);
	     } */
	} /* complete time step for emitting states */
	
	/* last_osc = osc; */
	/* save last transition class */
	
	/* NOTE: silent states are not processed; the former call is disabled:
	   if (mo->model_type & kSilentStates) p__viterbi_silent( mo, t, v ); */
	
      } /* End for v in Y */
    /* Next character in X */
    /* push back the old phi values */
      push_back_phi(pv, Y->length);
    } /* End for u in X */
}