Exemple #1
0
/**
 * Allocates the three work arrays used alongside a discrete model.
 *
 * On success
 *   - (*matrix_b)[i] holds ghmm_ipow(mo, mo->M, mo->order[i] + 1) entries
 *     for each of the mo->N states,
 *   - *matrix_a holds mo->N * mo->N entries, addressed as
 *     matrix_a(i,j) = matrix_a[i*mo->N+j],
 *   - *matrix_pi holds mo->N entries.
 *
 * @param matrix_b   out: per-state rows
 * @param matrix_a   out: flat N x N array
 * @param matrix_pi  out: array of N entries
 * @param mo         model supplying N, M and order[]
 * @return 0 on success, -1 if an allocation failed; in that case whatever
 *         was allocated is handed to gradient_descent_gfree() and all three
 *         outputs are reset to NULL.
 */
static int gradient_descent_galloc (double ***matrix_b, double **matrix_a,
                             double **matrix_pi, ghmm_dmodel * mo)
{
#define CUR_PROC "gradient_descent_galloc"

  int i;

  /* The error path reads all three outputs, so they must hold defined
     values even when an early allocation fails. */
  *matrix_b = NULL;
  *matrix_a = NULL;
  *matrix_pi = NULL;

  /* Row table is allocated with ARRAY_CALLOC (not ARRAY_MALLOC) so that rows
     which were never reached do not contain garbage when the cleanup
     routine walks them. */
  ARRAY_CALLOC (*matrix_b, mo->N);
  for (i = 0; i < mo->N; i++)
    ARRAY_CALLOC ((*matrix_b)[i], ghmm_ipow (mo, mo->M, mo->order[i] + 1));

  /* matrix_a(i,j) = matrix_a[i*mo->N+j] */
  ARRAY_CALLOC (*matrix_a, mo->N * mo->N);

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, mo->N);

  return 0;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  gradient_descent_gfree (*matrix_b, *matrix_a, *matrix_pi, mo->N);
  /* gradient_descent_gfree receives the pointers by value; do not leave the
     caller with dangling ones. */
  *matrix_b = NULL;
  *matrix_a = NULL;
  *matrix_pi = NULL;
  return -1;

#undef CUR_PROC
}
/**
 * Runs burnIn sweeps of the forward-backward Gibbs sampler.
 *
 * In every sweep each sequence in seq is processed in turn:
 * ghmm_cmodel_fbgibbstep() fills the state path for that sequence, the
 * sample data is collected from the path, the model mo is updated from it
 * and the sample data is cleared again.
 *
 * @param bayes   prior description handed to the sample-data helpers
 * @param mo      model that is updated in place
 * @param seq     sequences to sample over (all seq->seq_number of them)
 * @param burnIn  number of sweeps
 * @param seed    value passed to GHMM_RNG_SET
 * @return table with one state path per sequence (entry i has
 *         seq->seq_len[i] ints), or NULL if an allocation failed.
 *
 * NOTE(review): the declared return type is int* although the value is an
 * int** table; the signature is kept for existing callers - confirm and
 * correct the prototype together with them.
 * NOTE(review): `data` is set up by ghmm_alloc_sample_data() and never
 * released here; no matching release function is visible in this file.
 */
int* ghmm_bayes_hmm_fbgibbs(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq,
         int burnIn, int seed){
#define CUR_PROC "ghmm_cmodel_fbgibbs"
    double **alpha = NULL;
    double ***pmats = NULL;
    int **Q = NULL;
    int max_seq;
    int seq_iter;
    ghmm_sample_data data;

    //XXX seed
    GHMM_RNG_SET (RNG, seed);

    max_seq = ghmm_cseq_max_len(seq);
    alpha = ighmm_cmatrix_alloc(max_seq,mo->N);
    if (!alpha)
        goto STOP;
    pmats = ighmm_cmatrix_3d_alloc(max_seq, mo->N, mo->N);
    if (!pmats)
        goto STOP;

    ARRAY_CALLOC(Q, seq->seq_number);
    for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){
        ARRAY_CALLOC(Q[seq_iter], seq->seq_len[seq_iter]);
    }

    ghmm_alloc_sample_data(bayes, &data);
    ghmm_clear_sample_data(&data, bayes);//XXX swap parameter
    for(; burnIn > 0; burnIn--){
        for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){
            ghmm_cmodel_fbgibbstep(mo,seq->seq[seq_iter],seq->seq_len[seq_iter], Q[seq_iter],
                    alpha, pmats, NULL);
            ghmm_get_sample_data(&data, bayes, Q[seq_iter], seq->seq[seq_iter],
                    seq->seq_len[seq_iter]);
            ghmm_update_model(mo, bayes, &data);
            ghmm_clear_sample_data(&data, bayes);
        }
    }
    ighmm_cmatrix_free(&alpha, max_seq);
    ighmm_cmatrix_3d_free(&pmats, max_seq,mo->N);
    return (int *) Q;   /* see NOTE(review) on the return type above */
STOP:
    /* release whatever was allocated before the failure */
    if (Q) {
        for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++)
            free(Q[seq_iter]);          /* rows not reached are NULL */
        free(Q);
    }
    if (pmats)
        ighmm_cmatrix_3d_free(&pmats, max_seq, mo->N);
    if (alpha)
        ighmm_cmatrix_free(&alpha, max_seq);
    return NULL;
#undef CUR_PROC
}
Exemple #3
0
/*============================================================================*/
/**
 * Runs ghmm_cmodel_forward() on the sequence O of length T and lets it store
 * its result in *log_p.
 *
 * @param smo    model
 * @param O      observation sequence
 * @param T      length of O
 * @param log_p  out: written by ghmm_cmodel_forward()
 * @return 0 on success, -1 if an allocation failed or the forward call
 *         returned -1.
 */
int ghmm_cmodel_logp (ghmm_cmodel * smo, double *O, int T, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp"
  int res = -1;
  double **alpha = NULL, *scale = NULL;

  alpha = ighmm_cmatrix_stat_alloc (T, smo->N);
  if (!alpha) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (scale, T);
  /* run forward alg. */
  if (ghmm_cmodel_forward (smo, O, T, NULL, alpha, scale, log_p) == -1) {
    /* GHMM_LOG_QUEUED(LCONVERTED); */
    goto STOP;
  }
  res = 0;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* Either buffer may still be NULL when an allocation failed; only release
     what was really obtained (m_free's behaviour on NULL is not visible
     here, so it is never given one). */
  if (alpha) {
    ighmm_cmatrix_stat_free (&alpha);
  }
  if (scale) {
    m_free (scale);
  }
  return (res);
# undef CUR_PROC
}                               /* ghmm_cmodel_logp */
Exemple #4
0
/*
 * Allocates the arrays hanging off a single state `s`:
 *   - s->b with M entries,
 *   - s->out_id / s->out_a with out_states entries (only if out_states > 0),
 *   - s->in_id / s->in_a with in_states entries (only if in_states > 0).
 * Returns 0 when every allocation succeeded and -1 otherwise; nothing is
 * released here on failure.
 */
int ghmm_dpmodel_state_alloc(ghmm_dpstate * s, int M, int in_states, int out_states) {
# define CUR_PROC "ghmm_dpmodel_state_alloc"
  int status = -1;

  /* emission table */
  ARRAY_CALLOC (s->b, M);

  /* outgoing transitions */
  if (0 < out_states) {
    ARRAY_CALLOC (s->out_id, out_states);
    ARRAY_CALLOC (s->out_a, out_states);
  }

  /* incoming transitions */
  if (0 < in_states) {
    ARRAY_CALLOC (s->in_id, in_states);
    ARRAY_CALLOC (s->in_a, in_states);
  }

  status = 0;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return status;
# undef CUR_PROC
} /* ghmm_dpmodel_state_alloc */
Exemple #5
0
/*----------------------------------------------------------------------------*/
/*
 * Creates a local_store_topo with a queue and a topo_order array of mo->N
 * entries each; head, tail and topo_order_length start at 0.
 * Returns the new store, or NULL after handing the partially built store to
 * topo_free() when an allocation failed.
 */
static local_store_topo *topo_alloc (ghmm_dmodel * mo, int len)
{
#define CUR_PROC "sdtopo_alloc"
  local_store_topo *store = NULL;

  ARRAY_CALLOC (store, 1);

  /* static queue (array implementation), initially empty */
  ARRAY_CALLOC (store->queue, mo->N);
  store->topo_order_length = 0;
  store->head = 0;
  store->tail = 0;

  ARRAY_CALLOC (store->topo_order, mo->N);

  return store;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  topo_free (&store, mo->N, len);
  return NULL;
#undef CUR_PROC
}
Exemple #6
0
/* Allocates one ghmm_dpmodel via ARRAY_CALLOC and returns it;
   returns NULL when the allocation failed. */
ghmm_dpmodel * ghmm_dpmodel_init() {
#define CUR_PROC "ghmm_dpmodel_init"
  ghmm_dpmodel * model;

  ARRAY_CALLOC (model, 1);
  return (model);

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return (NULL);
#undef CUR_PROC
}
Exemple #7
0
/* Allocates a class-change context whose get_class callback is
   ghmm_dpmodel_default_transition_class and whose user_data is NULL.
   Returns NULL when the allocation failed. */
ghmm_dpmodel_class_change_context * ghmm_dpmodel_init_class_change() {
#define CUR_PROC "ghmm_dpmodel_init_class_change"
  ghmm_dpmodel_class_change_context * context;

  ARRAY_CALLOC (context, 1);

  context->user_data = NULL;
  context->get_class = &ghmm_dpmodel_default_transition_class;
  return (context);

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return (NULL);
#undef CUR_PROC
}
Exemple #8
0
/*----------------------------------------------------------------------------*/
/*
 * Classifies every sequence of every class with all noC models and counts
 * the misclassifications.
 *
 * For sequence l of class k the model with the largest ghmm_dmodel_logp()
 * value wins (the first model wins ties). If the winner is not k,
 * falseP[winner] and falseN[k] are incremented. Both counter arrays are
 * reset to 0 first. Progress and, for small inputs (fewer than 11 sequences
 * in the class and fewer than 8 classes), the score differences are printed
 * to stdout.
 */
static void discrime_print_statistics(ghmm_dmodel** mo, ghmm_dseq** sqs, int noC,
                                int* falseP, int* falseN)
{
#define CUR_PROC "discrime_print_statistics"

  int cls, seq_idx, model_idx;
  int winner;
  double *scores, best_score;
  ghmm_dseq *tokens;

  ARRAY_CALLOC (scores, noC);

  /* reset the counters */
  for (cls = 0; cls < noC; cls++) {
    falseP[cls] = 0;
    falseN[cls] = 0;
  }

  for (cls = 0; cls < noC; cls++) {
    tokens = sqs[cls];
    printf ("Looking at training tokens of Class %d\n", cls);

    for (seq_idx = 0; seq_idx < tokens->seq_number; seq_idx++) {
      winner = 0;
      best_score = -DBL_MAX;

      /* score the sequence under every model, remembering the best one */
      for (model_idx = 0; model_idx < noC; model_idx++) {
        ghmm_dmodel_logp (mo[model_idx], tokens->seq[seq_idx],
                          tokens->seq_len[seq_idx], &(scores[model_idx]));
        if (model_idx == 0 || scores[model_idx] > best_score) {
          best_score = scores[model_idx];
          winner = model_idx;
        }
      }

      /* printing fancy statistics */
      if (tokens->seq_number < 11 && noC < 8) {
        printf ("%2d: %8.4g", seq_idx, scores[0] - scores[winner]);
        for (model_idx = 1; model_idx < noC; model_idx++)
          printf (",  %8.4g", scores[model_idx] - scores[winner]);
        printf ("  \t+(%g)\n", scores[winner]);
      }

      /* counting false positives and false negatives */
      if (winner != cls) {
        falseP[winner] += 1;
        falseN[cls] += 1;
      }
    }

    printf ("%d false negatives in class %d.\n", falseN[cls], cls);
  }

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (scores);
  return;
#undef CUR_PROC
}
Exemple #9
0
/* Creates a detached list element carrying `val` (next == NULL).
   Returns NULL when the allocation failed. */
i_el * ighmm_list_init_el(int val) {
#define CUR_PROC "ighmm_list_init_el"
  i_el * node;

  ARRAY_CALLOC (node, 1);

  node->val = val;
  node->next = NULL;
  return (node);

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  free(node);
  return (NULL);
#undef CUR_PROC
}
Exemple #10
0
/**
 * Creates an empty integer list (first == last == NULL, length == 0).
 *
 * @return the new list, or NULL when the allocation failed.
 */
i_list * ighmm_list_init_list() {
#define CUR_PROC "ighmm_list_init_list"
  i_list * list = NULL;

  ARRAY_CALLOC (list, 1);
  list->first = NULL;
  list->last = NULL;
  list->length = 0;
  return list;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* STOP is only reached when the allocation of `list` itself failed, so
     there is no list to release; handing the failed pointer to
     ighmm_list_free() (as was done before) would operate on an invalid
     list. */
  return NULL;
#undef CUR_PROC
}
Exemple #11
0
/*===========================================================================*/
/**
 * Builds a ghmm_alphabet from the "symbol" children of the XML node `cur`.
 *
 * First pass: counts the symbol elements and checks that their "code"
 * attributes are consecutive, starting at 0. Second pass: stores the text
 * content of each symbol element in alfa->symbols, in document order.
 *
 * @param doc  not used by this function
 * @param cur  node whose children are scanned
 * @param f    not used by this function
 * @return the new alphabet (alfa->size symbols), or NULL when a code is
 *         missing / out of sequence or an allocation failed.
 */
static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {
#define CUR_PROC "parseAlphabet"

  char * str;
  int M, code, error;

  xmlNodePtr symbol;
  ghmm_alphabet * alfa = NULL;

  ARRAY_CALLOC(alfa, 1);

  /* pass 1: count symbols and validate their codes */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      code = getIntAttribute(symbol, "code", &error);
      if (error || code!=M) {
        str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M);
        GHMM_LOG(LERROR, str);
        m_free(str);
        goto STOP;
      } else
        M++;
    }
    symbol=symbol->next;
  }

  alfa->size = M;
  /*printf("Parsing alphabet with %d symbols\n", alfa->size);*/
  ARRAY_MALLOC(alfa->symbols, M);

  /* pass 2: collect the symbol strings */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol);
      /*printf("%d. symbol: %s\n", M, alfa->symbols[M-1]);*/
    }
    symbol=symbol->next;
  }

  return alfa;
STOP:
  /* STOP can be reached before `alfa` exists (its own allocation failed)
     and before alfa->symbols was allocated (bad code in pass 1), so each
     pointer is checked before it is touched. */
  if (alfa) {
    if (alfa->symbols) {
      m_free(alfa->symbols);
    }
    m_free(alfa);
  }
  return NULL;
#undef CUR_PROC
}
Exemple #12
0
/* ========================================================================= */
/**
 * Renders the model type bit mask as a space separated list of keywords.
 *
 * Each known GHMM_k* flag that is set contributes one keyword, in the fixed
 * order used below. The result is a freshly allocated string of at most
 * 200 bytes that the caller has to release.
 *
 * @param modelType  bit mask of GHMM_k* flags, must be > 0
 * @return the keyword string (empty if none of the known flags is set), or
 *         NULL if modelType <= 0 or the allocation failed.
 */
static char * strModeltype(int modelType) {
#define CUR_PROC "strModeltype"

  size_t end;
  char * mt = NULL;

  ARRAY_CALLOC(mt, 200);

  if (modelType > 0) {
    if (modelType & GHMM_kLeftRight)
      strcat(mt, "left-right ");
    if (modelType & GHMM_kSilentStates)
      strcat(mt, "silent ");
    if (modelType & GHMM_kTiedEmissions)
      strcat(mt, "tied ");
    if (modelType & GHMM_kHigherOrderEmissions)
      strcat(mt, "higher-order ");
    if (modelType & GHMM_kBackgroundDistributions)
      strcat(mt, "background ");
    if (modelType & GHMM_kLabeledStates)
      strcat(mt, "labeled ");
    if (modelType & GHMM_kTransitionClasses)
      strcat(mt, "transition-classes ");
    if (modelType & GHMM_kDiscreteHMM)
      strcat(mt, "discrete ");
    if (modelType & GHMM_kContinuousHMM)
      strcat(mt, "continuous ");
    if (modelType & GHMM_kPairHMM)
      strcat(mt, "pair ");
    if (modelType & GHMM_kMultivariate)
      strcat(mt, "multivariate ");
  } else {
    GHMM_LOG(LERROR, "can't write models with unspecified modeltype");
    goto STOP;
  }

  /* overwrite the last space; when no known flag matched the string is
     empty and there is nothing to trim (mt[-1] must not be written) */
  end = strlen(mt);
  if (end > 0)
    mt[end-1] = '\0';

  return mt;
 STOP:
  /* mt is NULL when its own allocation failed */
  if (mt) {
    m_free(mt);
  }
  return NULL;
#undef CUR_PROC
}
/*============================================================================*/
/*
 * Allocates a cell and fills it with the given coordinates (x, y), state,
 * previous state and the two log values.
 * Returns NULL when the allocation failed.
 */
static cell * init_cell (int x, int y, int state, int previous_state,
                         double log_p, double log_a) {
#define CUR_PROC "init_cell"
  cell * fresh;

  ARRAY_CALLOC (fresh, 1);

  /* position */
  fresh->x = x;
  fresh->y = y;
  /* state information */
  fresh->state = state;
  fresh->previous_state = previous_state;
  /* log values */
  fresh->log_p = log_p;
  fresh->log_a = log_a;

  return (fresh);

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return NULL;
#undef CUR_PROC
}
Exemple #14
0
/*===========================================================================*/
static int parseCSVList(const char * data, unsigned int size, double * array, int reverse) {
#define CUR_PROC "parseCSVList"

  int retval=0;
  int i;
  char * * next, * estr;
  double tmp;

  ARRAY_CALLOC(next, 1);

  for (i=0; i<size; i++) {
    array[i] = strtod(data, next);
    if (data == *next) {
      estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. entry %d of %d. (%s)", i, size, *next);
      GHMM_LOG(LERROR, estr);
      m_free(estr);
      retval=-1;
      break;
    }
    if (next)
      data = *next+1;
    else
      break;
  }

  if (i != size) {
    retval=-1;
    estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. sizes do not match (%d != %d)", i, size);
    GHMM_LOG(LERROR, estr);
    m_free(estr);
  }

  if (reverse) {
    for (i=0; i<size/2; i++) {
      tmp = array[i];
      array[i] = array[size-i-1];
      array[size-i-1] = tmp;
    }
  }

STOP:
  m_free(next);
  return retval;
#undef CUR_PROC
}
Exemple #15
0
/**
 * Copies the values of an integer list into a newly allocated array.
 *
 * @param list  list to copy; list->length entries are allocated and the
 *              elements are written in list order starting at list->first
 * @return the new array (to be released by the caller), or NULL when the
 *         allocation failed.
 */
int * ighmm_list_to_array(i_list * list) {
#define CUR_PROC "ighmm_list_to_array"
  int counter = 0;
  int * array = NULL;
  i_el * el;

  ARRAY_CALLOC (array, list->length);

  el = list->first;
  while(el != NULL) {
    array[counter] = el->val;
    el = el->next;
    counter++;
  }
  return array;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* Only the allocation of `array` can bring us here, so there is nothing
     to release; the former m_free(array) acted on a pointer that was never
     allocated. */
  return NULL;
#undef CUR_PROC
}
Exemple #16
0
/**
 * Forward-backward Gibbs sampling on a block-compressed version of the
 * first sequence of `seq`.
 *
 * The observations are compressed with compress_observations() and
 * merge_observations(); every one of the burnIn sweeps then precomputes the
 * block emissions, runs ghmm_cmodel_fbgibbstep() over the stats->total
 * blocks, collects the sample data, updates `mo` and clears the sample data.
 *
 * @param bayes              prior description for the sample-data helpers
 * @param mo                 model, updated in place
 * @param seq                sequences; only seq->seq[0] is used
 * @param burnIn             number of sweeps
 * @param seed               value passed to GHMM_RNG_SET
 * @param width, delta       compression parameters (passed through)
 * @param max_len_permitted  passed to merge_observations()
 * @return state path buffer of seq->seq_len[0] ints (caller releases it),
 *         or NULL if an allocation failed.
 *
 * NOTE(review): `data` is set up by ghmm_alloc_sample_data() and never
 * released here; no matching release function is visible in this file.
 */
int* ghmm_bayes_hmm_fbgibbs_compressed(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq,
         int burnIn, int seed, double width, double delta, int max_len_permitted){
#define CUR_PROC "ghmm_cmodel_fbgibbs"
    block_stats *stats = NULL;
    double ***b = NULL;
    double **alpha = NULL;
    double ***pmats = NULL;
    int *Q = NULL;
    int max_block_len;
    int i;
    ghmm_sample_data data;

    //XXX seed
    GHMM_RNG_SET (RNG, seed);

    stats = compress_observations(seq, width*delta, delta);
    stats = merge_observations(seq, width, max_len_permitted, stats);
    /* stats is dereferenced right below; bail out instead of crashing
       should the helpers ever hand back NULL */
    if (!stats)
        goto STOP;
    print_stats(stats, seq->seq_len[0]);
    //get max_block_len
    max_block_len = stats->length[0];
    for(i = 1; i < stats->total; i++){
        if(max_block_len < stats->length[i])
            max_block_len = stats->length[i];
    }
    //printf("max b len %d\n", max_block_len);
    b = ighmm_cmatrix_3d_alloc(stats->total, mo->N, 2);
    if (!b)
        goto STOP;
    alpha = ighmm_cmatrix_alloc(seq->seq_len[0],mo->N);
    if (!alpha)
        goto STOP;
    pmats = ighmm_cmatrix_3d_alloc(seq->seq_len[0], mo->N, mo->N);
    if (!pmats)
        goto STOP;
    ARRAY_CALLOC(Q, seq->seq_len[0]);//XXX extra length for compressed

    ghmm_alloc_sample_data(bayes, &data);
    ghmm_clear_sample_data(&data, bayes);//XXX swap parameter
    for(; burnIn > 0; burnIn--){
        //XXX only using seq 0
        precompute_block_emission(mo, stats, max_block_len, b);//XXX maxlen
        ghmm_cmodel_fbgibbstep(mo,seq->seq[0], stats->total, Q, alpha, pmats, b);
        ghmm_get_sample_data_compressed(&data, bayes, Q, seq->seq[0],
                stats->total, stats);
        ghmm_update_model(mo, bayes, &data);
        ghmm_clear_sample_data(&data, bayes);
    }
    ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0],mo->N);
    ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
    free_block_stats(&stats);
    return Q;
STOP:
    /* release whatever was obtained before the failure; b needs
       stats->total, so it goes before stats */
    free(Q);
    if (pmats)
        ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0], mo->N);
    if (alpha)
        ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    if (b)
        ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
    if (stats)
        free_block_stats(&stats);
    return NULL;
#undef CUR_PROC
}
Exemple #17
0
/*
 * Prepends a new hypothesis with label `newhyp` to the list *plist.
 * The new node points to `parlist` as its parent; a non-NULL parent gets its
 * refcount increased by one. If the node cannot be allocated the failure is
 * logged and the process terminates with exit(1).
 */
static void ighmm_hlist_insert (hypoList ** plist, int newhyp,
                              hypoList * parlist)
{
#define CUR_PROC "ighmm_hlist_insert"
  hypoList *node;

  ARRAY_CALLOC (node, 1);

  node->hyp_c = newhyp;
  node->parent = parlist;
  if (parlist != NULL)
    parlist->refcount++;

  /* link in front of the current head */
  node->next = *plist;
  *plist = node;
  return;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_hlist_insert failed\n");
  exit (1);
#undef CUR_PROC
}
Exemple #18
0
/*============================================================================*/
/*
 * Allocates the working storage for a pair Viterbi run on `mo` with
 * sequence lengths len_x and len_y:
 *   - log_in_a[target][source][class], sized from each state's in_states
 *     and the kclasses of the respective source state,
 *   - log_b[state] with ghmm_dpmodel_emission_table_size(mo, state) + 1
 *     entries,
 *   - phi   : (max_offset_x + 1) x (len_y + max_offset_y + 1) x N,
 *   - phi_new and topo_order with N entries each,
 *   - psi   : (len_x + max_offset_x + 1) x (len_y + max_offset_y + 1) x N.
 * Returns the store, or NULL after passing the partial store to
 * pviterbi_free() when any allocation failed.
 */
static plocal_store_t *pviterbi_alloc(ghmm_dpmodel *mo, int len_x, int len_y) {
#define CUR_PROC "pviterbi_alloc"
  plocal_store_t* store = NULL;
  int target, source;

  ARRAY_CALLOC (store, 1);
  store->mo = mo;
  store->len_y = len_y;
  store->len_x = len_x;

  /* transition table: rows have individual lengths */
  ARRAY_CALLOC (store->log_in_a, mo->N);
  for (target = 0; target < mo->N; target++) {
    /* one slot per incoming transition of the target state */
    ARRAY_CALLOC (store->log_in_a[target], mo->s[target].in_states);
    for (source = 0; source < mo->s[target].in_states; source++) {
      /* one slot per transition class of the source state */
      ARRAY_CALLOC (store->log_in_a[target][source],
                    mo->s[mo->s[target].in_id[source]].kclasses);
    }
  }

  /* emission table */
  ARRAY_CALLOC (store->log_b, mo->N);
  for (target = 0; target < mo->N; target++) {
    ARRAY_CALLOC (store->log_b[target],
                  ghmm_dpmodel_emission_table_size(mo, target) + 1);
  }
  if (store->log_b == NULL) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }

  store->phi = ighmm_cmatrix_3d_alloc(mo->max_offset_x + 1,
                                      len_y + mo->max_offset_y + 1, mo->N);
  if (store->phi == NULL) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }

  ARRAY_CALLOC (store->phi_new, mo->N);

  store->psi = ighmm_dmatrix_3d_alloc(len_x + mo->max_offset_x + 1,
                                      len_y + mo->max_offset_y + 1, mo->N);
  if (store->psi == NULL) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }

  store->topo_order_length = 0;
  ARRAY_CALLOC (store->topo_order, mo->N);

  return store;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  pviterbi_free((&store), mo->N, len_x, len_y, mo->max_offset_x, mo->max_offset_y);
  return NULL;
#undef CUR_PROC
} /* viterbi_alloc */
Exemple #19
0
/**
 * Allocates all work matrices for discriminative training over noC classes.
 *
 * With k = class, l = sequence of class k, m = model, the layouts are:
 *   - (*matrix_b)[k][l][m][i] : ghmm_ipow(mo[m], mo[m]->M, mo[m]->order[i] + 1)
 *                               entries for each state i of model m
 *   - (*matrix_a)[k][l][m]    : mo[m]->N * mo[m]->N entries,
 *                               matrix_a(k,l,i,j) = matrix_a[k][l][i*mo->N + j]
 *   - (*matrix_pi)[k][l][m]   : mo[m]->N entries
 *   - (*log_p)[k][l]          : noC entries; log_p[k][l][m] = log_prob of
 *                               l-th sequence of k-th class under the m-th model
 *   - (*omega)[k]             : sqs[k]->seq_number entries (outer derivatives)
 *   - (*omegati)[k][l]        : noC entries (omega tilde)
 *
 * @return 0 on success; -1 if an allocation failed, in which case
 *         discrime_gfree() is called on what exists and every output is
 *         reset to NULL.
 */
static int discrime_galloc (ghmm_dmodel ** mo, ghmm_dseq ** sqs, int noC,
                     double ******matrix_b, double *****matrix_a,
                     double *****matrix_pi, long double ***omega,
                     long double ****omegati, double ****log_p)
{
#define CUR_PROC "discrime_galloc"

  int i, k, l, m;

  /* The error path passes every output to discrime_gfree(); give all of
     them a defined value so an early failure never forwards garbage. */
  *matrix_b = NULL;
  *matrix_a = NULL;
  *matrix_pi = NULL;
  *omega = NULL;
  *omegati = NULL;
  *log_p = NULL;

  /* first allocate memory for matrix_b: */
  ARRAY_CALLOC (*matrix_b, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_b)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_b)[k][l], noC);
      for (m = 0; m < noC; m++) {
        ARRAY_CALLOC ((*matrix_b)[k][l][m], mo[m]->N);
        for (i = 0; i < mo[m]->N; i++)
          ARRAY_CALLOC ((*matrix_b)[k][l][m][i], ghmm_ipow (mo[m], mo[m]->M, mo[m]->order[i] + 1));
      }
    }
  }

  /* matrix_a(k,l,i,j) = matrix_a[k][l][i*mo->N + j] */
  ARRAY_CALLOC (*matrix_a, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_a)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_a)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_a)[k][l][m], mo[m]->N * mo[m]->N);
    }
  }

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*matrix_pi)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++) {
      ARRAY_CALLOC ((*matrix_pi)[k][l], noC);
      for (m = 0; m < noC; m++)
        ARRAY_CALLOC ((*matrix_pi)[k][l][m], mo[m]->N);
    }
  }

  /* allocate memory for matrices of likelihoods
     log_p[k][l][m] =
     log_prob of l-th sequence of k-th class under the m-th ghmm_dmodel */
  ARRAY_CALLOC (*log_p, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*log_p)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*log_p)[k][l], noC);
  }

  /* allocate memory for outer derivatives */
  ARRAY_CALLOC (*omega, noC);
  for (k = 0; k < noC; k++)
    ARRAY_CALLOC ((*omega)[k], sqs[k]->seq_number);

  /* allocate memory for omega tilde. NB: size(omega)*noC == size(omegati) */
  ARRAY_CALLOC (*omegati, noC);
  for (k = 0; k < noC; k++) {
    ARRAY_CALLOC ((*omegati)[k], sqs[k]->seq_number);
    for (l = 0; l < sqs[k]->seq_number; l++)
      ARRAY_CALLOC ((*omegati)[k][l], noC);
  }

  return 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* NOTE(review): discrime_gfree() is not visible here; confirm that it
     copes with partially built (NULL) tables. */
  discrime_gfree (mo, sqs, noC, *matrix_b, *matrix_a, *matrix_pi,
                  *omega, *omegati, *log_p);
  /* the tables were passed by value; do not leave dangling pointers */
  *matrix_b = NULL;
  *matrix_a = NULL;
  *matrix_pi = NULL;
  *omega = NULL;
  *omegati = NULL;
  *log_p = NULL;
  return -1;
#undef CUR_PROC
}
Exemple #20
0
/*============================================================================*/
/**
 * k-best decoding of the label sequence for the observation sequence o_seq.
 *
 * Hypotheses (label prefixes) are kept in one list per sequence position.
 * For every position the lists are propagated forward, their gamma values
 * are recomputed, and for every state only the k most probable hypotheses
 * that also pass the KBEST_THRESHOLD test relative to that state's best one
 * survive. Finally the hypothesis with the largest summed probability at the
 * last position is traced back through its parents.
 *
 * Probabilities are handled as logs; the value 1.0 is used as marker for
 * "log of zero".
 *
 * @param mo       model; N, s[], label[] and order[] are read
 * @param o_seq    observation sequence
 * @param seq_len  length of o_seq
 * @param k        number of hypotheses kept per state
 * @param log_p    out: log of the summed probability of the returned
 *                 hypothesis, 1.0 if no valid one was found (not written
 *                 when seq_len <= 0 or k <= 0)
 * @return newly allocated array of seq_len labels, or NULL if seq_len <= 0,
 *         k <= 0 or no valid hypothesis exists. Allocation failures and
 *         internal inconsistencies terminate the process via exit(1).
 */
int *ghmm_dmodel_label_kbest (ghmm_dmodel * mo, int *o_seq, int seq_len, int k, double *log_p)
{
#define CUR_PROC "ghmm_dl_kbest"
  int i, t, c, l, m;            /* counters */
  int no_oldHyps;               /* number of hypotheses until position t-1 */
  int b_index, i_id;            /* index for addressing states' b arrays */
  int no_labels = 0;
  int exists, g_nr;
  int *states_wlabel;
  int *label_max_out;
  char *str;

  /* logarithmized transition matrix A, log(a(i,j)) => log_a[i*N+j],
     1.0 for zero probability */
  double **log_a;

  /* matrix of hypotheses, holds for every position in the sequence a list
     of hypotheses */
  hypoList **h;
  hypoList *hP;

  /* vectors for rows in the matrices */
  int *hypothesis;

  /* pointer & prob. of the k most probable hypotheses for each state
     - matrices of dimensions #states x k:  argm(i,l) => argmaxs[i*k+l] */
  double *maxima;
  hypoList **argmaxs;

  /* pointer to & probability of most probable hypothesis in a certain state */
  hypoList *argmax;
  double sum;

  /* break if sequence empty or k<1: */
  if (seq_len <= 0 || k <= 0)
    return NULL;

  ARRAY_CALLOC (h, seq_len);

  /* 1. Initialization (extend empty hypothesis to #labels hypotheses of
         length 1): */

  /* get number of labels (= maximum label + 1)
     and number of states with those labels */
  ARRAY_CALLOC (states_wlabel, mo->N);
  ARRAY_CALLOC (label_max_out, mo->N);
  for (i = 0; i < mo->N; i++) {
    c = mo->label[i];
    states_wlabel[c]++;
    if (c > no_labels)
      no_labels = c;
    if (mo->s[i].out_states > label_max_out[c])
      label_max_out[c] = mo->s[i].out_states;
  }
  /* add one to the maximum label to get the number of labels */
  no_labels++;
  ARRAY_REALLOC (states_wlabel, no_labels);
  ARRAY_REALLOC (label_max_out, no_labels);

  /* initialize h: */
  hP = h[0];
  for (i = 0; i < mo->N; i++) {
    if (mo->s[i].pi > KBEST_EPS) {
      /* printf("Found State %d with initial probability %f\n", i, mo->s[i].pi); */
      exists = 0;
      /* look for an existing hypothesis carrying the label of state i */
      while (hP != NULL) {
        if (hP->hyp_c == mo->label[i]) {
          /* add entry to the gamma list */
          g_nr = hP->gamma_states;
          hP->gamma_id[g_nr] = i;
          hP->gamma_a[g_nr] =
            log (mo->s[i].pi) +
            log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]);
          hP->gamma_states = g_nr + 1;
          exists = 1;
          break;
        }
        else
          hP = hP->next;
      }
      if (!exists) {
        ighmm_hlist_insert (&(h[0]), mo->label[i], NULL);
        /* initialize gamma-array with safe size (number of states with this
           label) and add the first entry */
        ARRAY_MALLOC (h[0]->gamma_a, states_wlabel[mo->label[i]]);
        ARRAY_MALLOC (h[0]->gamma_id, states_wlabel[mo->label[i]]);
        h[0]->gamma_id[0] = i;
        h[0]->gamma_a[0] =
          log (mo->s[i].pi) +
          log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]);
        h[0]->gamma_states = 1;
        h[0]->chosen = 1;
      }
      /* restart the search at the list head for the next state */
      hP = h[0];
    }
  }
  /* reallocating the gamma list to the real size */
  hP = h[0];
  while (hP != NULL) {
    ARRAY_REALLOC (hP->gamma_a, hP->gamma_states);
    ARRAY_REALLOC (hP->gamma_id, hP->gamma_states);
    hP = hP->next;
  }

  /* calculate transition matrix with logarithmic values: */
  log_a = kbest_buildLogMatrix (mo->s, mo->N);

  /* initialize temporary arrays: */
  ARRAY_MALLOC (maxima, mo->N * k);                             /* for each state save k */
  ARRAY_MALLOC (argmaxs, mo->N * k);


  /*------ Main loop: Cycle through the sequence: ------*/
  for (t = 1; t < seq_len; t++) {

    /* put o_seq[t-1] in emission history: */
    update_emission_history (mo, o_seq[t - 1]);

    /* 2. Propagate hypotheses forward and update gamma: */
    no_oldHyps =
      ighmm_hlist_prop_forward (mo, h[t - 1], &(h[t]), no_labels, states_wlabel,
                     label_max_out);
    /* printf("t = %d (%d), no of old hypotheses = %d\n", t, seq_len, no_oldHyps); */

    /*-- calculate new gamma: --*/
    hP = h[t];
    /* cycle through list of hypotheses */
    while (hP != NULL) {

      for (i = 0; i < hP->gamma_states; i++) {
        /* if hypothesis hP ends with label of state i:
           gamma(i,c):= log(sum(exp(a(j,i)*exp(oldgamma(j,old_c)))))
           + log(b[i](o_seq[t]))
           else: gamma(i,c):= -INF (represented by 1.0) */
        i_id = hP->gamma_id[i];
        hP->gamma_a[i] = ighmm_log_gamma_sum (log_a[i_id], &mo->s[i_id], hP->parent);
        b_index = get_emission_index (mo, i_id, o_seq[t], t);
        if (b_index < 0) {
          hP->gamma_a[i] = 1.0;
          if (mo->order[i_id] > t)
            continue;
          else {
            str = ighmm_mprintf (NULL, 0,
                           "i_id: %d, o_seq[%d]=%d\ninvalid emission index!\n",
                           i_id, t, o_seq[t]);
            GHMM_LOG(LCONVERTED, str);
            m_free (str);
          }
        }
        else
          hP->gamma_a[i] += log (mo->s[i_id].b[b_index]);
        /*printf("%g = %g\n", log(mo->s[i_id].b[b_index]), hP->gamma_a[i]); */
        if (hP->gamma_a[i] > 0.0) {
          GHMM_LOG(LCONVERTED, "gamma to large. ghmm_dl_kbest failed\n");
          exit (1);
        }
      }
      hP = hP->next;
    }

    /* 3. Choose the k most probable hypotheses for each state and discard all
           hypotheses that were not chosen: */

    /* initialize temporary arrays: */
    for (i = 0; i < mo->N * k; i++) {
      maxima[i] = 1.0;
      argmaxs[i] = NULL;
    }

    /* cycle through hypotheses & calculate the k most probable hypotheses for
       each state: */
    hP = h[t];
    while (hP != NULL) {
      for (i = 0; i < hP->gamma_states; i++) {
        i_id = hP->gamma_id[i];
        if (hP->gamma_a[i] > KBEST_EPS)
          continue;
        /* find first best hypothesis that is worse than current hypothesis: */
        for (l = 0;
             l < k && maxima[i_id * k + l] < KBEST_EPS
             && maxima[i_id * k + l] > hP->gamma_a[i]; l++);
        if (l < k) {
          /* for each m>l: m'th best hypothesis becomes (m+1)'th best */
          for (m = k - 1; m > l; m--) {
            argmaxs[i_id * k + m] = argmaxs[i_id * k + m - 1];
            maxima[i_id * k + m] = maxima[i_id * k + m - 1];
          }
          /* save new l'th best hypothesis: */
          maxima[i_id * k + l] = hP->gamma_a[i];
          argmaxs[i_id * k + l] = hP;
        }
      }
      hP = hP->next;
    }

    /* set 'chosen' for all hypotheses from argmaxs array: */
    for (i = 0; i < mo->N * k; i++)
      /* only choose hypotheses whose prob. is at least threshold*max_prob.
         Entry i belongs to state i / k (layout [state*k + rank], see
         above), so that state's best value sits at (i / k) * k. The former
         index (i % mo->N) * k addressed a different state whenever k > 1. */
      if (maxima[i] != 1.0
          && maxima[i] >= KBEST_THRESHOLD + maxima[(i / k) * k])
        argmaxs[i]->chosen = 1;

    /* remove hypotheses that were not chosen from the lists: */
    /* remove all hypotheses till the first chosen one */
    while (h[t] != NULL && 0 == h[t]->chosen)
      ighmm_hlist_remove (&(h[t]));
    /* remove all other not chosen hypotheses */
    if (!h[t]) {
      GHMM_LOG(LCONVERTED, "No chosen hypothesis. ghmm_dl_kbest failed\n");
      exit (1);
    }
    hP = h[t];
    while (hP->next != NULL) {
      if (1 == hP->next->chosen)
        hP = hP->next;
      else
        ighmm_hlist_remove (&(hP->next));
    }
  }
  /* dispose of temporary arrays: */
  m_free(states_wlabel);
  m_free(label_max_out);
  m_free(argmaxs);
  m_free(maxima);
  /* transition matrix is no longer needed from here on */
  for (i=0; i<mo->N; i++)
    m_free(log_a[i]);
  m_free(log_a);

  /* 4. Save the hypothesis with the highest probability over all states: */
  hP = h[seq_len - 1];
  argmax = NULL;
  *log_p = 1.0;                 /* log_p will store log of maximum summed probability */
  while (hP != NULL) {
    /* sum probabilities for each hypothesis over all states: */
    sum = ighmm_cvector_log_sum (hP->gamma_a, hP->gamma_states);
    /* and select maximum sum */
    if (sum < KBEST_EPS && (*log_p == 1.0 || sum > *log_p)) {
      *log_p = sum;
      argmax = hP;
    }
    hP = hP->next;
  }

  /* found a valid path? */
  if (*log_p < KBEST_EPS) {
    /* yes: extract chosen hypothesis by walking the parent links backwards */
    ARRAY_MALLOC (hypothesis, seq_len);
    for (i = seq_len - 1; i >= 0; i--) {
      hypothesis[i] = argmax->hyp_c;
      argmax = argmax->parent;
    }
  }
  else
    /* no: return 1.0 representing -INF and an empty hypothesis */
    hypothesis = NULL;

  /* dispose of calculation matrices: */
  hP = h[seq_len - 1];
  while (hP != NULL)
    ighmm_hlist_remove (&hP);
  free (h);
  return hypothesis;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ghmm_dl_kbest failed\n");
  exit (1);
#undef CUR_PROC
}
/*============================================================================*/
/**
 * Divide and conquer pair Viterbi that reduces the memory requirement.
 *
 * 1. Compute the alignment of X vs Y and propagate the middle point.
 * 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and
 *    X[len/2+1:len] vs Y[m+1:len].
 * The recursion is broken as soon as the area between the start and stop
 * cells is smaller than max_size; that part is solved directly with
 * ghmm_dpmodel_viterbi_variable_tb() on slices of the sequences.
 *
 * While a segment that does not begin at the origin is processed, the log_pi
 * values of the model are temporarily replaced (all 1, except the state of
 * the start cell) and restored afterwards.
 *
 * @param mo           pair HMM (its log_pi values are modified temporarily)
 * @param X, Y         the two sequences
 * @param log_p        out: log probability of the path, 1 on failure
 * @param path_length  out: number of entries in the returned path
 * @param start        cell the segment starts from, NULL for the origin
 * @param stop         cell the segment ends in, NULL for the sequence ends
 * @param max_size     area below which the plain Viterbi is used
 * @param pv           storage handed to pviterbi_propagate_step()
 * @return newly allocated state path owned by the caller. A path consisting
 *         of the single entry -1 (with *log_p == 1) signals that no path was
 *         found; NULL is returned when an allocation failed.
 *
 * NOTE(review): the slices tractable_X / tractable_Y and the cell `middle`
 * are not released here; their release functions / ownership are not
 * visible in this file - confirm and free them if this function owns them.
 */
static int * pviterbi_propagate_recursion (ghmm_dpmodel *mo, ghmm_dpseq * X,
                                           ghmm_dpseq * Y, double *log_p,
                                           int *path_length, cell *start,
                                           cell *stop, double max_size,
                                           plocal_propagate_store_t * pv) {
#define CUR_PROC "pviterbi_propagate_recursion"
  int * path_seq = NULL;
  int start_x, start_y, stop_x, stop_y;
  double * original_pi=NULL;
  int i;
  cell * middle;
  int length1, length2;
  double log_p1, log_p2;
  int * path1 = NULL, * path2 = NULL;

  init_start_stop(start, stop, X, Y, &start_x, &start_y, &stop_x, &stop_y);
#ifdef DEBUG
  printf("Recursion: start, stop cells\n");
  if(start)
    ghmm_dpmodel_print_cell(start);
  else
    printf("(0, 0) first segment\n");
  if(stop)
    ghmm_dpmodel_print_cell(stop);
  else
  printf("(%i, %i) last segment\n", stop_x, stop_y);
#endif
  /* Break the recursion if the lengths of both sequences become tractable
     for the normal ghmm_dpmodel_viterbi algorithm */
  if ((double)(stop_x - start_x) * (double)(stop_y - start_y) < max_size) {
    /* to use the unchanged ghmm_dpmodel_viterbi algorithm take slices of the sequences */
    ghmm_dpseq * tractable_X = ghmm_dpseq_slice(X, start_x, stop_x);
    ghmm_dpseq * tractable_Y = ghmm_dpseq_slice(Y, start_y, stop_y);
    /* state handed to the traceback: the predecessor recorded in the stop
       cell, or -1 when the segment runs to the end of the sequences */
    int tb_state = -1;
    if (stop != NULL)
      tb_state = stop->previous_state;
#ifdef DEBUG
    printf("ghmm_dpmodel_viterbi on slice  x[%i:%i], y[%i:%i]\n",
           start_x, stop_x, start_y, stop_y);
#endif
    /* if this is not the very first path segment starting at zero:
       temporarily change the initial probability to go into state k+1 */
    if (start != NULL) {
      ARRAY_CALLOC (original_pi, mo->N);
      /* save original pi and set all to zero */
      for (i=0; i<mo->N; i++) {
        original_pi[i] = mo->s[i].log_pi;
        mo->s[i].log_pi = 1;
      }
      /* set the initial prob. for the state that was in the middle of path */
      mo->s[start->state].log_pi = start->log_p + start->log_a;
    }

    /* compute the partial path */
    path_seq = ghmm_dpmodel_viterbi_variable_tb(mo, tractable_X, tractable_Y,
                                                log_p, path_length, tb_state);

    if (start != NULL) {
      /* restore the original model */
      for (i=0; i<mo->N; i++)
        mo->s[i].log_pi = original_pi[i];
      m_free(original_pi);
    }
    if (*log_p == 1) {
      fprintf(stderr, "Problem with slice x[%i:%i], y[%i:%i]\n",
              start_x, stop_x, start_y, stop_y);
    }
    return path_seq;
  }
  else {
    /* 1. Compute the alignment of X vs Y and propagate the middle point */
    double step_log_p = 1;
    /* if this is not the very first path segment starting at zero:
       temporarily change the initial probability to go into state k+1 */
    if (start != NULL) {
      ARRAY_CALLOC(original_pi, mo->N);
      /* save original pi and set all to zero */
      for (i=0; i<mo->N; i++) {
        original_pi[i] = mo->s[i].log_pi;
        mo->s[i].log_pi = 1;
      }
      /* set the initial prob. for the state that was in the middle of path */
      mo->s[start->state].log_pi =  start->log_p + start->log_a;
    }
    middle = pviterbi_propagate_step(mo, X, Y, start, stop, &step_log_p, pv);
    if (start != NULL) {
      /* restore the original model */
      for (i=0; i<mo->N; i++)
        mo->s[i].log_pi = original_pi[i];
      m_free(original_pi);
    }
    /* check if there is a middle */
    if (!middle) {
      fprintf(stderr, "(%i, %i)->(%i, %i) No middle found!\n",
              start_x, start_y, stop_x, stop_y);
      ARRAY_CALLOC(path_seq, 1);
      path_seq[0] = -1;
      *path_length = 1;
      *log_p = 1;
      return path_seq;
    }
#ifdef DEBUG
    else {
      printf("(%i, %i)->(%i, %i) Middlepoint ", start_x, start_y,
             stop_x, stop_y);
      ghmm_dpmodel_print_cell(middle);
    }
#endif
    /* check if there is a path */
    if (step_log_p == 1) {
      ARRAY_CALLOC(path_seq, 1);
      path_seq[0] = -1;
      *path_length = 1;
      *log_p = 1;
      return path_seq;
    }
    /* 2. Solve recursively the two alignments X[0:len/2] vs Y[0:m] and
       X[len/2+1:len] vs Y[m+1:len] */
    length1 = 0;
    log_p1 = 0;
    path1 = pviterbi_propagate_recursion(mo, X, Y, &log_p1, &length1,
                                         start, middle,
                                         max_size, pv);
    length2 = 0;
    log_p2 = 0;
    path2 = pviterbi_propagate_recursion(mo, X, Y, &log_p2, &length2,
                                         middle, stop,
                                         max_size, pv);
    /* check the paths; a NULL half (allocation failure further down) is a
       failure too and must not be copied from */
    if (log_p1 == 1 || log_p2 == 1 || !path1 || !path2) {
      /* the halves are not needed any more */
      if (path1) {
        m_free(path1);
      }
      path1 = NULL;
      if (path2) {
        m_free(path2);
      }
      path2 = NULL;
      ARRAY_CALLOC (path_seq, 1);
      path_seq[0] = -1;
      *path_length = 1;
      *log_p = 1;
      return path_seq;
    }
    /* concatenate the paths */
    *path_length = length1 + length2;
    *log_p = log_p2;

#ifdef DEBUG
    /* check if the transition between the ends of the paths is possible */
    for (i=0; i<mo->s[path1[length1-1]].in_states; i++){
      if (mo->s[path1[length1-1]].in_id[i] == path2[0])
        break;
    }
    if (i == mo->s[path1[length1-1]].in_states) {
      printf("no transition between states %i -> %i\n", path1[length1-1], path2[0]);
    }

    printf("Conquer: start, stop cells\n");
    if(start)
      ghmm_dpmodel_print_cell(start);
    else
      printf("(0, 0) first segment\n");
    if(stop)
      ghmm_dpmodel_print_cell(stop);
    else
      printf("(%i, %i) last segment\n", stop_x, stop_y);
    printf("Path 1:\n[");
    for (i=0; i<length1; i++)
      printf("%i,", path1[i]);
    printf("\nPath 2:\n[");
    for (i=0; i<length2; i++)
      printf("%i,", path2[i]);
    printf("]\n");
#endif

    ARRAY_CALLOC (path_seq, *path_length);

    for (i=0; i<length1; i++)
      path_seq[i] = path1[i];
    for (i=0; i<length2; i++)
      path_seq[length1 + i] = path2[i];

    /* both halves have been copied; they were allocated by the recursive
       calls for this call only, so release them here */
    m_free(path1);
    m_free(path2);
    return path_seq;
  }
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  /* an allocation failed; drop the halves if they already exist */
  if (path1) {
    m_free(path1);
  }
  if (path2) {
    m_free(path2);
  }
  return NULL;
#undef CUR_PROC
}
/**
 * Allocates the working storage for the propagating pair-Viterbi algorithm.
 *
 * @param mo     pair HMM the store is sized for; the pointer itself is
 *               remembered in the returned store
 * @param len_y  length of the Y sequence; together with mo->max_offset_y it
 *               fixes the second extent of phi and end_of_first
 * @return the new store, or NULL when an allocation failed. On failure the
 *         partially built store is passed to pviterbi_propagate_free.
 */
static plocal_propagate_store_t * pviterbi_propagate_alloc (pmodel *mo, int len_y) {
#define CUR_PROC "pviterbi_propagate_alloc"
  plocal_propagate_store_t* v = NULL;
  int i, j, k;
  /* extents shared by phi and end_of_first */
  const int rows = mo->max_offset_x + 1;
  const int cols = len_y + mo->max_offset_y + 1;

  /* every ARRAY_CALLOC below jumps to STOP when the allocation fails, so no
     separate NULL test is needed after it */
  ARRAY_CALLOC (v, 1);

  v->mo = mo;
  v->len_y = len_y;

  /* log_in_a[target state][incoming transition][transition class of the
     source state]; the inner lengths differ per state */
  ARRAY_CALLOC (v->log_in_a, mo->N);
  for (j = 0; j < mo->N; j++) {
    ARRAY_CALLOC (v->log_in_a[j], mo->s[j].in_states);
    for (i = 0; i < mo->s[j].in_states; i++) {
      ARRAY_CALLOC (v->log_in_a[j][i], mo->s[mo->s[j].in_id[i]].kclasses);
    }
  }

  /* log_b[state] gets one slot more than the state's emission table */
  ARRAY_CALLOC (v->log_b, mo->N);
  for (j = 0; j < mo->N; j++) {
    ARRAY_CALLOC (v->log_b[j], emission_table_size(mo, j) + 1);
  }

  /* matrix3d_d_alloc is not an ARRAY_* macro, so its result is checked by hand */
  v->phi = matrix3d_d_alloc(rows, cols, mo->N);
  if (!(v->phi)) {mes_proc(); goto STOP;}
  ARRAY_CALLOC (v->phi_new, mo->N);

  /* end_of_first[x offset][y position][state] */
  ARRAY_CALLOC (v->end_of_first, rows);
  for (j = 0; j < rows; j++) {
    ARRAY_CALLOC (v->end_of_first[j], cols);
    for (i = 0; i < cols; i++) {
      ARRAY_CALLOC (v->end_of_first[j][i], mo->N);
      /* set explicitly instead of relying on the allocator's fill pattern */
      for (k = 0; k < mo->N; k++)
        v->end_of_first[j][i][k] = NULL;
    }
  }

  v->topo_order_length = 0;
  ARRAY_CALLOC (v->topo_order, mo->N);
  return(v);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  pviterbi_propagate_free(&v, mo->N, mo->max_offset_x, mo->max_offset_y, len_y);
  return(NULL);
#undef CUR_PROC
}
Exemple #23
0
/*============================================================================*/
/**
 * Determines the transition class for the backward step that leaves time
 * point t and stores it in *osc.
 *
 * Models with a single class (smo->cos == 1) always use class 0. Otherwise
 * the model's get_class callback is queried. A negative t means that no
 * further step follows; in that case only the presence of the callback is
 * verified and *osc is left untouched.
 *
 * @return 0 on success, -1 if get_class is missing or returned an index
 *         outside [0, smo->cos)
 */
static int cmodel_backward_class (ghmm_cmodel * smo, double *O, int t, int *osc)
{
  int cls;

  if (smo->cos == 1) {
    *osc = 0;
    return 0;
  }
  if (!smo->class_change->get_class) {
    printf ("ERROR: get_class not initialized\n");
    return -1;
  }
  /* never hand a negative time index to the callback */
  if (t < 0)
    return 0;

  cls = smo->class_change->get_class (smo, O, smo->class_change->k, t);
  /* the class is used as an index into out_a, so reject both ends */
  if (cls < 0 || cls >= smo->cos) {
    printf("ERROR: get_class returned index %d but model has only %d classes !\n",cls,smo->cos);
    return -1;
  }
  *osc = cls;
  return 0;
}

/**
 * Scaled backward pass for a continuous-emission model.
 *
 * @param smo    model
 * @param O      observation sequence; T values, smo->dim per time point
 * @param T      number of values in O (divided by smo->dim internally to get
 *               the number of time points)
 * @param b      optional emission table, read as b[t][state][smo->M]; when
 *               NULL the densities are computed with ghmm_cmodel_calc_b
 * @param beta   output matrix, one row of smo->N entries per time point
 * @param scale  scaling factor per time point
 * @return 0 on success; -1 if the sequence contains no time point, a scale
 *         factor lies below LOWER_SCALE_BOUND, the class lookup fails, or
 *         the scratch buffer cannot be allocated
 */
int ghmm_cmodel_backward (ghmm_cmodel * smo, double *O, int T, double ***b,
                    double **beta, const double *scale)
{
# define CUR_PROC "ghmm_cmodel_backward"
  double *beta_tmp, sum, c_t, emission;
  int i, j, j_id, t;
  int osc = 0;
  int res = -1;
  int pos;

  /* T is length of sequence; divide by dimension to represent the number of time points */
  T /= smo->dim;

  ARRAY_CALLOC (beta_tmp, smo->N);

  /* without at least one time point scale[T - 1] and beta[T - 1] below
     would be read/written out of bounds */
  if (T < 1)
    goto STOP;

  /* try different bounds here in case of problems like beta[t] = NaN */
  for (t = 0; t < T; t++) {
    if (scale[t] < LOWER_SCALE_BOUND)
      goto STOP;
  }

  /* initialize the last time point */
  c_t = 1 / scale[T - 1];
  for (i = 0; i < smo->N; i++) {
    beta[T - 1][i] = 1;
    beta_tmp[i] = c_t;
  }

  /* class for the first backward step (leaving T-2); for T == 1 there is no
     such step and the callback is not invoked */
  if (cmodel_backward_class (smo, O, T - 2, &osc))
    goto STOP;

  /* Backward step for t = T-2, ..., 0.
     beta_tmp holds the scaled beta of the time point processed before. */
  for (t = T - 2; t >= 0; t--) {
    pos = t * smo->dim;
    for (i = 0; i < smo->N; i++) {
      sum = 0.0;
      for (j = 0; j < smo->s[i].out_states; j++) {
        j_id = smo->s[i].out_id[j];
        /* emission of successor j_id at time t+1 */
        if (b == NULL)
          emission = ghmm_cmodel_calc_b(smo->s+j_id, O+pos+smo->dim);
        else
          emission = b[t + 1][j_id][smo->M];
        sum += smo->s[i].out_a[osc][j] * emission * beta_tmp[j_id];
      }
      beta[t][i] = sum;
    }

    c_t = 1 / scale[t];
    for (i = 0; i < smo->N; i++)
      beta_tmp[i] = beta[t][i] * c_t;

    /* class for the next iteration; at t == 0 nothing follows */
    if (cmodel_backward_class (smo, O, t - 1, &osc))
      goto STOP;
  }
  res = 0;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  m_free (beta_tmp);
  return (res);
# undef CUR_PROC
}                               /* ghmm_cmodel_backward */
Exemple #24
0
/*============================================================================*/
/**
 * Extends every hypothesis in list h by one label.
 *
 * For each hypothesis, all successors of its possible states are grouped by
 * their label; per label one new hypothesis is inserted into *hplus whose
 * gamma_id array collects the distinct successor states carrying that label.
 * Once a hypothesis is fully processed, each new hypothesis gets a fresh
 * gamma_a array and its gamma_id array is resized to the number of states
 * actually collected.
 *
 * @param mo       model supplying out-transitions and state labels
 * @param h        head of the list of hypotheses to extend
 * @param hplus    list receiving the new hypotheses
 * @param labels   number of distinct labels
 * @param nr_s     per label, bound on the number of states (used to size
 *                 the initial gamma_id array)
 * @param max_out  per label, factor applied to the parent's gamma_states
 *                 for the same size bound
 * @return number of hypotheses found in h. Allocation failure terminates
 *         the process via exit(1).
 */
static int ighmm_hlist_prop_forward (ghmm_dmodel * mo, hypoList * h, hypoList ** hplus,
				     int labels, int *nr_s, int *max_out) {
#define CUR_PROC "ighmm_hlist_prop_forward"
  hypoList **by_label;
  hypoList *cur, *ext;
  int g, o, lab, pos;
  int src, dst, known, cap;
  int n_old = 0;

  ARRAY_MALLOC (by_label, labels);

  for (cur = h; cur != NULL; cur = cur->next) {

    /* by_label[l] is non-NULL iff cur was already extended with label l */
    for (lab = 0; lab < labels; lab++)
      by_label[lab] = NULL;

    for (g = 0; g < cur->gamma_states; g++) {
      /* a gamma_a value of exactly 1.0 marks a state to be skipped */
      if (cur->gamma_a[g] == 1.0)
        continue;

      src = cur->gamma_id[g];
      for (o = 0; o < mo->s[src].out_states; o++) {
        dst = mo->s[src].out_id[o];
        lab = mo->label[dst];
        ext = by_label[lab];

        if (ext == NULL) {
          /* first successor with this label: open a new hypothesis */
          ighmm_hlist_insert (hplus, lab, cur);
          ext = *hplus;
          by_label[lab] = ext;
          /* gamma array sized with a safe upper bound */
          cap = m_min (nr_s[lab], cur->gamma_states * max_out[cur->hyp_c]);
          ARRAY_MALLOC (ext->gamma_id, cap);
          ext->gamma_id[0] = dst;
          ext->gamma_states = 1;
          continue;
        }

        /* hypothesis exists: append dst unless it is already listed */
        known = ext->gamma_states;
        for (pos = 0; pos < known; pos++) {
          if (ext->gamma_id[pos] == dst)
            break;
        }
        if (pos == known) {
          ext->gamma_id[known] = dst;
          ext->gamma_states = known + 1;
        }
      }
    }

    /* resize the gamma arrays of the new hypotheses to their final length */
    for (lab = 0; lab < labels; lab++) {
      ext = by_label[lab];
      if (ext == NULL)
        continue;
      ARRAY_CALLOC (ext->gamma_a, ext->gamma_states);
      ARRAY_REALLOC (ext->gamma_id, ext->gamma_states);
      by_label[lab] = NULL;
    }

    n_old++;
  }

  free (by_label);
  return (n_old);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_hlist_prop_forward failed\n");
  exit (1);
#undef CUR_PROC
}
Exemple #25
0
/*
 * Extends each sequence of sqd_short using the model smo.
 *
 * Visible steps per sequence: a buffer of len * smo->dim values is allocated
 * and the short sequence copied into it; an initial state distribution is
 * derived (scaled forward variables of the last short time point, normalised
 * to sum one, or the model's pi values if the short sequence is empty); a
 * start state is drawn from that distribution; and, for an empty short
 * sequence, a first emission is generated in that state.
 *
 * smo         model used for generation
 * sqd_short   sequences to be extended
 * seed        value passed to GHMM_RNG_SET
 * global_len  target length; a value <= 0 selects GHMM_MAX_SEQ_LEN
 * mode        generation mode; every mode involving Viterbi is rejected
 *
 * NOTE(review): the definition is cut off in this excerpt right after the
 * first emission is generated; the generation loop, the cleanup at STOP and
 * the return value are not visible here.
 */
ghmm_cseq *ghmm_sgenerate_extensions (ghmm_cmodel * smo, ghmm_cseq * sqd_short,
                                    int seed, int global_len,
                                    sgeneration_mode_t mode)
{
#define CUR_PROC "ghmm_sgenerate_extensions"
  ghmm_cseq *sq = NULL;
  int i, j, t, n, m, len = global_len, short_len, max_short_len = 0, up = 0;
#ifdef bausparkasse
  int tilgphase = 0;
#endif
  /* int *v_path = NULL; */
  double log_p, *initial_distribution, **alpha, *scale, p, sum;
  /* aicj */
  int class = -1;
  int pos;

  /* TEMP: the Viterbi based modes are rejected until they are implemented */
  if (mode == all_viterbi || mode == viterbi_viterbi || mode == viterbi_all) {
    GHMM_LOG(LCONVERTED, "Error: mode not implemented yet\n");
    goto STOP;
  }

  if (len <= 0)
    /* no global length; model should have a final state */
    len = (int) GHMM_MAX_SEQ_LEN;
  max_short_len = ghmm_cseq_max_len (sqd_short);

  /*---------------alloc-------------------------------------------------*/
  sq = ghmm_cseq_calloc (sqd_short->seq_number);
  if (!sq) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (initial_distribution, smo->N);
  /* alpha is needed by the forward pass below */
  alpha = ighmm_cmatrix_alloc (max_short_len, smo->N);
  if (!alpha) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (scale, max_short_len);
  ghmm_rng_init ();
  GHMM_RNG_SET (RNG, seed);

  /*---------------main loop over all seqs-------------------------------*/
  for (n = 0; n < sqd_short->seq_number; n++) {
    /* room for len time points of smo->dim values each */
    ARRAY_CALLOC (sq->seq[n], len*(smo->dim));
    short_len = sqd_short->seq_len[n];
    if (len < short_len) {
      GHMM_LOG(LCONVERTED, "Error: given sequence is too long\n");
      goto STOP;
    }
    ghmm_cseq_copy (sq->seq[n], sqd_short->seq[n], short_len);
#ifdef GHMM_OBSOLETE
    sq->seq_label[n] = sqd_short->seq_label[n];
#endif /* GHMM_OBSOLETE */

    /* Initial distribution */
    /* 1. Viterbi-state */
#if 0
    /* re-enable once ghmm_cmodel_viterbi is implemented */
    if (mode == viterbi_all || mode == viterbi_viterbi) {
      v_path = cviterbi (smo, sqd_short->seq[n], short_len, &log_p);
      if (v_path[short_len - 1] < 0 || v_path[short_len - 1] >= smo->N) {
        GHMM_LOG(LCONVERTED, "Warning:Error: from viterbi()\n");
        sq->seq_len[n] = short_len;
        m_realloc (sq->seq[n], short_len);
        continue;
      }
      m_memset (initial_distribution, 0, smo->N);
      initial_distribution[v_path[short_len - 1]] = 1.0;        /* all other 0 */
      m_free (v_path);
    }
#endif

    /* 2. Initial Distribution ???
       Pi(i) = alpha_t(i)/P(O|lambda) */
    if (mode == all_all || mode == all_viterbi) {
      if (short_len > 0) {
        if (ghmm_cmodel_forward (smo, sqd_short->seq[n], short_len, NULL /* ?? */ ,
                           alpha, scale, &log_p)) {
          GHMM_LOG_QUEUED(LCONVERTED);
          goto STOP;
        }
        sum = 0.0;
        for (i = 0; i < smo->N; i++) {
          /* alpha is scaled! */
          initial_distribution[i] = alpha[short_len - 1][i];
          sum += initial_distribution[i];
        }
        /* not ok? normalise to sum one? */
        for (i = 0; i < smo->N; i++)
          initial_distribution[i] /= sum;
      }
      else {
        /* empty short sequence: fall back to the model's pi values */
        for (i = 0; i < smo->N; i++)
          initial_distribution[i] = smo->s[i].pi;
      }
    }
    /* if short_len > 0:
       Initial state == final state from sqd_short; no output here
       else
       choose initial state according to pi and do output
     */
    /* draw state i: first index whose cumulative probability reaches p */
    p = GHMM_RNG_UNIFORM (RNG);
    sum = 0.0;
    for (i = 0; i < smo->N; i++) {
      sum += initial_distribution[i];
      if (sum >= p)
        break;
    }
    /* error due to incorrect normalization ??
       fall back to the last state with non-zero probability (or state 0) */
    if (i == smo->N) {
      i--;
      while (i > 0 && initial_distribution[i] == 0.0)
        i--;
    }
    t = 0;
    pos = t * smo->dim;
    if (short_len == 0) {
      /* Output in state i: draw component m from the weights c of state i */
      p = GHMM_RNG_UNIFORM (RNG);
      sum = 0.0;
      for (m = 0; m < smo->M; m++) {
        sum += smo->s[i].c[m];
        if (sum >= p)
          break;
      }
      /* error due to incorrect normalization ??
         fall back to the last component with non-zero weight (or 0) */
      if (m == smo->M) {
        m--;
        while (m > 0 && smo->s[i].c[m] == 0.0)
          m--;
      }
      ghmm_cmodel_get_random_var(smo, i, m, sq->seq[n]+pos);

      if (smo->cos == 1) {
        class = 0;
      }
      else {
        if (!smo->class_change->get_class) {
          printf ("ERROR: get_class not initialized\n");
          goto STOP;
        }
        /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/
        /* NOTE(review): the sequence index n is passed as third argument,
           while ghmm_cmodel_backward passes smo->class_change->k in that
           position - confirm which one get_class expects */
        class = smo->class_change->get_class (smo, sq->seq[n], n, t);
      }


      /* one time point has been produced */
      t++;
      pos += smo->dim;
    }
Exemple #26
0
/*----------------------------------------------------------------------------*/
/**
 * Accumulates a sigmoid-based discrimination score over all training
 * sequences of all classes.
 *
 * For each sequence of class k the log-likelihood under every model is
 * computed. The prior-weighted log score of model k is compared with the
 * log of the summed prior-weighted scores of all other models (log-sum-exp
 * anchored at the strongest competitor), and the difference is passed
 * through 1 / (1 + exp(-discrime_alpha * difference)).
 *
 * @param mo   array of noC models, one per class
 * @param sqs  array of noC sequence sets; sqs[k] holds the sequences of class k
 * @param noC  number of classes
 * @return sum of the sigmoid values over all sequences; 0.0 if the scratch
 *         array could not be allocated
 */
double ghmm_dmodel_label_discrim_perf(ghmm_dmodel** mo, ghmm_dseq** sqs, int noC)
{
#define CUR_PROC "ghmm_dmodel_label_discrim_perf"

  int cls, s, m, status;
  int rival = 0;                /* index of the strongest competing model */
  double rest, best;
  double *logp;
  double margin;
  long double sigmoid;
  double performance = 0.0;

  ghmm_dseq *set;

  ARRAY_CALLOC (logp, noC);

  for (cls = 0; cls < noC; cls++) {
    set = sqs[cls];

    for (s = 0; s < set->seq_number; s++) {

      /* log-likelihood of this sequence under every model */
      for (m = 0; m < noC; m++) {
        status = ghmm_dmodel_logp (mo[m], set->seq[s], set->seq_len[s], &(logp[m]));
        if (status != 0)
          printf ("ghmm_dmodel_logp error in sequence[%d][%d] under model %d (%g)\n",
                  cls, s, m, logp[m]);
      }

      /* strongest competitor; 1.0 serves as the "nothing found yet" marker */
      best = 1.0;
      for (m = 0; m < noC; m++) {
        if (m == cls)
          continue;
        if (1.0 == best || best < (logp[m] + log (mo[m]->prior))) {
          best = logp[m] + log (mo[m]->prior);
          rival = m;
        }
      }

      /* log of the summed competitor scores; the strongest one contributes
         the leading 1.0 */
      rest = 1.0;
      for (m = 0; m < noC; m++) {
        if (m == cls || m == rival)
          continue;
        rest += exp (logp[m] + log (mo[m]->prior) - best);
      }
      rest = log (rest) + best;

      margin = logp[cls] + log (mo[cls]->prior) - rest;

      /* clamp from below at log(LDBL_MIN) */
      if (margin < logl (LDBL_MIN)) {
        printf ("exponent was too large (%g) cut it down!\n", margin);
        margin = (double) logl (LDBL_MIN);
      }

      sigmoid = 1.0 / (1.0 + expl ((-discrime_alpha) * margin));

      performance += (double) sigmoid;
    }
  }
  m_free (logp);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  return performance;
#undef CUR_PROC
}