Ejemplo n.º 1
0
/*===========================================================================*/
/* Translate a blank-separated list of model type keywords into one type code.
 *
 * Each token of `data` is handed to matchModelType() and the returned values
 * are added up.  The sum is accepted only when it is listed in
 * validModelTypes.
 *
 * @param data  NUL-terminated keyword list; blanks are located with strchr()
 * @param size  length of data; what is left of it after the last blank is
 *              passed to matchModelType() as the length of the final token
 * @return the summed model type, or -1 (after logging an error) when the sum
 *         is not one of the known valid combinations
 */
static int parseModelType(const char * data, unsigned int size) {
#define CUR_PROC "parseModelType"

  const int validCount = sizeof(validModelTypes)/sizeof(validModelTypes[0]);
  const char * blank;
  char * msg;
  int typeSum = 0;
  int idx;

  /* consume every token that is terminated by a blank */
  for (blank = strchr(data, ' '); blank != NULL; blank = strchr(data, ' ')) {
    typeSum += matchModelType(data, blank-data);
    size -= (blank-data)+1;
    data = blank+1;
  }
  /* the remainder (no trailing blank) is the last token */
  typeSum += matchModelType(data, size);

  /* accept the sum as soon as it is found in the table of valid types */
  for (idx = 0; idx < validCount; idx++) {
    if (typeSum == validModelTypes[idx])
      return typeSum;
  }

  msg = ighmm_mprintf(NULL, 0, "%d is no known valid model type", typeSum);
  GHMM_LOG(LERROR, msg);
  m_free(msg);
  return -1;
#undef CUR_PROC
}
Ejemplo n.º 2
0
/*===========================================================================*/
static int parseCSVList(const char * data, unsigned int size, double * array, int reverse) {
#define CUR_PROC "parseCSVList"

  int retval=0;
  int i;
  char * * next, * estr;
  double tmp;

  ARRAY_CALLOC(next, 1);

  for (i=0; i<size; i++) {
    array[i] = strtod(data, next);
    if (data == *next) {
      estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. entry %d of %d. (%s)", i, size, *next);
      GHMM_LOG(LERROR, estr);
      m_free(estr);
      retval=-1;
      break;
    }
    if (next)
      data = *next+1;
    else
      break;
  }

  if (i != size) {
    retval=-1;
    estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. sizes do not match (%d != %d)", i, size);
    GHMM_LOG(LERROR, estr);
    m_free(estr);
  }

  if (reverse) {
    for (i=0; i<size/2; i++) {
      tmp = array[i];
      array[i] = array[size-i-1];
      array[size-i-1] = tmp;
    }
  }

STOP:
  m_free(next);
  return retval;
#undef CUR_PROC
}
Ejemplo n.º 3
0
/*===========================================================================*/
/* Build a ghmm_alphabet from the <symbol> children of an XML node.
 *
 * A first pass over the children checks that the "code" attributes of the
 * symbol elements are consecutive and start at 0, and counts them.  A second
 * pass stores the text content of every symbol element.
 *
 * @param doc  not used by this function
 * @param cur  XML node whose children are scanned for "symbol" elements
 * @param f    not used by this function
 * @return newly allocated alphabet, or NULL if a code is missing or out of
 *         sequence or if an allocation fails
 */
static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {
#define CUR_PROC "parseAlphabet"

  char * str;
  int M, code, error;

  xmlNodePtr symbol;
  ghmm_alphabet * alfa = NULL;

  ARRAY_CALLOC(alfa, 1);

  /* pass 1: validate the codes and count the symbols */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      code = getIntAttribute(symbol, "code", &error);
      if (error || code!=M) {
        str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M);
        GHMM_LOG(LERROR, str);
        m_free(str);
        goto STOP;
      } else
        M++;
    }
    symbol=symbol->next;
  }

  alfa->size = M;
  /*printf("Parsing alphabet with %d symbols\n", alfa->size);*/
  ARRAY_MALLOC(alfa->symbols, M);

  /* pass 2: store the content of every symbol element */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol);
      /*printf("%d. symbol: %s\n", M, alfa->symbols[M-1]);*/
    }
    symbol=symbol->next;
  }

  return alfa;
STOP:
  /* STOP is also reached when the allocation of alfa itself failed, so alfa
     must not be dereferenced unconditionally; symbols is still NULL when the
     error happened before its allocation */
  if (alfa) {
    if (alfa->symbols) {
      m_free(alfa->symbols);
    }
    m_free(alfa);
  }
  return NULL;
#undef CUR_PROC
}
Ejemplo n.º 4
0
/**
   Scales a vector in place so that its entries sum up to one.
   PROBLEM: Entries can get very small and be rounded to 0
   @return     0 on success (also for len <= 0, where nothing is done),
               -1 if the vector is not empty and the sum of its entries is
               smaller than GHMM_EPS_PREC (the vector is left unchanged)
   @param v    vector to normalize
   @param len  number of entries of v
 */
int ighmm_cvector_normalize (double *v, int len)
{
#define CUR_PROC "ighmm_cvector_normalize"
  double total = 0.0;
  char * msg;
  int k;

  /* accumulate the sum of all entries */
  k = 0;
  while (k < len) {
    total += v[k];
    k++;
  }

  /* a non-empty vector with a (nearly) vanishing sum can not be scaled */
  if (len > 0 && total < GHMM_EPS_PREC) {
    msg = ighmm_mprintf(NULL, 0, "Can't normalize vector. Sum smaller than %g\n",
                        GHMM_EPS_PREC);
    GHMM_LOG(LWARN, msg);
    m_free(msg);
    return (-1);
  }

  /* divide every entry by the sum */
  k = 0;
  while (k < len) {
    v[k] = v[k] / total;
    k++;
  }
  return 0;
#undef CUR_PROC
}                               /* ighmm_cvector_normalize */
Ejemplo n.º 5
0
/*===========================================================================*/
/* Parse one state element and store its properties in model number modelNo
 * of f.
 *
 * The attributes "id" and "initial" are read from cur; the children of cur
 * are then dispatched by element name: silent, discrete, mixture, pair
 * (ignored here), backgroundKey, class, tiedTo and position.
 *
 * Ownership: the description string and the emission array are handed over to
 * the model state once they are stored there.  From that point on they are
 * not released by this function any more, not even on the error path.
 *
 * @param doc        not used by this function
 * @param cur        XML node of the state
 * @param f          file structure holding the models and the model type
 * @param inDegree   number of incoming transitions, indexed by state id
 *                   (only read for continuous states)
 * @param outDegree  number of outgoing transitions, indexed by state id
 *                   (only read for continuous states)
 * @param modelNo    index of the model the state belongs to
 * @return 0 on success, -1 on error
 */
static int parseState(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int * inDegree, int * outDegree, int modelNo) {
#define CUR_PROC "parseState"

  int i, error, order=0, state=-1442, fixed=-985, tied=-9354, M, aprox, label;
  int curX=0, curY=0;
  double pi, prior;
  double *emissions = NULL;
  char *desc = NULL;
  char *s = NULL, *estr;
  int rev, stateFixed=1;
  int descStored = 0;           /* set once desc is owned by a model state */
  ghmm_cstate *newcstate;
  ghmm_c_emission *emission;

  xmlNodePtr elem, child, multichild;

  /* NOTE(review): the error flag of the "id" lookup is overwritten by the
     next call and never checked, although state is used as an array index
     below - confirm whether a missing id can occur here */
  state = getIntAttribute(cur, "id", &error);
  pi = getDoubleAttribute(cur, "initial", &error);
  if (error) {
    estr = ighmm_mprintf(NULL, 0, "can't read required initial probability for"
                         " state %d", state);
    GHMM_LOG(LERROR, estr);
    m_free(estr);
    goto STOP;
  }

  desc = (char *)xmlGetProp(cur, BAD_CAST "desc");

  elem = cur->children;
  while (elem!=NULL) {
    /* ======== silent state ============================================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "silent"))) {
      switch (f->modelType & PTR_TYPE_MASK) {
      case (GHMM_kDiscreteHMM):
        f->model.d[modelNo]->silent[state] = 1;
        break;
      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
        f->model.ds[modelNo]->silent[state] = 1;
        break;
      case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
      case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
        f->model.dp[modelNo]->silent[state] = 1;
        break;
      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
    }

    /* ======== discrete state (possible higher order) ==================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "discrete"))) {
      assert((f->modelType & GHMM_kDiscreteHMM) && ((f->modelType & GHMM_kPairHMM) == 0));

      /* fixed is a property of the distribution and optional */
      fixed = getIntAttribute(elem, "fixed", &error);
      if (error)
        fixed = 0;

      /* order is optional for discrete */
      if (f->modelType & GHMM_kHigherOrderEmissions) {
        order = getIntAttribute(elem, "order", &error);
        if (error)
          order = 0;
      }

      rev = getIntAttribute(cur, "rev", &error);
      if (error)
        rev = 0;

      /* parsing emission probabilities */
      s = (char *)xmlNodeGetContent(elem);

      switch (f->modelType & PTR_TYPE_MASK) {

      case (GHMM_kDiscreteHMM):
        f->model.d[modelNo]->s[state].desc = desc;
        descStored = 1;
        f->model.d[modelNo]->s[state].pi = pi;
        f->model.d[modelNo]->s[state].fix = fixed;
        if (f->modelType & GHMM_kHigherOrderEmissions) {
          f->model.d[modelNo]->order[state] = order;
          if (f->model.d[modelNo]->maxorder < order) {
            f->model.d[modelNo]->maxorder = order;
            estr = ighmm_mprintf(NULL, 0, "Updated maxorder to %d\n",
                                 f->model.d[modelNo]->maxorder);
            GHMM_LOG(LDEBUG, estr);
            m_free(estr);
          }
        }
        ARRAY_MALLOC(emissions, pow(f->model.d[modelNo]->M, order+1));
        /* do not store a partially parsed emission vector in the model */
        if (-1 == parseCSVList(s, pow(f->model.d[modelNo]->M, order+1), emissions, rev)) {
          GHMM_LOG(LERROR, "Can not parse emission CSV list.");
          goto STOP;
        }
        free(f->model.d[modelNo]->s[state].b);
        f->model.d[modelNo]->s[state].b = emissions;
        /* the model owns the array now, STOP must not free it */
        emissions = NULL;
        break;

      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
        f->model.ds[modelNo]->s[state].desc = desc;
        descStored = 1;
        f->model.ds[modelNo]->s[state].pi = pi;
        f->model.ds[modelNo]->s[state].fix = fixed;
        if (f->modelType & GHMM_kHigherOrderEmissions)
          f->model.ds[modelNo]->order[state] = order;
        ARRAY_MALLOC(emissions, pow(f->model.ds[modelNo]->M, order+1));
        /* do not store a partially parsed emission vector in the model */
        if (-1 == parseCSVList(s, pow(f->model.ds[modelNo]->M, order+1), emissions, rev)) {
          GHMM_LOG(LERROR, "Can not parse emission CSV list.");
          goto STOP;
        }
        f->model.ds[modelNo]->s[state].b = emissions;
        /* the model owns the array now, STOP must not free it */
        emissions = NULL;
        break;

      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
      m_free(s);
      s = NULL;
    }

    /* ======== continuous state ========================================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "mixture"))) {
      assert(f->modelType & GHMM_kContinuousHMM);
      /* count the density elements to size the state */
      M = 0;
      child = elem->children;
      while (child != NULL) {
        if ((!xmlStrcmp(child->name, BAD_CAST "normal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "multinormal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "uniform"))){
          M ++;

        }
        child = child->next;
      }
      ghmm_cstate_alloc(f->model.c[modelNo]->s + state, M, inDegree[state], outDegree[state], f->model.c[modelNo]->cos);
      newcstate = f->model.c[modelNo]->s + state;

      newcstate->desc = desc;
      descStored = 1;
      newcstate->M = M;
      newcstate->pi = pi;

      if( f->model.c[modelNo]->M < M)
        f->model.c[modelNo]->M = M;

      child = elem->children;

      i = 0;
      while (child != NULL) {

        emission = newcstate->e+i;

        /* common attributes */
        if ((!xmlStrcmp(child->name, BAD_CAST "normal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "multinormal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "uniform"))){
          fixed = getIntAttribute(child, "fixed", &error);
          if (error)
            fixed = 0;
          stateFixed = fixed && stateFixed;
          /* allocate emission */
          emission->fixed = fixed;

          prior = getDoubleAttribute(child, "prior", &error);
          if (error)
            prior = 1.0;
          newcstate->c[i] = prior;
        }
        /* child is not a density, continue with the next child */
        else {
          child = child->next;
          continue;
        }

        /* density type dependent attributes */
        if ((!xmlStrcmp(child->name, BAD_CAST "normal"))) {
          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* should the normal distribution be approximated? */
          aprox = getIntAttribute(child, "approx", &error);
          if (error)
            aprox = 0;
          emission->type      = aprox ? normal_approx : normal;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "normalLeftTail"))) {
          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* NOTE(review): attribute "max" is stored in the min field -
             confirm this is the intended truncation point */
          emission->min       = getDoubleAttribute(child, "max", &error);
          emission->type      = normal_left;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "normalRightTail"))) {

          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* NOTE(review): attribute "min" is stored in the max field -
             confirm this is the intended truncation point */
          emission->max       = getDoubleAttribute(child, "min", &error);
          emission->type      = normal_right;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "uniform"))) {
          emission->max  = getDoubleAttribute(child, "max", &error);
          emission->min  = getDoubleAttribute(child, "min", &error);
          emission->type = uniform;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "multinormal"))) {
          emission->type = multinormal;
          emission->dimension = getIntAttribute(child, "dimension", &error);

          /* check that all emissions in all states have same dimension or
             set when first emission is read*/
          if (f->model.c[modelNo]->dim <= 1)
            f->model.c[modelNo]->dim = emission->dimension;
          else if (f->model.c[modelNo]->dim != emission->dimension) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }

          if (0 != ghmm_c_emission_alloc(emission, emission->dimension)) {
            GHMM_LOG(LERROR, "Can not allocate multinormal emission.");
            goto STOP;
          }
          multichild = child->children;
          while (multichild != NULL) {
            if ((!xmlStrcmp(multichild->name, BAD_CAST "mean"))) {
              s = (char *)xmlNodeGetContent(multichild);
              if (-1 == parseCSVList(s, emission->dimension, emission->mean.vec, 0)) {
                GHMM_LOG(LERROR, "Can not parse mean CSV list.");
                goto STOP;
              }
              /* the text is not needed any more once it is parsed */
              m_free(s);
              s = NULL;
            }
            if ((!xmlStrcmp(multichild->name, BAD_CAST "variance"))) {
              s = (char *)xmlNodeGetContent(multichild);
              if (-1 == parseCSVList(s, emission->dimension * emission->dimension,
                                     emission->variance.mat, 0)) {
                GHMM_LOG(LERROR, "Can not parse variance CSV list.");
                goto STOP;
              }
              /* the text is not needed any more once it is parsed */
              m_free(s);
              s = NULL;
              if (0 != ighmm_invert_det(emission->sigmainv, &emission->det,
                                        emission->dimension, emission->variance.mat))
              {
                GHMM_LOG(LERROR, "Can not calculate inverse of covariance matrix.");
                goto STOP;
              }
              if (0 != ighmm_cholesky_decomposition(emission->sigmacd,
                                                    emission->dimension,
                                                    emission->variance.mat))
              {
                GHMM_LOG(LERROR, "Can not calculate cholesky decomposition of covariance matrix.");
                goto STOP;
              }
            }
            multichild = multichild->next;
          }
        }
        i++;
        child = child->next;
      }
      /* the state is fixed only if every one of its densities is fixed */
      newcstate->fix = stateFixed;
    }

    /* ======== pair hmm state ============================================ */
    if ((!xmlStrcmp(elem->name, BAD_CAST "pair"))) {
    }

    /* -------- background name  ------------------------------------------ */
    if ((!xmlStrcmp(elem->name, BAD_CAST "backgroundKey"))) {

      assert(f->modelType & GHMM_kBackgroundDistributions);

      s = (char *)xmlNodeGetContent(elem);

      for (i=0; i<f->model.d[modelNo]->bp->n; i++) {
        if (0 == strcmp(s, f->model.d[modelNo]->bp->name[i])) {
          if (order != f->model.d[modelNo]->bp->order[i]) {
            estr = ighmm_mprintf(NULL, 0, "order of background %s and state %d"
                                 " does not match",
                                 f->model.d[modelNo]->bp->name[i], state);
            GHMM_LOG(LERROR, estr);
            m_free(estr);
            goto STOP;
          } else {
            f->model.d[modelNo]->background_id[state] = i;
            break;
          }
        }
      }
      if (i == f->model.d[modelNo]->bp->n) {
        estr = ighmm_mprintf(NULL, 0, "can't find background with name %s in"
                             " state %d", s, state);
        GHMM_LOG(LERROR, estr);
        m_free(estr);
        goto STOP;
      }
      m_free(s);
      s = NULL;
    }

    /* -------- label (class) of the state -------------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "class"))) {

      assert(f->modelType & GHMM_kLabeledStates);

      s = (char *)xmlNodeGetContent(elem);
      label = atoi(s);
      m_free(s);
      s = NULL;
      if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM) {
        if (f->model.d[modelNo]->label_alphabet->size > label)
          f->model.d[modelNo]->label[state] = label;
        else
          GHMM_LOG(LWARN, "Invalid label");
      }
    }

    /* -------- tied to --------------------------------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "tiedTo"))) {

      assert(f->modelType & GHMM_kTiedEmissions);

      s = (char *)xmlNodeGetContent(elem);
      tied = atoi(s);
      if (state>=tied) {
        f->model.d[modelNo]->tied_to[state] = tied;
        if (f->model.d[modelNo]->tied_to[tied] != tied) {
          estr = ighmm_mprintf(NULL, 0, "state %d not tied to tie group leader", state);
          GHMM_LOG(LERROR, estr);
          m_free(estr);
          goto STOP;
        }
      } else {
        estr = ighmm_mprintf(NULL, 0, "state %d tiedTo (%d) is invalid", state, tied);
        GHMM_LOG(LERROR, estr);
        m_free(estr);
        goto STOP;
      }
      m_free(s);
      s = NULL;
    }

    /* -------- position for graphical editing ---------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "position"))) {
      curX = getIntAttribute(elem, "x", &error);
      if (error)
        GHMM_LOG(LWARN, "failed to read x position");
      curY = getIntAttribute(elem, "y", &error);
      if (error)
        GHMM_LOG(LWARN, "failed to read y position");

      switch (f->modelType & PTR_TYPE_MASK) {
      case GHMM_kDiscreteHMM:
        f->model.d[modelNo]->s[state].xPosition = curX;
        f->model.d[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kDiscreteHMM+GHMM_kTransitionClasses:
        f->model.ds[modelNo]->s[state].xPosition = curX;
        f->model.ds[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kDiscreteHMM+GHMM_kPairHMM:
      case GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses:
        f->model.dp[modelNo]->s[state].xPosition = curX;
        f->model.dp[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kContinuousHMM:
      case GHMM_kContinuousHMM+GHMM_kTransitionClasses:
      case (GHMM_kContinuousHMM+GHMM_kMultivariate):
      case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
        f->model.c[modelNo]->s[state].xPosition = curX;
        f->model.c[modelNo]->s[state].yPosition = curY;
        break;
      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
    }

    elem = elem->next;
  }

  /* a description that was never attached to a model state has no other
     owner and is released here */
  if (!descStored && desc) {
    m_free(desc);
  }
  return 0;
STOP:
  /* release only what is still owned locally; everything that was stored in
     the model stays with the model */
  if (s) {
    m_free(s);
  }
  if (!descStored && desc) {
    m_free(desc);
  }
  if (emissions) {
    m_free(emissions);
  }
  return -1;
#undef CUR_PROC
}
Ejemplo n.º 6
0
/**
   Trains the ghmm_dmodel with a set of annotated sequences till convergence using
   gradient descent.
   Model must not have silent states. (checked in Python wrapper)

   Training stops when the improvement becomes insignificant, when eta drops
   to GHMM_EPS_PREC or below, or after no_steps training steps.

   The model passed in may be freed and replaced by a copy of the last saved
   model while training, so the caller must continue with the returned
   pointer instead of mo.

   @return            pointer to the trained model on success, NULL on error
   @param mo:         pointer to a ghmm_dmodel
   @param sq:         struct of annotated sequences
   @param eta:        initial parameter eta (learning rate)
   @param no_steps    number of training steps
 */
ghmm_dmodel* ghmm_dmodel_label_gradient_descent (ghmm_dmodel* mo, ghmm_dseq * sq, double eta, int no_steps)
{
#define CUR_PROC "ghmm_dmodel_label_gradient_descent"

  char * str;
  int runs = 0;
  double cur_perf, last_perf;
  ghmm_dmodel *last;

  /* last always holds the best model seen so far */
  last = ghmm_dmodel_copy(mo);
  if (!last)
    return NULL;
  last_perf = compute_performance (last, sq);

  while (eta > GHMM_EPS_PREC && runs < no_steps) {
    runs++;
    if (-1 == gradient_descent_onestep(mo, sq, eta)) {
      ghmm_dmodel_free(&last);
      return NULL;
    }
    cur_perf = compute_performance(mo, sq);

    if (last_perf < cur_perf) {
      /* if model is degenerated, lower eta and try again */
      if (cur_perf > 0.0) {
        str = ighmm_mprintf(NULL, 0, "current performance = %g", cur_perf);
        GHMM_LOG(LINFO, str);
        m_free(str);
        ghmm_dmodel_free(&mo);
        mo = ghmm_dmodel_copy(last);
        eta *= .5;
      }
      else {
        /* Improvement insignificant, assume convergence */
        if (fabs (last_perf - cur_perf) < cur_perf * (-1e-8)) {
          ghmm_dmodel_free(&last);
          str = ighmm_mprintf(NULL, 0, "convergence after %d steps.", runs);
          GHMM_LOG(LINFO, str);
          m_free(str);
          /* convergence is the successful outcome: hand back the trained
             model instead of the NULL error indicator */
          return mo;
        }

        if (runs < 175 || 0 == runs % 50) {
          str = ighmm_mprintf(NULL, 0, "Performance: %g\t improvement: %g\t step %d", cur_perf,
                              cur_perf - last_perf, runs);
          GHMM_LOG(LINFO, str);
          m_free(str);
        }

        /* significant improvement, next iteration */
        ghmm_dmodel_free(&last);
        last = ghmm_dmodel_copy(mo);
        last_perf = cur_perf;
        eta *= 1.07;
      }
    }
    /* no improvement */
    else {

      if (runs < 175 || 0 == runs % 50) {
        str = ighmm_mprintf(NULL, 0, "Performance: %g\t !IMPROVEMENT: %g\t step %d", cur_perf,
                cur_perf - last_perf, runs);
        GHMM_LOG(LINFO, str);
        m_free(str);
      }

      /* try another training step */
      runs++;
      eta *= .85;
      if (-1 == gradient_descent_onestep(mo, sq, eta)) {
        ghmm_dmodel_free(&last);
        return NULL;
      }
      cur_perf = compute_performance (mo, sq);
      str = ighmm_mprintf(NULL, 0, "Performance: %g\t ?Improvement: %g\t step %d", cur_perf,
              cur_perf - last_perf, runs);
      GHMM_LOG(LINFO, str);
      m_free(str);

      /* improvement, save and proceed with next iteration */
      if (last_perf < cur_perf && cur_perf < 0.0) {
        ghmm_dmodel_free (&last);
        last = ghmm_dmodel_copy(mo);
        last_perf = cur_perf;
      }
      /* still no improvement, revert to saved model */
      else {
        runs--;
        ghmm_dmodel_free(&mo);
        mo = ghmm_dmodel_copy(last);
        eta *= .9;
      }
    }
  }

  ghmm_dmodel_free(&last);
  return mo;

#undef CUR_PROC
}
Ejemplo n.º 7
0
/*============================================================================*/
/**
   Computes a label sequence for an observation sequence by propagating label
   hypotheses through the sequence and keeping, for every state, at most the
   k most probable ones in each step.

   The process is terminated with exit(1) if an allocation fails, if a gamma
   value becomes positive or if no hypothesis is left in a step.

   @return          array of seq_len labels, or NULL if seq_len <= 0, k <= 0
                    or no valid hypothesis was found
   @param mo:       pointer to a ghmm_dmodel with state labels
   @param o_seq:    observation sequence
   @param seq_len:  length of o_seq
   @param k:        number of hypotheses kept per state
   @param log_p:    receives the log probability of the returned hypothesis,
                    or 1.0 if none was found; not written when the function
                    returns early because of seq_len <= 0 or k <= 0
 */
int *ghmm_dmodel_label_kbest (ghmm_dmodel * mo, int *o_seq, int seq_len, int k, double *log_p)
{
#define CUR_PROC "ghmm_dl_kbest"
  int i, t, c, l, m;            /* counters */
  int no_oldHyps;               /* number of hypotheses until position t-1 */
  int b_index, i_id;            /* index for addressing states' b arrays */
  int no_labels = 0;
  int exists, g_nr;
  int *states_wlabel;
  int *label_max_out;
  char *str;

  /* logarithmized transition matrix A, log(a(i,j)) => log_a[i*N+j],
     1.0 for zero probability */
  double **log_a;

  /* matrix of hypotheses, holds for every position in the sequence a list
     of hypotheses */
  hypoList **h;
  hypoList *hP;

  /* vectors for rows in the matrices */
  int *hypothesis;

  /* pointer & prob. of the k most probable hypotheses for each state
     - matrices of dimensions #states x k:  argm(i,l) => argmaxs[i*k+l] */
  double *maxima;
  hypoList **argmaxs;

  /* pointer to & probability of most probable hypothesis in a certain state */
  hypoList *argmax;
  double sum;

  /* break if sequence empty or k<1: */
  if (seq_len <= 0 || k <= 0)
    return NULL;

  ARRAY_CALLOC (h, seq_len);

  /* 1. Initialization (extend empty hypothesis to #labels hypotheses of
         length 1): */

  /* get number of labels (= maximum label + 1)
     and number of states with those labels */
  ARRAY_CALLOC (states_wlabel, mo->N);
  ARRAY_CALLOC (label_max_out, mo->N);
  for (i = 0; i < mo->N; i++) {
    c = mo->label[i];
    states_wlabel[c]++;
    if (c > no_labels)
      no_labels = c;
    if (mo->s[i].out_states > label_max_out[c])
      label_max_out[c] = mo->s[i].out_states;
  }
  /* add one to the maximum label to get the number of labels */
  no_labels++;
  ARRAY_REALLOC (states_wlabel, no_labels);
  ARRAY_REALLOC (label_max_out, no_labels);

  /* initialize h: */
  hP = h[0];
  for (i = 0; i < mo->N; i++) {
    if (mo->s[i].pi > KBEST_EPS) {
      /* printf("Found State %d with initial probability %f\n", i, mo->s[i].pi); */
      exists = 0;
      while (hP != NULL) {
        if (hP->hyp_c == mo->label[i]) {
          /* add entry to the gamma list */
          g_nr = hP->gamma_states;
          hP->gamma_id[g_nr] = i;
          hP->gamma_a[g_nr] =
            log (mo->s[i].pi) +
            log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]);
          hP->gamma_states = g_nr + 1;
          exists = 1;
          break;
        }
        else
          hP = hP->next;
      }
      if (!exists) {
        ighmm_hlist_insert (&(h[0]), mo->label[i], NULL);
        /* initialize gamma-array with safe size (number of states) and add the first entry */
        ARRAY_MALLOC (h[0]->gamma_a, states_wlabel[mo->label[i]]);
        ARRAY_MALLOC (h[0]->gamma_id, states_wlabel[mo->label[i]]);
        h[0]->gamma_id[0] = i;
        h[0]->gamma_a[0] =
          log (mo->s[i].pi) +
          log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]);
        h[0]->gamma_states = 1;
        h[0]->chosen = 1;
      }
      hP = h[0];
    }
  }
  /* reallocating the gamma list to the real size */
  hP = h[0];
  while (hP != NULL) {
    ARRAY_REALLOC (hP->gamma_a, hP->gamma_states);
    ARRAY_REALLOC (hP->gamma_id, hP->gamma_states);
    hP = hP->next;
  }

  /* calculate transition matrix with logarithmic values: */
  log_a = kbest_buildLogMatrix (mo->s, mo->N);

  /* initialize temporary arrays: */
  ARRAY_MALLOC (maxima, mo->N * k);                             /* for each state save k */
  ARRAY_MALLOC (argmaxs, mo->N * k);


  /*------ Main loop: Cycle through the sequence: ------*/
  for (t = 1; t < seq_len; t++) {

    /* put o_seq[t-1] in emission history: */
    update_emission_history (mo, o_seq[t - 1]);

    /* 2. Propagate hypotheses forward and update gamma: */
    no_oldHyps =
      ighmm_hlist_prop_forward (mo, h[t - 1], &(h[t]), no_labels, states_wlabel,
                     label_max_out);
    /* printf("t = %d (%d), no of old hypotheses = %d\n", t, seq_len, no_oldHyps); */

    /*-- calculate new gamma: --*/
    hP = h[t];
    /* cycle through list of hypotheses */
    while (hP != NULL) {

      for (i = 0; i < hP->gamma_states; i++) {
        /* if hypothesis hP ends with label of state i:
           gamma(i,c):= log(sum(exp(a(j,i)*exp(oldgamma(j,old_c)))))
           + log(b[i](o_seq[t]))
           else: gamma(i,c):= -INF (represented by 1.0) */
        i_id = hP->gamma_id[i];
        hP->gamma_a[i] = ighmm_log_gamma_sum (log_a[i_id], &mo->s[i_id], hP->parent);
        b_index = get_emission_index (mo, i_id, o_seq[t], t);
        if (b_index < 0) {
          hP->gamma_a[i] = 1.0;
          if (mo->order[i_id] > t)
            continue;
          else {
            str = ighmm_mprintf (NULL, 0,
                           "i_id: %d, o_seq[%d]=%d\ninvalid emission index!\n",
                           i_id, t, o_seq[t]);
            GHMM_LOG(LCONVERTED, str);
            m_free (str);
          }
        }
        else
          hP->gamma_a[i] += log (mo->s[i_id].b[b_index]);
        /*printf("%g = %g\n", log(mo->s[i_id].b[b_index]), hP->gamma_a[i]); */
        if (hP->gamma_a[i] > 0.0) {
          GHMM_LOG(LCONVERTED, "gamma to large. ghmm_dl_kbest failed\n");
          exit (1);
        }
      }
      hP = hP->next;
    }

    /* 3. Choose the k most probable hypotheses for each state and discard all
           hypotheses that were not chosen: */

    /* initialize temporary arrays: */
    for (i = 0; i < mo->N * k; i++) {
      maxima[i] = 1.0;
      argmaxs[i] = NULL;
    }

    /* cycle through hypotheses & calculate the k most probable hypotheses for
       each state: */
    hP = h[t];
    while (hP != NULL) {
      for (i = 0; i < hP->gamma_states; i++) {
        i_id = hP->gamma_id[i];
        if (hP->gamma_a[i] > KBEST_EPS)
          continue;
        /* find first best hypothesis that is worse than current hypothesis: */
        for (l = 0;
             l < k && maxima[i_id * k + l] < KBEST_EPS
             && maxima[i_id * k + l] > hP->gamma_a[i]; l++);
        if (l < k) {
          /* for each m>l: m'th best hypothesis becomes (m+1)'th best */
          for (m = k - 1; m > l; m--) {
            argmaxs[i_id * k + m] = argmaxs[i_id * k + m - 1];
            maxima[i_id * k + m] = maxima[i_id * k + m - 1];
          }
          /* save new l'th best hypothesis: */
          maxima[i_id * k + l] = hP->gamma_a[i];
          argmaxs[i_id * k + l] = hP;
        }
      }
      hP = hP->next;
    }

    /* set 'chosen' for all hypotheses from argmaxs array: */
    for (i = 0; i < mo->N * k; i++)
      /* only choose hypotheses whose prob. is at least threshold*max_prob.
         Entry i belongs to state i/k (layout [state*k + rank]), so the best
         entry of the same state is found at (i/k)*k */
      if (maxima[i] != 1.0
          && maxima[i] >= KBEST_THRESHOLD + maxima[(i / k) * k])
        argmaxs[i]->chosen = 1;

    /* remove hypotheses that were not chosen from the lists: */
    /* remove all hypotheses till the first chosen one */
    while (h[t] != NULL && 0 == h[t]->chosen)
      ighmm_hlist_remove (&(h[t]));
    /* remove all other not chosen hypotheses */
    if (!h[t]) {
      GHMM_LOG(LCONVERTED, "No chosen hypothesis. ghmm_dl_kbest failed\n");
      exit (1);
    }
    hP = h[t];
    while (hP->next != NULL) {
      if (1 == hP->next->chosen)
        hP = hP->next;
      else
        ighmm_hlist_remove (&(hP->next));
    }
  }
  /* dispose of temporary arrays: */
  m_free(states_wlabel);
  m_free(label_max_out);
  m_free(argmaxs);
  m_free(maxima);
  /* transition matrix is no longer needed from here on */
  for (i=0; i<mo->N; i++)
    m_free(log_a[i]);
  m_free(log_a);

  /* 4. Save the hypothesis with the highest probability over all states: */
  hP = h[seq_len - 1];
  argmax = NULL;
  *log_p = 1.0;                 /* log_p will store log of maximum summed probability */
  while (hP != NULL) {
    /* sum probabilities for each hypothesis over all states: */
    sum = ighmm_cvector_log_sum (hP->gamma_a, hP->gamma_states);
    /* and select maximum sum */
    if (sum < KBEST_EPS && (*log_p == 1.0 || sum > *log_p)) {
      *log_p = sum;
      argmax = hP;
    }
    hP = hP->next;
  }

  /* found a valid path? */
  if (*log_p < KBEST_EPS) {
    /* yes: extract chosen hypothesis by following the parent links back to
       the start of the sequence: */
    ARRAY_MALLOC (hypothesis, seq_len);
    for (i = seq_len - 1; i >= 0; i--) {
      hypothesis[i] = argmax->hyp_c;
      argmax = argmax->parent;
    }
  }
  else
    /* no: return 1.0 representing -INF and an empty hypothesis */
    hypothesis = NULL;

  /* dispose of calculation matrices: */
  hP = h[seq_len - 1];
  while (hP != NULL)
    ighmm_hlist_remove (&hP);
  free (h);
  return hypothesis;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ghmm_dl_kbest failed\n");
  exit (1);
#undef CUR_PROC
}