Beispiel #1
0
/* ========================================================================= */
/**
   Formats an array of doubles as one comma separated string
   ("v0, v1, ..., vn"), printing every value with "%.8g".
   @return newly allocated, NUL-terminated string which the caller has to
           free; NULL if size is less than one, the allocation fails or a
           value does not fit into the buffer
   @param array:  values to be written
   @param size:   number of values in array
*/
static char * doubleArrayToCSV(double * array, int size) {
#define CUR_PROC "doubleArrayToCSV"

  int i, written, pos=0;
  char *csv=NULL;
  int singlelength = (2 + /* comma and space */
                      8 + /* 8 significant digits */
                      1 + /* sign */
                      5 + /* 'e' and signed exponent */
                      3);   /* safety: decimal point and terminating NUL */
  int maxlength = size * singlelength;

  /* an empty array cannot be represented and is reported as an error */
  if (size < 1) {
    GHMM_LOG(LERROR, "writing CSV failed");
    goto STOP;
  }

  ARRAY_MALLOC(csv, maxlength);

  for (i=0; i < size; i++) {
    /* snprintf is bounded by the space that is left, so an unexpectedly
       wide value is detected below instead of overrunning the buffer;
       all values but the last one are followed by the separator */
    written = snprintf(csv+pos, (size_t)(maxlength-pos),
                       (i < size-1) ? "%.8g, " : "%.8g", array[i]);
    if (written < 0 || written >= maxlength-pos) {
      GHMM_LOG(LERROR, "writing CSV failed");
      goto STOP;
    }
    pos += written;
  }
  return csv;
STOP:
  free(csv);
  return NULL;
#undef  CUR_PROC
}
Beispiel #2
0
/**
   Allocates the work matrices of the gradient descent with ARRAY_CALLOC.
   @return 0 on success, -1 if an allocation failed; in that case everything
           allocated so far is passed to gradient_descent_gfree
   @param matrix_b:   receives mo->N rows, row i has
                      ghmm_ipow(mo, mo->M, mo->order[i] + 1) entries
   @param matrix_a:   receives a flat array of mo->N * mo->N entries,
                      matrix_a(i,j) = matrix_a[i*mo->N+j]
   @param matrix_pi:  receives an array of mo->N entries
   @param mo:         model that determines the sizes
*/
static int gradient_descent_galloc (double ***matrix_b, double **matrix_a,
                             double **matrix_pi, ghmm_dmodel * mo)
{
#define CUR_PROC "gradient_descent_galloc"

  int i;

  /* start from a defined state so that the error path never hands
     uninitialised pointers to gradient_descent_gfree */
  *matrix_b = NULL;
  *matrix_a = NULL;
  *matrix_pi = NULL;

  /* the row table is allocated with ARRAY_CALLOC (not ARRAY_MALLOC) so that
     rows which have not been allocated yet are not left as garbage pointers
     when a later row allocation fails */
  ARRAY_CALLOC (*matrix_b, mo->N);
  for (i = 0; i < mo->N; i++)
    ARRAY_CALLOC ((*matrix_b)[i], ghmm_ipow (mo, mo->M, mo->order[i] + 1));

  /* matrix_a(i,j) = matrix_a[i*mo->N+j] */
  ARRAY_CALLOC (*matrix_a, mo->N * mo->N);

  /* allocate memory for matrix_pi */
  ARRAY_CALLOC (*matrix_pi, mo->N);

  return 0;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  gradient_descent_gfree (*matrix_b, *matrix_a, *matrix_pi, mo->N);
  return -1;

#undef CUR_PROC
}
Beispiel #3
0
/**
   Allocates a ghmm_dpseq and, where the respective count is positive, its
   two value matrices.
   @return the new object, NULL if an allocation failed
   @param length:               stored in the object and used as second
                                dimension of both matrices
   @param number_of_alphabets:  rows of the matrix stored in seq
   @param number_of_d_seqs:     rows of the matrix stored in d_value
*/
ghmm_dpseq * ghmm_dpseq_init(int length, int number_of_alphabets, int number_of_d_seqs) {
#define CUR_PROC "ghmm_dpseq_init"
  ghmm_dpseq * result;

  ARRAY_MALLOC (result, 1);

  /* both matrix pointers are cleared before anything can fail, so the
     error path always hands a fully initialised object to ghmm_dpseq_free */
  result->d_value = NULL;
  result->seq = NULL;
  result->number_of_d_seqs = number_of_d_seqs;
  result->number_of_alphabets = number_of_alphabets;
  result->length = length;

  if (number_of_alphabets > 0) {
    result->seq = ighmm_dmatrix_alloc(number_of_alphabets, length);
    if (result->seq == NULL)
      goto STOP;
  }

  if (number_of_d_seqs > 0) {
    result->d_value = ighmm_cmatrix_alloc(number_of_d_seqs, length);
    if (result->d_value == NULL)
      goto STOP;
  }

  return result;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  ghmm_dpseq_free(result);
  return NULL;
#undef CUR_PROC
}
Beispiel #4
0
/**
   Allocates the containers of a ghmm_sample_data object for model mo: an
   mo->N x mo->N transition matrix and, for every state i, an array of
   mo->M[i] entries in state_data[i].
   @return 0 on success, -1 if an allocation failed
   @param mo:    model that determines the sizes
   @param data:  object whose transition and state_data members are set
*/
int ghmm_alloc_sample_data(ghmm_bayes_hmm *mo, ghmm_sample_data *data){
#define CUR_PROC "ghmm_alloc_sample_data"
    int i;

    /* TODO: for emissions with dimension > 1 the matrices of every
       state_data[i][j] still have to be allocated */

    /* a failed matrix allocation must not be reported as success */
    data->transition = ighmm_cmatrix_alloc(mo->N, mo->N);
    if (!data->transition)
        goto STOP;

    ARRAY_MALLOC(data->state_data, mo->N);
    for(i = 0; i < mo->N; i++){
        ARRAY_MALLOC(data->state_data[i], mo->M[i]);
    }
    return 0;
STOP:
    /* NOTE(review): members allocated before the failure are not released
       here; the caller has to clean up data */
    return -1;
#undef CUR_PROC
}
Beispiel #5
0
/**
   Calculates the logarithm of the sum over exp(log_a[j] + log_gamma[j]),
   i.e. the logarithm of the sum of the products a[j]*gamma[j], where j runs
   over the incoming states of s. The value 1.0 marks log(0); incoming states
   without an entry in the gamma list of the parent are treated that way.
   The process is terminated with exit(1) if the work array cannot be
   allocated.
   @return logarithm of the sum, 1.0 if no incoming state has a gamma entry
   @param log_a:      row of the transition matrix with logarithmic values
                      (1.0 for log(0)), indexed like s->in_id
   @param s:          ghmm_dstate whose gamma-value is calculated
   @param parent:     a pointer to the parent hypothesis
*/
static double ighmm_log_gamma_sum (double *log_a, ghmm_dstate * s, hypoList * parent) {
#define CUR_PROC "ighmm_log_gamma_sum"
  double *terms;            /* log of a[j]*gamma[j], 1.0 if the product is 0 */
  double best = 1.0;        /* largest term seen so far, 1.0 while unset */
  double sum;
  int best_idx = 0;
  int in, g, wanted;

  /* with a single gamma entry at most one product is non-zero */
  if (parent->gamma_states == 1) {
    for (in = 0; in < s->in_states; in++) {
      if (s->in_id[in] == parent->gamma_id[0])
        return parent->gamma_a[0] + log_a[in];
    }
  }

  ARRAY_MALLOC (terms, s->in_states);

  /* build the terms as sums of logarithms and remember the largest one */
  for (in = 0; in < s->in_states; in++) {
    wanted = s->in_id[in];

    /* position of the incoming state in the gamma list of the parent */
    g = 0;
    while (g < parent->gamma_states && parent->gamma_id[g] != wanted)
      g++;

    if (g == parent->gamma_states) {
      terms[in] = 1.0;
      continue;
    }

    terms[in] = log_a[in] + parent->gamma_a[g];
    if (best == 1.0 || (terms[in] != 1.0 && terms[in] > best)) {
      best = terms[in];
      best_idx = in;
    }
  }

  /* best + log(1 + sum[j != best_idx; exp(terms[j] - best)]) */
  sum = 1.0;
  for (in = 0; in < s->in_states; in++) {
    if (in == best_idx || terms[in] == 1.0)
      continue;
    sum += exp (terms[in] - best);
  }
  sum = log (sum);
  sum += best;

  free (terms);
  return sum;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_log_gamma_sum failed\n");
  exit (1);
#undef CUR_PROC
}
Beispiel #6
0
/*===========================================================================*/
/**
   Parses one background distribution element and appends it to the
   background distributions (bp) of discrete model modelNo: the order, the
   name (attribute "key") and the distribution given as CSV list in the
   content of the node.
   @return 0 on success, -1 on error
   @param doc:      not used
   @param cur:      node of the background distribution
   @param f:        file object holding the models and the model type
   @param modelNo:  index of the model the distribution belongs to
*/
static int parseBackground(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int modelNo) {
#define CUR_PROC "parseBackground"

  int error, order;
  int bgNr, rev;
  int i, size;
  double *b = NULL;
  char   *s = NULL;

  assert(f->modelType & GHMM_kDiscreteHMM);

  /* the slot is claimed before anything is validated */
  bgNr = f->model.d[modelNo]->bp->n++;

  /* get order */
  order = getIntAttribute(cur, "order", &error);
  if (error)
    order=0;
  else if (order && !(f->modelType & GHMM_kHigherOrderEmissions)) {
    GHMM_LOG(LERROR, "background distribution has order > 0, but model is not higher order");
    goto STOP;
  }
  f->model.d[modelNo]->bp->order[bgNr] = order;

  /* get name; the string is handed over to the model */
  s = (char *)getXMLCharAttribute(cur, "key", &error);
  f->model.d[modelNo]->bp->name[bgNr] = s;

  rev = getIntAttribute(cur, "rev", &error);
  if (error)
    rev = 0;

  /* get distribution */
  s = (char *)xmlNodeGetContent(cur);

  /* number of probabilities is m^(order+1); computed once with integer
     arithmetic so that no rounding of a floating point pow() result can
     change the size between the allocation and the parser */
  size = 1;
  for (i = 0; i <= order; i++)
    size *= f->model.d[modelNo]->bp->m;

  ARRAY_MALLOC(b, size);
  if (-1 !=  parseCSVList(s, size, b, rev))
    f->model.d[modelNo]->bp->b[bgNr] = b;
  else {
    GHMM_LOG(LERROR, "Can not parse background CSV list.");
    goto STOP;
  }
  free(s);

  return 0;
STOP:
  /* at this point s is either NULL or the node content, never the name */
  m_free(b);
  free(s);
  return -1;
#undef CUR_PROC
}
Beispiel #7
0
/*===========================================================================*/
/**
   Reads an alphabet element: counts its "symbol" children, checks that
   their "code" attributes are 0, 1, 2, ... in document order and stores the
   content of every symbol.
   @return newly allocated alphabet, NULL on error
   @param doc:  not used
   @param cur:  alphabet node whose children are scanned
   @param f:    not used
*/
static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {
#define CUR_PROC "parseAlphabet"

  char * str;
  int M, code, error;

  xmlNodePtr symbol;
  ghmm_alphabet * alfa = NULL;

  ARRAY_CALLOC(alfa, 1);

  /* first pass: count the symbols and validate their codes */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      code = getIntAttribute(symbol, "code", &error);
      if (error || code!=M) {
        str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M);
        GHMM_LOG(LERROR, str);
        m_free(str);
        goto STOP;
      } else
        M++;
    }
    symbol=symbol->next;
  }

  alfa->size = M;
  /*printf("Parsing alphabet with %d symbols\n", alfa->size);*/
  ARRAY_MALLOC(alfa->symbols, M);

  /* second pass: copy the symbol names */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol);
      /*printf("%d. symbol: %s\n", M, alfa->symbols[M-1]);*/
    }
    symbol=symbol->next;
  }

  return alfa;
STOP:
  /* alfa is NULL if its own allocation failed; it must not be dereferenced
     in that case */
  if (alfa) {
    m_free(alfa->symbols);
    m_free(alfa);
  }
  return NULL;
#undef CUR_PROC
}
Beispiel #8
0
/*===========================================================================*/
/**
   Parses one state element and stores its contents in model modelNo of f.
   The following child elements are handled: "silent", "discrete", "mixture"
   (with the densities "normal", "normalLeftTail", "normalRightTail",
   "uniform" and "multinormal"), "backgroundKey", "class", "tiedTo" and
   "position"; "pair" is recognised but ignored.
   Buffers that have been stored in the model (description and emission
   probabilities) stay with the model, also if a later element fails.
   @return 0 on success, -1 on error
   @param doc:        not used
   @param cur:        state node
   @param f:          file object holding the models and the model type
   @param inDegree:   per state value passed to ghmm_cstate_alloc
   @param outDegree:  per state value passed to ghmm_cstate_alloc
   @param modelNo:    index of the model the state belongs to
*/
static int parseState(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int * inDegree, int * outDegree, int modelNo) {
#define CUR_PROC "parseState"

  int i, error, order=0, state=-1442, fixed=-985, tied=-9354, M, aprox, label;
  int curX=0, curY=0;
  double pi, prior;
  double *emissions = NULL;
  char *desc = NULL;
  char *s = NULL, *estr;
  int rev, stateFixed=1;
  int descStored=0;   /* set as soon as desc has been handed to the model */
  ghmm_cstate *newcstate;
  ghmm_c_emission *emission;

  xmlNodePtr elem, child, multichild;

  state = getIntAttribute(cur, "id", &error);
  pi = getDoubleAttribute(cur, "initial", &error);
  if (error) {
    estr = ighmm_mprintf(NULL, 0, "can't read required intial probability for"
                         "state %d", state);
    GHMM_LOG(LERROR, estr);
    m_free(estr);
    goto STOP;
  }

  desc = (char *)xmlGetProp(cur, BAD_CAST "desc");

  elem = cur->children;
  while (elem!=NULL) {
    /* ======== silent state ============================================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "silent"))) {
      switch (f->modelType & PTR_TYPE_MASK) {
      case (GHMM_kDiscreteHMM):
        f->model.d[modelNo]->silent[state] = 1;
        break;
      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
        f->model.ds[modelNo]->silent[state] = 1;
        break;
      case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
      case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
        f->model.dp[modelNo]->silent[state] = 1;
        break;
      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
    }

    /* ======== discrete state (possible higher order) ==================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "discrete"))) {
      assert((f->modelType & GHMM_kDiscreteHMM) && ((f->modelType & GHMM_kPairHMM) == 0));

      /* fixed is a property of the distribution and optional */
      fixed = getIntAttribute(elem, "fixed", &error);
      if (error)
        fixed = 0;

      /* order is optional for discrete */
      if (f->modelType & GHMM_kHigherOrderEmissions) {
        order = getIntAttribute(elem, "order", &error);
        if (error)
          order = 0;
      }

      /* NOTE(review): "rev" is read from the state node, not from the
         discrete element - confirm that this is intended */
      rev = getIntAttribute(cur, "rev", &error);
      if (error)
        rev = 0;

      /* parsing emission probabilities */
      s = (char *)xmlNodeGetContent(elem);

      switch (f->modelType & PTR_TYPE_MASK) {

      case (GHMM_kDiscreteHMM):
        f->model.d[modelNo]->s[state].desc = desc;
        descStored = 1;
        f->model.d[modelNo]->s[state].pi = pi;
        f->model.d[modelNo]->s[state].fix = fixed;
        if (f->modelType & GHMM_kHigherOrderEmissions) {
          f->model.d[modelNo]->order[state] = order;
          if (f->model.d[modelNo]->maxorder < order) {
            f->model.d[modelNo]->maxorder = order;
            estr = ighmm_mprintf(NULL, 0, "Updated maxorder to %d\n",
                                 f->model.d[modelNo]->maxorder);
            GHMM_LOG(LDEBUG, estr);
            m_free(estr);
          }
        }
        ARRAY_MALLOC(emissions, pow(f->model.d[modelNo]->M, order+1));
        parseCSVList(s, pow(f->model.d[modelNo]->M, order+1), emissions, rev);
        free(f->model.d[modelNo]->s[state].b);
        f->model.d[modelNo]->s[state].b = emissions;
        /* the model owns the array now, the error path must not free it */
        emissions = NULL;
        break;

      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
        f->model.ds[modelNo]->s[state].desc = desc;
        descStored = 1;
        f->model.ds[modelNo]->s[state].pi = pi;
        f->model.ds[modelNo]->s[state].fix = fixed;
        if (f->modelType & GHMM_kHigherOrderEmissions)
          f->model.ds[modelNo]->order[state] = order;
        ARRAY_MALLOC(emissions, pow(f->model.ds[modelNo]->M, order+1));
        parseCSVList(s, pow(f->model.ds[modelNo]->M, order+1), emissions, rev);
        f->model.ds[modelNo]->s[state].b = emissions;
        /* the model owns the array now, the error path must not free it */
        emissions = NULL;
        break;

      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
      m_free(s);
    }

    /* ======== continuous state ========================================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "mixture"))) {
      assert(f->modelType & GHMM_kContinuousHMM);

      /* first pass: count the densities of the mixture */
      M = 0;
      child = elem->children;
      while (child != NULL) {
        if ((!xmlStrcmp(child->name, BAD_CAST "normal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "multinormal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "uniform"))){
          M ++;

        }
        child = child->next;
      }
      ghmm_cstate_alloc(f->model.c[modelNo]->s + state, M, inDegree[state], outDegree[state], f->model.c[modelNo]->cos);
      newcstate = f->model.c[modelNo]->s + state;

      newcstate->desc = desc;
      descStored = 1;
      newcstate->M = M;
      newcstate->pi = pi;

      if( f->model.c[modelNo]->M < M)
        f->model.c[modelNo]->M = M;

      /* second pass: read the parameters of every density */
      child = elem->children;

      i = 0;
      while (child != NULL) {

        emission = newcstate->e+i;

        /* common attributes */
        if ((!xmlStrcmp(child->name, BAD_CAST "normal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "multinormal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "uniform"))){
          fixed = getIntAttribute(child, "fixed", &error);
          if (error)
            fixed = 0;
          /* the state is fixed only if all of its densities are fixed */
          stateFixed = fixed && stateFixed;
          /* allocate emission */
          emission->fixed = fixed;

          prior = getDoubleAttribute(child, "prior", &error);
          if (error)
            prior = 1.0;
          newcstate->c[i] = prior;
        }
        /* child is not a density, continue with the next child */
        else {
          child = child->next;
          continue;
        }

        /* density type dependent attributes */
        if ((!xmlStrcmp(child->name, BAD_CAST "normal"))) {
          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* should the normal distribution be approximated? */
          aprox = getIntAttribute(child, "approx", &error);
          if (error)
            aprox = 0;
          emission->type      = aprox ? normal_approx : normal;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "normalLeftTail"))) {
          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* NOTE(review): attribute "max" is stored in min - confirm */
          emission->min       = getDoubleAttribute(child, "max", &error);
          emission->type      = normal_left;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "normalRightTail"))) {

          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* NOTE(review): attribute "min" is stored in max - confirm */
          emission->max       = getDoubleAttribute(child, "min", &error);
          emission->type      = normal_right;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "uniform"))) {
          emission->max  = getDoubleAttribute(child, "max", &error);
          emission->min  = getDoubleAttribute(child, "min", &error);
          emission->type = uniform;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "multinormal"))) {
          emission->type = multinormal;
          emission->dimension = getIntAttribute(child, "dimension", &error);

          /* check that all emissions in all states have same dimension or
             set when first emission is read*/
          if (f->model.c[modelNo]->dim <= 1)
            f->model.c[modelNo]->dim = emission->dimension;
          else if (f->model.c[modelNo]->dim != emission->dimension) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }

          if (0 != ghmm_c_emission_alloc(emission, emission->dimension)) {
            GHMM_LOG(LERROR, "Can not allocate multinormal emission.");
            goto STOP;
          }
          multichild = child->children;
          while (multichild != NULL) {
            if ((!xmlStrcmp(multichild->name, BAD_CAST "mean"))) {
              s = (char *)xmlNodeGetContent(multichild);
              if (-1 == parseCSVList(s, emission->dimension, emission->mean.vec, 0)) {
                GHMM_LOG(LERROR, "Can not parse mean CSV list.");
                goto STOP;
              }
              /* the node content is only needed for parsing */
              m_free(s);
            }
            if ((!xmlStrcmp(multichild->name, BAD_CAST "variance"))) {
              s = (char *)xmlNodeGetContent(multichild);
              if (-1 == parseCSVList(s, emission->dimension * emission->dimension,
                                     emission->variance.mat, 0)) {
                GHMM_LOG(LERROR, "Can not parse variance CSV list.");
                goto STOP;
              }
              /* the node content is only needed for parsing */
              m_free(s);
              if (0 != ighmm_invert_det(emission->sigmainv, &emission->det,
                                        emission->dimension, emission->variance.mat))
              {
                GHMM_LOG(LERROR, "Can not calculate inverse of covariance matrix.");
                goto STOP;
              }
              if (0 != ighmm_cholesky_decomposition(emission->sigmacd,
                                                    emission->dimension,
                                                    emission->variance.mat))
              {
                GHMM_LOG(LERROR, "Can not calculate cholesky decomposition of covariance matrix.");
                goto STOP;
              }
            }
            multichild = multichild->next;
          }
        }
        i++;
        child = child->next;
      }
      newcstate->fix = stateFixed;
    }

    /* ======== pair hmm state ============================================ */
    if ((!xmlStrcmp(elem->name, BAD_CAST "pair"))) {
    }

    /* -------- background name  ------------------------------------------ */
    if ((!xmlStrcmp(elem->name, BAD_CAST "backgroundKey"))) {

      assert(f->modelType & GHMM_kBackgroundDistributions);

      s = (char *)xmlNodeGetContent(elem);

      /* look up the background distribution by its name */
      for (i=0; i<f->model.d[modelNo]->bp->n; i++) {
        if (0 == strcmp(s, f->model.d[modelNo]->bp->name[i])) {
          if (order != f->model.d[modelNo]->bp->order[i]) {
            estr = ighmm_mprintf(NULL, 0, "order of background %s and state %d"
                                 " does not match",
                                 f->model.d[modelNo]->bp->name[i], state);
            GHMM_LOG(LERROR, estr);
            m_free(estr);
            goto STOP;
          } else {
            f->model.d[modelNo]->background_id[state] = i;
            break;
          }
        }
      }
      if (i == f->model.d[modelNo]->bp->n) {
        estr = ighmm_mprintf(NULL, 0, "can't find background with name %s in"
                             " state %d", s, state);
        GHMM_LOG(LERROR, estr);
        m_free(estr);
        goto STOP;
      }
      m_free(s);
    }

    /* -------- label of the state ---------------------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "class"))) {

      assert(f->modelType & GHMM_kLabeledStates);

      s = (char *)xmlNodeGetContent(elem);
      label = atoi(s);
      m_free(s);
      if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM) {
        if (f->model.d[modelNo]->label_alphabet->size > label)
          f->model.d[modelNo]->label[state] = label;
        else
          GHMM_LOG(LWARN, "Invalid label");
      }
    }

    /* -------- tied to --------------------------------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "tiedTo"))) {

      assert(f->modelType & GHMM_kTiedEmissions);

      s = (char *)xmlNodeGetContent(elem);
      tied = atoi(s);
      /* a state may only be tied to itself or to a state with a smaller id
         which is tied to itself */
      if (state>=tied) {
        f->model.d[modelNo]->tied_to[state] = tied;
        if (f->model.d[modelNo]->tied_to[tied] != tied) {
          estr = ighmm_mprintf(NULL, 0, "state %d not tied to tie group leader", state);
          GHMM_LOG(LERROR, estr);
          m_free(estr);
          goto STOP;
        }
      } else {
        estr = ighmm_mprintf(NULL, 0, "state %d tiedTo (%d) is invalid", state, tied);
        GHMM_LOG(LERROR, estr);
        m_free(estr);
        goto STOP;
      }
      m_free(s);
    }

    /* -------- position for graphical editing ---------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "position"))) {
      curX = getIntAttribute(elem, "x", &error);
      if (error)
        GHMM_LOG(LWARN, "failed to read x position");
      curY = getIntAttribute(elem, "y", &error);
      if (error)
        GHMM_LOG(LWARN, "failed to read y position");

      switch (f->modelType & PTR_TYPE_MASK) {
      case GHMM_kDiscreteHMM:
        f->model.d[modelNo]->s[state].xPosition = curX;
        f->model.d[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kDiscreteHMM+GHMM_kTransitionClasses:
        f->model.ds[modelNo]->s[state].xPosition = curX;
        f->model.ds[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kDiscreteHMM+GHMM_kPairHMM:
      case GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses:
        f->model.dp[modelNo]->s[state].xPosition = curX;
        f->model.dp[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kContinuousHMM:
      case GHMM_kContinuousHMM+GHMM_kTransitionClasses:
      case (GHMM_kContinuousHMM+GHMM_kMultivariate):
      case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
        f->model.c[modelNo]->s[state].xPosition = curX;
        f->model.c[modelNo]->s[state].yPosition = curY;
        break;
      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
    }

    elem = elem->next;
  }

  return 0;
STOP:
  m_free(s);
  /* desc belongs to the model once it has been stored in a state */
  if (!descStored) {
    m_free(desc);
  }
  m_free(emissions);
  return -1;
#undef CUR_PROC
}
/**
   Fills mo with a ring shaped test model of noStates states over four
   symbols and prints it with model_print to stdout, followed by a newline.
   State i has label i%3 and emission order i%2, state 0 has the initial
   probability 1. Every state has a transition to itself and to its
   successor (the last state to state 0), each with probability 0.5.
   If noStates is less than one or an allocation fails, the arrays allocated
   so far (except mo->pow_lookup) are released, mo->s and mo->silent are not
   set and only the newline is printed.
   @param mo:        model to be filled
   @param noStates:  number of states
*/
void generateModel(model *mo, int noStates) {
# define CUR_PROC "generateModel"

  state *states = NULL;
  int i, j;
  int b_len;           /* number of emission parameters of one state */
  int allocated = 0;   /* states whose arrays have been requested so far */

  /* flags indicating whether a state is silent */
  int *silent_array = NULL;

  /* the power look-up table needs at least one entry */
  if (noStates < 1)
    goto STOP;

  /*allocate memory for states and array of silent flags*/
  ARRAY_MALLOC(states, noStates);
  silent_array = (int*)malloc(sizeof(int)*noStates);
  if (!silent_array)
    goto STOP;

  /* initialize all states as none silent*/
  for (i=0; i < noStates; i++) {
    silent_array[i] = 0;
  }

  mo->N = noStates;
  mo->M = 4;
  mo->maxorder = noStates-1;
  mo->prior = -1;
  /* Model has Higher order Emissions and labeled states*/
  mo->model_type =  kLabeledStates;
  if (mo->maxorder>0)
    mo->model_type += kHigherOrderEmissions;
  /* kHigherOrderEmissions + kHasBackgroundDistributions*/

  /* allocate memory for pow look-up table and fill it */
  ARRAY_MALLOC(mo->pow_lookup, mo->maxorder+1);

  mo->pow_lookup[0] = 1;
  for (i=1; i<mo->maxorder+1; i++)
    mo->pow_lookup[i] =  mo->pow_lookup[i-1] * mo->M;

  /*initialize states*/
  for (i=0; i < mo->N; i++) {
    states[i].pi = (0==i ? 1.0:0.0);
    states[i].fix = 0;
    states[i].label = i%3;
    states[i].order = i%2;
    states[i].out_states = 2;
    states[i].in_states = 2;

    /* M^(order+1) emission parameters, computed once per state */
    b_len = 1;
    for (j = 0; j <= states[i].order; j++)
      b_len *= mo->M;

    /* allocate memory for the a, the out- and incoming States and b array for higher emission order states*/
    states[i].b = (double*)malloc(sizeof(double) * b_len);
    states[i].out_id = (int*)malloc(sizeof(int)*states[i].out_states);
    states[i].in_id = (int*)malloc(sizeof(int)*states[i].in_states);
    states[i].out_a = (double*)malloc(sizeof(double)*states[i].out_states);
    states[i].in_a = (double*)malloc(sizeof(double)*states[i].in_states);
    /* all five pointers of this state are defined now, so the error path
       may free them */
    allocated = i+1;
    if (!states[i].b || !states[i].out_id || !states[i].in_id
        || !states[i].out_a || !states[i].in_a)
      goto STOP;

    for (j = 0; j < b_len; j++){
      states[i].b[j] = ( (0==(i+j)%mo->M) ? .6 : .4 / (mo->M-1));
    }

    /* outgoing: self transition and successor, the last state wraps */
    if ((mo->N-1)==i) {
      states[i].out_id[0] = 0;
      states[i].out_id[1] = i;
    }
    else {
      states[i].out_id[0] = i;
      states[i].out_id[1] = i+1;
    }

    /* incoming: self transition and predecessor, state 0 wraps */
    if (0==i) {
      states[i].in_id[0]  = i;
      states[i].in_id[1]  = mo->N-1;
    }
    else {
      states[i].in_id[1]  = i-1;
      states[i].in_id[0]  = i;
    }

    states[i].out_a[0] = 0.5;
    states[i].out_a[1] = 0.5;
    states[i].in_a[0]  = 0.5;
    states[i].in_a[1]  = 0.5;

#ifdef DEBUG
    printf("State %d goto    : %d, %d\n", i, states[i].out_id[0], states[i].out_id[1]);
    printf("State %d comefrom: %d, %d\n", i, states[i].in_id[0],  states[i].in_id[1]);
    printf("State %d goto    : %g, %g\n", i, states[i].out_a[0], states[i].out_a[1]);
    printf("State %d comefrom: %g, %g\n", i, states[i].in_a[0],  states[i].in_a[1]);
#endif
  }

  mo->s = states;
  mo->silent = silent_array;

#ifdef DEBUG
  for (i = 0; i < mo->N; i++) {
    printf("\n State %d:\n", i);
    for (j = 0; j < pow(mo->M,states[i].order+1); j++){
      printf("%g ",mo->s[i].b[j]);
    }
  }
#endif
  model_print(stdout, mo);
  printf("\n");
  return;

STOP:
  /* release what has not been handed over to mo */
  for (i = 0; i < allocated; i++) {
    free(states[i].b);
    free(states[i].out_id);
    free(states[i].in_id);
    free(states[i].out_a);
    free(states[i].in_a);
  }
  free(states);
  free(silent_array);
  printf("\n");

# undef CUR_PROC
}
Beispiel #10
0
/* ========================================================================= */
/**
   Writes one "transition" element for every outgoing transition of state
   sNo of model moNo. Each element carries the attributes "source" and
   "target" and a "probability" child with the transition probabilities of
   all transition classes as CSV list.
   Pair HMMs are not supported and reported as an error.
   @return 0 on success, -1 on error
   @param writer:  XML writer the elements are written to
   @param f:       file object holding the models and the model type
   @param moNo:    index of the model
   @param sNo:     index of the state
*/
static int writeTransition(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo,
                           int sNo) {
#define CUR_PROC "writeTransition"


  int cos, i, j;
  int out_states, * out_id;
  double * * out_a;
  double * w_out_a = NULL;
  char * tmp;
  int result = -1;

  /* select the transition data for the different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    out_states = f->model.d[moNo]->s[sNo].out_states;
    out_id     = f->model.d[moNo]->s[sNo].out_id;
    /* only one class: use the address of the row as a one-row matrix */
    out_a      = &(f->model.d[moNo]->s[sNo].out_a);
    cos        = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    out_states = f->model.ds[moNo]->s[sNo].out_states;
    out_id     = f->model.ds[moNo]->s[sNo].out_id;
    out_a      = f->model.ds[moNo]->s[sNo].out_a;
    cos        = f->model.ds[moNo]->cos;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented; continuing would use out_states, out_id, out_a and
       cos without a value */
    GHMM_LOG(LERROR, "writing transitions of pair HMMs is not implemented");
    goto STOP;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    out_states = f->model.c[moNo]->s[sNo].out_states;
    out_id     = f->model.c[moNo]->s[sNo].out_id;
    out_a      = f->model.c[moNo]->s[sNo].out_a;
    cos        = f->model.c[moNo]->cos;
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  /* buffer for the probabilities of one transition in all classes */
  ARRAY_MALLOC(w_out_a, cos);

  for (i=0; i<out_states; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "transition")) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (transition)");
      goto STOP;
    }

    /* write source id (current state) as attribute; a failure is only
       logged */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "source", "%d", sNo))
      GHMM_LOG(LERROR, "failed to write transition source attribute");

    /* write target id as attribute; a failure is only logged */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "target", "%d", out_id[i]))
      GHMM_LOG(LERROR, "failed to write transition target attribute");

    /* out_a is indexed [class][transition] */
    for (j=0; j<cos; j++)
      w_out_a[j] = out_a[j][i];

    tmp = doubleArrayToCSV(w_out_a, cos);
    if (tmp) {
      if (0 > xmlTextWriterWriteElement(writer, BAD_CAST "probability", BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteElement (transition probabilities)");
        m_free(tmp);
        goto STOP;
      }
      m_free(tmp);
    } else {
      GHMM_LOG(LERROR, "converting transition probabilities array to CSV failed");
      goto STOP;
    }

    /* end transition */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (transition)");
      goto STOP;
    }
  }

  result = 0;
STOP:
  /* shared exit: the work buffer is released on success and on failure */
  if (w_out_a) {
    m_free(w_out_a);
  }
  return result;
#undef CUR_PROC
}
Beispiel #11
0
/*============================================================================*/
/**
   Extends every hypothesis of list h by one label. For each label carried
   by a state that is reachable over an outgoing transition from a possible
   state (gamma_a != 1.0) of the hypothesis, one new hypothesis is inserted
   into *hplus with ighmm_hlist_insert; its gamma_id list collects the
   reached states with that label and its gamma_a array is allocated with
   ARRAY_CALLOC, one entry per collected state.
   The process is terminated with exit(1) if an allocation fails.
   @return number of hypotheses in h
   @param mo:       model providing transitions and state labels
   @param h:        list of hypotheses to be extended
   @param hplus:    list that receives the new hypotheses
   @param labels:   number of labels
   @param nr_s:     per label, first upper bound for the initial size of a
                    gamma list
   @param max_out:  per label, multiplied with the number of gamma states of
                    the extended hypothesis to get the second upper bound
*/
static int ighmm_hlist_prop_forward (ghmm_dmodel * mo, hypoList * h, hypoList ** hplus,
				     int labels, int *nr_s, int *max_out) {
#define CUR_PROC "ighmm_hlist_prop_forward"
  hypoList **byLabel;     /* byLabel[l] != NULL iff the current hypothesis
                             has already been extended with label l */
  hypoList *cur, *target;
  int g, t, lbl, pos;
  int src, dst, known;
  int processed = 0;

  ARRAY_MALLOC (byLabel, labels);

  for (cur = h; cur != NULL; cur = cur->next) {

    for (lbl = 0; lbl < labels; lbl++)
      byLabel[lbl] = NULL;

    /* visit the successors of every state that is not impossible */
    for (g = 0; g < cur->gamma_states; g++) {
      if (cur->gamma_a[g] == 1.0)
        continue;

      src = cur->gamma_id[g];
      for (t = 0; t < mo->s[src].out_states; t++) {
        dst = mo->s[src].out_id[t];
        lbl = mo->label[dst];
        target = byLabel[lbl];

        if (target == NULL) {
          /* first state with this label: open a new hypothesis whose gamma
             list gets a safe initial size */
          ighmm_hlist_insert (hplus, lbl, cur);
          byLabel[lbl] = *hplus;
          ARRAY_MALLOC ((*hplus)->gamma_id, m_min (nr_s[lbl], cur->gamma_states * max_out[cur->hyp_c]));
          (*hplus)->gamma_id[0] = dst;
          (*hplus)->gamma_states = 1;
          continue;
        }

        /* the hypothesis exists: append the state unless it is listed */
        known = target->gamma_states;
        pos = 0;
        while (pos < known && target->gamma_id[pos] != dst)
          pos++;
        if (pos == known) {
          target->gamma_id[known] = dst;
          target->gamma_states = known + 1;
        }
      }
    }

    /* allocate gamma_a and shrink the gamma lists to their final size */
    for (lbl = 0; lbl < labels; lbl++) {
      if (byLabel[lbl] == NULL)
        continue;
      ARRAY_CALLOC (byLabel[lbl]->gamma_a, byLabel[lbl]->gamma_states);
      ARRAY_REALLOC (byLabel[lbl]->gamma_id, byLabel[lbl]->gamma_states);
      byLabel[lbl] = NULL;
    }

    processed++;
  }

  free (byLabel);
  return (processed);
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_hlist_prop_forward failed\n");
  exit (1);
#undef CUR_PROC
}
Beispiel #12
0
/*============================================================================*/
/**
 * Computes a label sequence for an observation sequence with a k-best
 * hypothesis search.
 *
 * For every sequence position a list of label hypotheses is kept.  Each
 * hypothesis stores, for the states carrying its last label, a log
 * probability (gamma).  After every step only hypotheses that rank among the
 * k best of at least one state (and lie within KBEST_THRESHOLD of that
 * state's best) are kept.
 *
 * Throughout this function the value 1.0 is used as the representation of
 * log(0) = -INF, since valid log probabilities are never positive.
 *
 * @param mo       model; reads N, label[], order[], s[].pi, s[].b and
 *                 s[].out_states
 * @param o_seq    observation sequence with at least seq_len entries
 * @param seq_len  length of the observation sequence
 * @param k        number of hypotheses kept per state
 * @param log_p    output: log of the summed probability of the selected
 *                 hypothesis, or 1.0 if no valid hypothesis was found
 * @return  newly allocated array of seq_len labels, or NULL if seq_len <= 0,
 *          k <= 0 or no valid hypothesis exists.  Allocation failures and
 *          inconsistent intermediate results terminate the process via
 *          exit(1), as before.
 */
int *ghmm_dmodel_label_kbest (ghmm_dmodel * mo, int *o_seq, int seq_len, int k, double *log_p)
{
#define CUR_PROC "ghmm_dl_kbest"
  int i, t, c, l, m;            /* counters */
  int b_index, i_id;            /* index for addressing states' b arrays */
  int no_labels = 0;
  int exists, g_nr;
  int *states_wlabel = NULL;    /* number of states per label */
  int *label_max_out = NULL;    /* max. number of out transitions per label */
  char *str;
  double init_gamma;

  /* logarithmized transition matrix A, log(a(i,j)) => log_a[i*N+j],
     1.0 for zero probability */
  double **log_a;

  /* matrix of hypotheses, holds for every position in the sequence a list
     of hypotheses */
  hypoList **h;
  hypoList *hP;

  /* resulting label sequence */
  int *hypothesis = NULL;

  /* pointer & prob. of the k most probable hypotheses for each state
     - matrices of dimensions #states x k:  argm(i,l) => argmaxs[i*k+l] */
  double *maxima;
  hypoList **argmaxs;

  /* pointer to & probability of most probable hypothesis in a certain state */
  hypoList *argmax;
  double sum;

  /* break if sequence empty or k<1: */
  if (seq_len <= 0 || k <= 0)
    return NULL;

  ARRAY_CALLOC (h, seq_len);

  /* 1. Initialization (extend empty hypothesis to #labels hypotheses of
         length 1): */

  /* Determine the number of labels (= maximum label + 1) first, so that the
     per-label arrays are large enough even if a label is >= mo->N.
     Negative labels cannot be used as array indices and are rejected. */
  for (i = 0; i < mo->N; i++) {
    c = mo->label[i];
    if (c < 0) {
      GHMM_LOG(LCONVERTED, "negative state label found\n");
      goto STOP;
    }
    if (c > no_labels)
      no_labels = c;
  }
  /* add one to the maximum label to get the number of labels */
  no_labels++;

  /* count the states per label and the largest out-degree per label */
  ARRAY_CALLOC (states_wlabel, no_labels);
  ARRAY_CALLOC (label_max_out, no_labels);
  for (i = 0; i < mo->N; i++) {
    c = mo->label[i];
    states_wlabel[c]++;
    if (mo->s[i].out_states > label_max_out[c])
      label_max_out[c] = mo->s[i].out_states;
  }

  /* initialize h: one hypothesis per label that has a possible start state */
  for (i = 0; i < mo->N; i++) {
    if (mo->s[i].pi > KBEST_EPS) {
      /* NOTE(review): the emission index for position 0 is used unchecked,
         exactly as before; the main loop below does check for a negative
         index - confirm that it cannot be negative here. */
      init_gamma =
        log (mo->s[i].pi) +
        log (mo->s[i].b[get_emission_index (mo, i, o_seq[0], 0)]);

      /* look for an existing hypothesis with the label of state i */
      exists = 0;
      for (hP = h[0]; hP != NULL; hP = hP->next) {
        if (hP->hyp_c == mo->label[i]) {
          /* add entry to the gamma list */
          g_nr = hP->gamma_states;
          hP->gamma_id[g_nr] = i;
          hP->gamma_a[g_nr] = init_gamma;
          hP->gamma_states = g_nr + 1;
          exists = 1;
          break;
        }
      }

      if (!exists) {
        ighmm_hlist_insert (&(h[0]), mo->label[i], NULL);
        /* initialize gamma arrays with a safe size (number of states with
           this label) and add the first entry */
        ARRAY_MALLOC (h[0]->gamma_a, states_wlabel[mo->label[i]]);
        ARRAY_MALLOC (h[0]->gamma_id, states_wlabel[mo->label[i]]);
        h[0]->gamma_id[0] = i;
        h[0]->gamma_a[0] = init_gamma;
        h[0]->gamma_states = 1;
        h[0]->chosen = 1;
      }
    }
  }

  /* shrink the gamma lists to the size actually used */
  for (hP = h[0]; hP != NULL; hP = hP->next) {
    ARRAY_REALLOC (hP->gamma_a, hP->gamma_states);
    ARRAY_REALLOC (hP->gamma_id, hP->gamma_states);
  }

  /* calculate transition matrix with logarithmic values: */
  log_a = kbest_buildLogMatrix (mo->s, mo->N);
  if (!log_a)
    goto STOP;

  /* temporary arrays: for each state the k best hypotheses */
  ARRAY_MALLOC (maxima, mo->N * k);
  ARRAY_MALLOC (argmaxs, mo->N * k);


  /*------ Main loop: Cycle through the sequence: ------*/
  for (t = 1; t < seq_len; t++) {

    /* put o_seq[t-1] in emission history: */
    update_emission_history (mo, o_seq[t - 1]);

    /* 2. Propagate hypotheses forward and update gamma
          (the returned number of old hypotheses is not needed): */
    (void) ighmm_hlist_prop_forward (mo, h[t - 1], &(h[t]), no_labels,
                                     states_wlabel, label_max_out);

    /*-- calculate new gamma: --*/
    for (hP = h[t]; hP != NULL; hP = hP->next) {
      for (i = 0; i < hP->gamma_states; i++) {
        /* if hypothesis hP ends with label of state i:
           gamma(i,c):= log(sum(exp(a(j,i)*exp(oldgamma(j,old_c)))))
           + log(b[i](o_seq[t]))
           else: gamma(i,c):= -INF (represented by 1.0) */
        i_id = hP->gamma_id[i];
        hP->gamma_a[i] = ighmm_log_gamma_sum (log_a[i_id], &mo->s[i_id], hP->parent);
        b_index = get_emission_index (mo, i_id, o_seq[t], t);
        if (b_index < 0) {
          hP->gamma_a[i] = 1.0;
          /* a state of higher order than t simply cannot emit yet */
          if (mo->order[i_id] > t)
            continue;
          else {
            str = ighmm_mprintf (NULL, 0,
                           "i_id: %d, o_seq[%d]=%d\ninvalid emission index!\n",
                           i_id, t, o_seq[t]);
            GHMM_LOG(LCONVERTED, str);
            m_free (str);
          }
        }
        else
          hP->gamma_a[i] += log (mo->s[i_id].b[b_index]);

        /* a positive value is no valid log probability */
        if (hP->gamma_a[i] > 0.0) {
          GHMM_LOG(LCONVERTED, "gamma to large. ghmm_dl_kbest failed\n");
          exit (1);
        }
      }
    }

    /* 3. Choose the k most probable hypotheses for each state and discard all
	   hypotheses that were not chosen: */

    /* reset temporary arrays: */
    for (i = 0; i < mo->N * k; i++) {
      maxima[i] = 1.0;
      argmaxs[i] = NULL;
    }

    /* cycle through hypotheses & calculate the k most probable hypotheses for
       each state: */
    for (hP = h[t]; hP != NULL; hP = hP->next) {
      for (i = 0; i < hP->gamma_states; i++) {
        i_id = hP->gamma_id[i];
        if (hP->gamma_a[i] > KBEST_EPS)
          continue;
        /* find first best hypothesis that is worse than current hypothesis: */
        for (l = 0;
             l < k && maxima[i_id * k + l] < KBEST_EPS
             && maxima[i_id * k + l] > hP->gamma_a[i]; l++);
        if (l < k) {
          /* for each m>l: m'th best hypothesis becomes (m+1)'th best */
          for (m = k - 1; m > l; m--) {
            argmaxs[i_id * k + m] = argmaxs[i_id * k + m - 1];
            maxima[i_id * k + m] = maxima[i_id * k + m - 1];
          }
          /* save new l'th best hypothesis: */
          maxima[i_id * k + l] = hP->gamma_a[i];
          argmaxs[i_id * k + l] = hP;
        }
      }
    }

    /* set 'chosen' for all hypotheses from argmaxs array.  Row i of the
       (#states x k) matrices belongs to state i and is sorted best first, so
       maxima[i*k] is the best score of the same state that every entry of
       that row has to be compared with. */
    for (i = 0; i < mo->N; i++) {
      for (l = 0; l < k; l++) {
        /* only choose hypotheses whose prob. is at least threshold*max_prob */
        if (maxima[i * k + l] != 1.0
            && maxima[i * k + l] >= KBEST_THRESHOLD + maxima[i * k])
          argmaxs[i * k + l]->chosen = 1;
      }
    }

    /* remove hypotheses that were not chosen from the lists: */
    /* remove all hypotheses till the first chosen one */
    while (h[t] != NULL && 0 == h[t]->chosen)
      ighmm_hlist_remove (&(h[t]));
    if (!h[t]) {
      GHMM_LOG(LCONVERTED, "No chosen hypothesis. ghmm_dl_kbest failed\n");
      exit (1);
    }
    /* remove all other not chosen hypotheses */
    hP = h[t];
    while (hP->next != NULL) {
      if (1 == hP->next->chosen)
        hP = hP->next;
      else
        ighmm_hlist_remove (&(hP->next));
    }
  }

  /* dispose of temporary arrays: */
  m_free(states_wlabel);
  m_free(label_max_out);
  m_free(argmaxs);
  m_free(maxima);
  /* transition matrix is no longer needed from here on */
  for (i=0; i<mo->N; i++)
    m_free(log_a[i]);
  m_free(log_a);

  /* 4. Save the hypothesis with the highest probability over all states: */
  argmax = NULL;
  *log_p = 1.0;                 /* log_p will store log of maximum summed probability */
  for (hP = h[seq_len - 1]; hP != NULL; hP = hP->next) {
    /* sum probabilities for each hypothesis over all states: */
    sum = ighmm_cvector_log_sum (hP->gamma_a, hP->gamma_states);
    /* and select maximum sum */
    if (sum < KBEST_EPS && (*log_p == 1.0 || sum > *log_p)) {
      *log_p = sum;
      argmax = hP;
    }
  }

  /* found a valid path? */
  if (*log_p < KBEST_EPS) {
    /* yes: extract chosen hypothesis by following the parent pointers back
       through the lists of the earlier positions */
    ARRAY_MALLOC (hypothesis, seq_len);
    for (i = seq_len - 1; i >= 0; i--) {
      hypothesis[i] = argmax->hyp_c;
      argmax = argmax->parent;
    }
  }
  else
    /* no: return 1.0 representing -INF and an empty hypothesis */
    hypothesis = NULL;

  /* dispose of calculation matrices: the lists of ALL positions have to be
     released, not only the last one.  This must happen after the extraction
     above because that walks the parent pointers into the earlier lists. */
  for (t = 0; t < seq_len; t++) {
    while (h[t] != NULL)
      ighmm_hlist_remove (&(h[t]));
  }
  free (h);
  return hypothesis;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ghmm_dl_kbest failed\n");
  exit (1);
#undef CUR_PROC
}