Exemplo n.º 1
0
/*===========================================================================*/
/**
 * Parses one background element and appends it to the background
 * distributions of discrete model number modelNo.
 *
 * The optional attributes "order" and "rev" default to 0 when they cannot be
 * read.  The "key" attribute is stored as the name of the distribution and the
 * element content is parsed as a CSV list with m^(order+1) entries.
 *
 * @param doc      XML document (not referenced in this function)
 * @param cur      node of the background element
 * @param f        file structure holding the models
 * @param modelNo  index of the model that receives the distribution
 * @return 0 on success, -1 on error
 */
static int parseBackground(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int modelNo) {
#define CUR_PROC "parseBackground"

  int error, order;
  int bgNr, rev;
  unsigned int entries;
  double *b = NULL;
  char   *s = NULL;

  assert(f->modelType & GHMM_kDiscreteHMM);

  /* claim the next free slot in the background arrays */
  bgNr = f->model.d[modelNo]->bp->n++;

  /* get order */
  order = getIntAttribute(cur, "order", &error);
  if (error)
    order=0;
  else if (order && !(f->modelType & GHMM_kHigherOrderEmissions)) {
    GHMM_LOG(LERROR, "background distribution has order > 0, but model is not higher order");
    goto STOP;
  }
  f->model.d[modelNo]->bp->order[bgNr] = order;

  /* get name; the string is handed over to the background structure */
  s = (char *)getXMLCharAttribute(cur, "key", &error);
  f->model.d[modelNo]->bp->name[bgNr] = s;
  s = NULL;

  rev = getIntAttribute(cur, "rev", &error);
  if (error)
    rev = 0;

  /* get distribution */
  s = (char *)xmlNodeGetContent(cur);
  if (s == NULL) {
    /* nothing to parse; do not hand a NULL string to the CSV parser */
    GHMM_LOG(LERROR, "Can not parse background CSV list.");
    goto STOP;
  }

  /* number of entries of the distribution, computed once for both uses */
  entries = (unsigned int) pow(f->model.d[modelNo]->bp->m, order+1);

  ARRAY_MALLOC(b, entries);
  if (-1 !=  parseCSVList(s, entries, b, rev))
    f->model.d[modelNo]->bp->b[bgNr] = b;
  else {
    GHMM_LOG(LERROR, "Can not parse background CSV list.");
    goto STOP;
  }
  free(s);

  return 0;
STOP:
  /* plain free(): both pointers may still be NULL on this path */
  free(b);
  free(s);
  return -1;
#undef CUR_PROC
}
Exemplo n.º 2
0
/* ========================================================================= */
/**
 * Writes an alphabet as an "alphabet" or "classAlphabet" element that contains
 * one "symbol" child per entry, each carrying its index as "code" attribute.
 *
 * @param writer  XML text writer to emit to
 * @param alfa    alphabet to write
 * @param type    kAlphabet selects the "alphabet" tag (with "id" attribute),
 *                any other value selects "classAlphabet"
 * @return 0 on success, -1 on error
 */
static int writeAlphabet(xmlTextWriterPtr writer, ghmm_alphabet * alfa, int type) {
#define CUR_PROC "writeAlphabet"

  int i;

  if (0 > xmlTextWriterStartElement(writer, BAD_CAST (type == kAlphabet ? "alphabet" : "classAlphabet"))) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement");
    goto STOP;
  }

  /* a failing id attribute is only reported, writing continues */
  if (type == kAlphabet)
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", alfa->id))
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write id-attribute for alphabet "
               "with id %d", alfa->id);

  for (i=0; i<alfa->size; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "symbol")) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to start symbol-tag no %d", i);
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "code", "%d", i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write code-attribute for symbol %s "
                           "with code %d", alfa->symbols[i], i);
      goto STOP;
    }

    /* the symbol text is passed through replaceXMLEntity before raw output */
    if (0 > xmlTextWriterWriteRaw(writer, BAD_CAST replaceXMLEntity(alfa->symbols[i]))) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write symbol %s with code %d",
                           alfa->symbols[i], i);
      goto STOP;
    }

    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to end symbol-tag no %d", i);
      goto STOP;
    }
  }

  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at ending alphabet");
    goto STOP;
  }

  return 0;
 STOP:
  return -1;
#undef CUR_PROC
}
Exemplo n.º 3
0
/*============================================================================*/
double ighmm_rand_get_1overa (double x, double mean, double u)
{
  /* Computes 1/a(x, mean, u), where a is the integral of the Gauss density
     from x to infinity.  Returns -1.0 when u is not positive.  When the
     complementary error function value is not above DBL_MIN a message is
     emitted and that value itself is returned. */
# define CUR_PROC "ighmm_rand_get_1overa"

  double arg;
  double tail;

  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    return (-1.0);
  }

  /* standardised argument shared by both erfc implementations */
  arg = (x - mean) / sqrt (u * 2);

#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
  tail = erfc (arg);
#else
  tail = ighmm_erfc (arg);
#endif

  if (tail <= DBL_MIN) {
    ighmm_mes (MES_WIN, "a ~= 0.0 critical! (mue = %.2f, u =%.2f)\n", mean, u);
    return (tail);
  }

  return (2.0 / tail);
# undef CUR_PROC
}                               /* ighmm_rand_get_1overa */
Exemplo n.º 4
0
/* Cumulative distribution function of N(mean, u) truncated on the left at a.
   x:    point at which the cdf is evaluated
   mean: mean of the untruncated normal
   u:    variance of the untruncated normal, must be > 0
   a:    truncation point; the cdf is 0 for x <= a
   Returns the cdf value, or -1.0 when u is not positive. */
double ighmm_rand_normal_right_cdf (double x, double mean, double u, double a)
{
# define CUR_PROC "ighmm_rand_normal_right_cdf"

  if (x <= a)
    return (0.0);
  /* u is a variance and has to be positive, independent of the
     truncation point a */
  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    goto STOP;
  }
  /* With erfc(z) = 1 - erf(z) the value below equals
     (erf(zx) - erf(za)) / erfc(za), zx and za being the standardised
     arguments of x and a. */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
  return 1.0 + (erf ((x - mean) / sqrt (u * 2)) -
                1.0) / erfc ((a - mean) / sqrt (u * 2));
#else
  return 1.0 + (ighmm_erf ((x - mean) / sqrt (u * 2)) -
                1.0) / ighmm_erfc ((a - mean) / sqrt (u * 2));
#endif /* Check for ISO C99 */
STOP:
  return (-1.0);
# undef CUR_PROC
}                               /* double ighmm_rand_normal_right_cdf */
Exemplo n.º 5
0
/*===========================================================================*/
/* Splits data at blanks, adds up the values matchModelType returns for the
   individual tokens and checks the sum against validModelTypes.
   Returns the summed model type, or -1 if it is not a known valid type. */
static int parseModelType(const char * data, unsigned int size) {
#define CUR_PROC "parseModelType"

  const int known = sizeof(validModelTypes)/sizeof(validModelTypes[0]);
  const char * blank;
  char * msg;
  int idx;
  int sum = 0;

  /* every blank terminates one token; size tracks what is left */
  for (blank = strchr(data, ' '); blank != NULL; blank = strchr(data, ' ')) {
    sum += matchModelType(data, blank-data);
    size -= (blank-data)+1;
    data = blank+1;
  }
  /* the remainder after the last blank is the final token */
  sum += matchModelType(data, size);

  for (idx = 0; idx < known; idx++) {
    if (sum == validModelTypes[idx])
      return sum;
  }

  msg = ighmm_mprintf(NULL, 0, "%d is no known valid model type", sum);
  GHMM_LOG(LERROR, msg);
  m_free(msg);
  return -1;
#undef CUR_PROC
}
Exemplo n.º 6
0
/* Density of a bivariate normal distribution.
   x:    point (two entries) at which the density is evaluated
   mean: mean vector (two entries)
   cov:  linearized 2x2 covariance matrix; cov[0] and cov[3] are read as the
         variances, cov[1] as the covariance
   Returns the density, or -1.0 if a variance is not positive or the
   correlation coefficient has an absolute value >= 1. */
double ighmm_rand_binormal_density(const double *x, double *mean, double *cov)
{
# define CUR_PROC "ighmm_rand_binormal_density"
  double rho, sigma_x, sigma_y, one_minus_rho2;
#ifndef DO_WITH_GSL
  double zx, zy, quad;
#endif
  if (cov[0] <= 0.0 || cov[2 + 1] <= 0.0) {
    GHMM_LOG(LCONVERTED, "variance <= 0.0 not allowed\n");
    goto STOP;
  }
  sigma_x = sqrt (cov[0]);
  sigma_y = sqrt (cov[2 + 1]);
  rho = cov[1] / (sigma_x * sigma_y);

  /* 1 - rho^2 appears in both denominators; reject the degenerate case
     instead of dividing by zero */
  one_minus_rho2 = 1.0 - rho * rho;
  if (one_minus_rho2 <= 0.0) {
    GHMM_LOG(LCONVERTED, "correlation with |rho| >= 1 not allowed\n");
    goto STOP;
  }
#ifdef DO_WITH_GSL
  /* double gsl_ran_bivariate_gaussian_pdf (double x, double y, double sigma_x,
                                            double sigma_y, double rho)
     is a zero-mean density, so the point is shifted by the mean first */
  return gsl_ran_bivariate_gaussian_pdf (x[0] - mean[0], x[1] - mean[1],
                                         sigma_x, sigma_y, rho);
#else
  zx = (x[0] - mean[0]) / sigma_x;
  zy = (x[1] - mean[1]) / sigma_y;
  /* quadratic form zx^2 - 2*rho*zx*zy + zy^2 */
  quad = zx * zx - 2 * rho * zx * zy + zy * zy;
  return exp (-quad / (2 * one_minus_rho2))
         / (2 * PI * sigma_x * sigma_y * sqrt (one_minus_rho2));
#endif

STOP:
  return (-1.0);
# undef CUR_PROC
}                               /* double ighmm_rand_binormal_density */
Exemplo n.º 7
0
/* ========================================================================= */
/* Formats size doubles with "%.8g" into a newly allocated string, separating
   the entries with ", ".  Returns the string, or NULL on failure; the string
   is allocated with ARRAY_MALLOC and has to be released by the caller. */
static char * doubleArrayToCSV(double * array, int size) {
#define CUR_PROC "doubleArrayToCSV"

  /* space reserved per entry */
  enum { FIELD_WIDTH = 2      /* comma and space */
                     + 8      /* 8 significant digits */
                     + 1      /* sign */
                     + 5      /* 'E' and signed exponent */
                     + 3 };   /* safety */
  const int capacity = size * FIELD_WIDTH;
  char *out = NULL;
  int idx = 0;
  int used = 0;

  ARRAY_MALLOC(out, capacity);

  /* all entries but the last one are followed by a separator */
  while (idx < size-1 && used + FIELD_WIDTH < capacity) {
    used += sprintf(out+used, "%.8g, ", array[idx]);
    idx++;
  }

  /* either entries are left over or there is no room for the last one */
  if (idx < size-1 || used + FIELD_WIDTH > capacity) {
    GHMM_LOG(LERROR, "writing CSV failed");
    goto STOP;
  }

  sprintf(out+used, "%.8g", array[idx]);
  return out;
STOP:
  free(out);
  return NULL;
#undef  CUR_PROC
}
Exemplo n.º 8
0
/*============================================================================*/
/* Density at x of N(mean, u) truncated on the left at a.
   Returns 0.0 for x < a and -1.0 when u is not positive or the
   normalisation factor could not be computed. */
double ighmm_rand_normal_density_trunc(double x, double mean, double u,
                                       double a)
{
# define CUR_PROC "ighmm_rand_normal_density_trunc"
#ifndef DO_WITH_GSL
  double scale;
#endif /* DO_WITH_GSL */

  if (u <= 0.0) {
    GHMM_LOG(LERROR, "u <= 0.0 not allowed");
    return -1.0;
  }
  if (x < a)
    return 0.0;

#ifdef DO_WITH_GSL
  /* gsl works relative to a zero mean, so shift both arguments */
  return gsl_ran_gaussian_tail_pdf(x - mean, a - mean, sqrt(u));
#else
  /* scale the plain normal density by the normalisation factor */
  scale = ighmm_rand_get_1overa(a, mean, u);
  if (scale == -1) {
    GHMM_LOG_QUEUED(LERROR);
    return -1.0;
  }
  return scale * ighmm_rand_normal_density(x, mean, u);
#endif /* DO_WITH_GSL */
# undef CUR_PROC
}                               /* double ighmm_rand_normal_density_trunc */
Exemplo n.º 9
0
/* Writes the mean vector and the covariance matrix of a multinormal emission
   as "mean" and "variance" child elements with CSV content.
   Returns 0 on success and -1 on error. */
static int writeMultiNormal(xmlTextWriterPtr writer, ghmm_c_emission *emission)
{
#define CUR_PROC "writeMultiNormal"

    char *csv = NULL;

    /* ---- mean vector ---- */
    if (xmlTextWriterStartElement(writer, BAD_CAST "mean") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mean)");
        goto STOP;
    }
    csv = doubleArrayToCSV(emission->mean.vec, emission->dimension);
    if (!csv) {
        GHMM_LOG(LERROR, "converting array to CSV failed for mean vector");
        goto STOP;
    }
    if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing mean vector CSV");
        goto STOP;
    }
    m_free(csv);
    csv = NULL;
    if (xmlTextWriterEndElement(writer) < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement mean");
        goto STOP;
    }

    /* ---- covariance matrix (dimension * dimension entries) ---- */
    if (xmlTextWriterStartElement(writer, BAD_CAST "variance") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (variance)");
        goto STOP;
    }
    csv = doubleArrayToCSV(emission->variance.mat, emission->dimension * emission->dimension);
    if (!csv) {
        GHMM_LOG(LERROR, "converting array to CSV failed for covariance matrix");
        goto STOP;
    }
    if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing variance matrix CSV");
        goto STOP;
    }
    m_free(csv);
    csv = NULL;
    if (xmlTextWriterEndElement(writer) < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement variance");
        goto STOP;
    }

    return 0;
STOP:
    /* csv is non-NULL here only if writing it failed */
    free(csv);
    return -1;
#undef CUR_PROC
}
Exemplo n.º 10
0
/*===========================================================================*/
static int parseCSVList(const char * data, unsigned int size, double * array, int reverse) {
#define CUR_PROC "parseCSVList"

  int retval=0;
  int i;
  char * * next, * estr;
  double tmp;

  ARRAY_CALLOC(next, 1);

  for (i=0; i<size; i++) {
    array[i] = strtod(data, next);
    if (data == *next) {
      estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. entry %d of %d. (%s)", i, size, *next);
      GHMM_LOG(LERROR, estr);
      m_free(estr);
      retval=-1;
      break;
    }
    if (next)
      data = *next+1;
    else
      break;
  }

  if (i != size) {
    retval=-1;
    estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. sizes do not match (%d != %d)", i, size);
    GHMM_LOG(LERROR, estr);
    m_free(estr);
  }

  if (reverse) {
    for (i=0; i<size/2; i++) {
      tmp = array[i];
      array[i] = array[size-i-1];
      array[size-i-1] = tmp;
    }
  }

STOP:
  m_free(next);
  return retval;
#undef CUR_PROC
}
Exemplo n.º 11
0
/* Draws a random number from N(mue, u) restricted to values right of a.
   A seed different from 0 re-seeds the generator first.
   Returns -1 when u is not positive. */
double ighmm_rand_normal_right (double a, double mue, double u, int seed)
{
# define CUR_PROC "ighmm_rand_normal_right"
  double sigma;
#ifndef DO_WITH_GSL
  double uniform, lower_mass, shifted, complement, tail, offset;
#endif

  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    return -1.0;
  }
  sigma = sqrt(u);

  if (seed != 0) {
    GHMM_RNG_SET (RNG, seed);
  }

#ifdef DO_WITH_GSL
  /* gsl_ran_gaussian_tail(generator, lower_boundary, sigma) samples around
     zero, so the boundary is shifted by mue and the result shifted back */
  return mue + gsl_ran_gaussian_tail(RNG, a - mue, sigma);

#else /* DO_WITH_GSL */

  /* Inverse transformation with restricted sampling by Fishman */
  uniform = GHMM_RNG_UNIFORM(RNG);
  lower_mass = ighmm_rand_get_PHI((a-mue) / sigma);

  /* map the uniform number into [lower_mass, 1) */
  shifted = lower_mass + (1-lower_mass) * uniform;
  complement = 1-shifted;

  tail = m_min (shifted, complement);
  tail = sqrt (-log (tail * tail));

  offset =
    sigma * (tail - (C0 + tail * (C1 + tail * C2))
                    / (1 + tail * (D1 + tail * (D2 + tail * D3))));

  /* the side of mue depends on which of the two masses is smaller */
  if (shifted < complement)
    return mue - offset;
  return mue + offset;
#endif /* DO_WITH_GSL */
# undef CUR_PROC
}                               /* ighmm_rand_normal_right */
Exemplo n.º 12
0
/**
   Calculates the logarithm of the sum over j of
   exp(log_a[j] + gamma_a[k(j)]), where k(j) is the position of the j-th
   in-state of s in the gamma list of parent. In-states that are missing
   from the gamma list do not contribute. The value 1.0 serves as marker for
   log(0) throughout.
   @return logarithm of the sum; the program exits if the allocation fails
   @param log_a:      row of the transition matrix with logarithmic values (1.0 for log(0))
   @param s:          ghmm_dstate whose gamma-value is calculated
   @param parent:     a pointer to the parent hypothesis
*/
static double ighmm_log_gamma_sum (double *log_a, ghmm_dstate * s, hypoList * parent) {
#define CUR_PROC "ighmm_log_gamma_sum"
  double *terms;
  double total;
  double best = 1.0;
  int best_idx = 0;
  int in, g, wanted;

  /* a single gamma entry needs no summation if it matches an in-state */
  if (parent->gamma_states == 1) {
    for (in = 0; in < s->in_states; in++) {
      if (parent->gamma_id[0] == s->in_id[in])
        return parent->gamma_a[0] + log_a[in];
    }
  }

  ARRAY_MALLOC (terms, s->in_states);

  /* build the logarithmic terms and remember the largest one */
  for (in = 0; in < s->in_states; in++) {
    wanted = s->in_id[in];

    /* look up the in-state in the gamma list */
    g = 0;
    while (g < parent->gamma_states && parent->gamma_id[g] != wanted)
      g++;

    if (g == parent->gamma_states) {
      terms[in] = 1.0;
      continue;
    }

    terms[in] = log_a[in] + parent->gamma_a[g];
    if (best == 1.0 || (terms[in] > best && terms[in] != 1.0)) {
      best = terms[in];
      best_idx = in;
    }
  }

  /* best + log(1 + sum over the other terms of exp(term - best)) */
  total = 1.0;
  for (in = 0; in < s->in_states; in++) {
    if (in != best_idx && terms[in] != 1.0)
      total += exp (terms[in] - best);
  }

  total = log (total);
  total += best;

  free (terms);
  return total;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_log_gamma_sum failed\n");
  exit (1);
#undef CUR_PROC
}
Exemplo n.º 13
0
/*===========================================================================*/
/**
 * Builds an alphabet from the "symbol" children of an alphabet node.
 *
 * The "code" attributes of the symbols have to be consecutive, starting at 0
 * in document order.  The text content of every symbol becomes its name.
 *
 * @param doc  XML document (not referenced in this function)
 * @param cur  alphabet node
 * @param f    file structure (not referenced in this function)
 * @return newly allocated alphabet, or NULL on error
 */
static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {
#define CUR_PROC "parseAlphabet"

  char * str;
  int M, code, error;

  xmlNodePtr symbol;
  ghmm_alphabet * alfa = NULL;

  ARRAY_CALLOC(alfa, 1);

  /* first pass: count the symbols and verify their codes */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      code = getIntAttribute(symbol, "code", &error);
      if (error || code!=M) {
        str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M);
        GHMM_LOG(LERROR, str);
        m_free(str);
        goto STOP;
      } else
        M++;
    }
    symbol=symbol->next;
  }

  alfa->size = M;
  ARRAY_MALLOC(alfa->symbols, M);

  /* second pass: fetch the symbol names */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol);
    }
    symbol=symbol->next;
  }

  return alfa;
STOP:
  /* alfa is NULL when its own allocation failed, and symbols is NULL when
     the error occurred before the symbol array was allocated */
  if (alfa) {
    free(alfa->symbols);
    free(alfa);
  }
  return NULL;
#undef CUR_PROC
}
Exemplo n.º 14
0
/* ========================================================================= */
/**
 * Builds a blank separated list of the names of all model type flags that are
 * set in modelType.
 *
 * @param modelType  bitwise combination of GHMM_k* flags, must be > 0
 * @return newly allocated string that the caller has to free (empty if none
 *         of the flags tested here is set), or NULL if modelType is not
 *         positive or the allocation fails
 */
static char * strModeltype(int modelType) {
#define CUR_PROC "strModeltype"

  size_t end;
  char * mt;

  /* 200 bytes are more than all names below need together */
  ARRAY_CALLOC(mt, 200);

  if (modelType > 0) {
    if (modelType & GHMM_kLeftRight)
      strcat(mt, "left-right ");
    if (modelType & GHMM_kSilentStates)
      strcat(mt, "silent ");
    if (modelType & GHMM_kTiedEmissions)
      strcat(mt, "tied ");
    if (modelType & GHMM_kHigherOrderEmissions)
      strcat(mt, "higher-order ");
    if (modelType & GHMM_kBackgroundDistributions)
      strcat(mt, "background ");
    if (modelType & GHMM_kLabeledStates)
      strcat(mt, "labeled ");
    if (modelType & GHMM_kTransitionClasses)
      strcat(mt, "transition-classes ");
    if (modelType & GHMM_kDiscreteHMM)
      strcat(mt, "discrete ");
    if (modelType & GHMM_kContinuousHMM)
      strcat(mt, "continuous ");
    if (modelType & GHMM_kPairHMM)
      strcat(mt, "pair ");
    if (modelType & GHMM_kMultivariate)
      strcat(mt, "multivariate ");
  } else {
    GHMM_LOG(LERROR, "can't write models with unspecified modeltype");
    goto STOP;
  }

  /* overwrite the last space; nothing to strip if no flag matched */
  end = strlen(mt);
  if (end > 0)
    mt[end-1] = '\0';

  return mt;
 STOP:
  m_free(mt);
  return NULL;
#undef CUR_PROC
}
Exemplo n.º 15
0
/* Cumulative distribution function of N(mean, u) at x.
   Returns -1.0 when the variance u is not positive. */
double ighmm_rand_normal_cdf (double x, double mean, double u)
{
# define CUR_PROC "ighmm_rand_normal_cdf"
  double z;

  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    return (-1.0);
  }

  /* PHI(x)=erf(x/sqrt(2))/2+0.5 */
  z = (x - mean) / sqrt (u * 2.0);
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
  return (erf (z) + 1.0) / 2.0;
#else
  return (ighmm_erf (z) + 1.0) / 2.0;
#endif /* Check for ISO C99 */
# undef CUR_PROC
}                               /* double ighmm_rand_normal_cdf */
Exemplo n.º 16
0
/* Cumulative distribution function of the uniform distribution on [min,max].
   Note the parameter order: max comes before min.
   Returns -1.0 when max <= min. */
double ighmm_rand_uniform_cdf (double x, double max, double min)
{
# define CUR_PROC "ighmm_rand_uniform_cdf"
  if (max <= min) {
    GHMM_LOG(LCONVERTED, "max <= min not allowed\n");
    return (-1.0);
  }

  /* 0 left of the interval, 1 from max on, linear in between */
  return (x < min)  ? 0.0
       : (x >= max) ? 1.0
       : (x-min)/(max-min);
# undef CUR_PROC
}                               /* ighmm_rand_uniform_cdf */
Exemplo n.º 17
0
/*============================================================================*/
/* Density of the uniform distribution on [min,max] at x.
   Note the parameter order: max comes before min.
   Returns -1.0 when max <= min. */
double ighmm_rand_uniform_density (double x, double max, double min)
{
# define CUR_PROC "ighmm_rand_uniform_density"
  int inside;

  if (max <= min) {
    GHMM_LOG(LCONVERTED, "max <= min not allowed \n");
    return (-1.0);
  }

  /* both interval ends belong to the support */
  inside = (x >= min) && (x <= max);
  return inside ? 1.0/(max-min) : 0.0;
# undef CUR_PROC
}                               /* double ighmm_rand_uniform_density */
Exemplo n.º 18
0
/* ========================================================================= */
/**
 * Writes every background distribution of bg as a "background" element with
 * the attributes "key" and, for orders > 0, "order"; the probabilities are
 * written as CSV content.
 *
 * A distribution without a name gets the generated key "bg_<index>".
 * Failures while writing the key or order attribute are only logged.
 *
 * @param writer  XML text writer to emit to
 * @param bg      background distributions
 * @return 0 on success, -1 on error
 */
static int writeBackground(xmlTextWriterPtr writer, ghmm_dbackground* bg) {
#define CUR_PROC "writeBackground"

  int i;
  char * tmp=NULL;

  for (i=0; i<bg->n; i++) {

    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "background")) {
      GHMM_LOG_PRINTF(LERROR, LOC, "Error at starting backgroung %d", i);
      return -1;
    }

    /* fall back to a generated key if there is no name array or this
       particular entry has no name */
    if (!(bg->name) || !(bg->name[i])) {
      if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "key", "bg_%d", i))
        GHMM_LOG(LERROR, "Error at writing background key");
    }
    else {
      if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST (bg->name[i])))
        GHMM_LOG(LERROR, "Error at writing background key");
    }

    if (0 < bg->order[i])
      if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "order", "%d", bg->order[i]))
        GHMM_LOG(LERROR, "can't write background order attribute");

    /* a distribution of order k has m^(k+1) entries */
    tmp = doubleArrayToCSV(bg->b[i], pow(bg->m, bg->order[i]+1));
    if (tmp) {
      if (0 > xmlTextWriterWriteRaw(writer, BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing "
                 "background distribution CSV");
        m_free(tmp);
        return -1;
      }
      m_free(tmp);
    } else {
      GHMM_LOG(LERROR, "converting array to CSV failed for background distribution");
      return -1;
    }

    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement while ending "
               "background distribution");
      return -1;
    }
  }
  return 0;
#undef CUR_PROC
}
Exemplo n.º 19
0
/* Allocates a list element for hypothesis newhyp and puts it in front of the
   element *plist points to.  The element records parlist as its parent; a
   non-NULL parent gets its reference count increased.
   The program exits if the allocation fails. */
static void ighmm_hlist_insert (hypoList ** plist, int newhyp,
                              hypoList * parlist)
{
#define CUR_PROC "ighmm_hlist_insert"
  hypoList *node;

  ARRAY_CALLOC (node, 1);

  node->hyp_c = newhyp;
  node->parent = parlist;
  node->next = *plist;
  if (parlist != NULL)
    parlist->refcount++;

  /* the new element becomes the head at this position */
  *plist = node;
  return;
STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_hlist_insert failed\n");
  exit (1);
#undef CUR_PROC
}
Exemplo n.º 20
0
/*===========================================================================*/
/* Draws a random number by scaling a uniform variate with (max - min) and
   shifting it by min.  A seed different from 0 re-seeds the generator first.
   Note the parameter order: max comes before min.
   Returns -1.0 when max <= min. */
double ighmm_rand_uniform_cont (int seed, double max, double min)
{
# define CUR_PROC "ighmm_rand_uniform_cont"
  double width;

  if (max <= min) {
    GHMM_LOG(LCONVERTED, "max <= min not allowed\n");
    return (-1.0);
  }
  if (seed != 0) {
    GHMM_RNG_SET (RNG, seed);
  }

  width = max-min;
#ifdef DO_WITH_GSL
  return (double)gsl_rng_uniform (RNG) * width + min;
#else
  return (GHMM_RNG_UNIFORM (RNG)) * width + min;
#endif
# undef CUR_PROC
}                               /* ighmm_rand_uniform_cont */
Exemplo n.º 21
0
/* Divides the len entries of v by their sum.
   Returns 0 on success and -1, leaving v untouched, if v is not empty and
   the sum is smaller than GHMM_EPS_PREC.
   PROBLEM: Entries can get very small and be rounded to 0 */
int ighmm_cvector_normalize (double *v, int len)
{
#define CUR_PROC "ighmm_cvector_normalize"
  double total = 0.0;
  char * msg;
  int k;

  for (k = 0; k < len; k++)
    total += v[k];

  /* an empty vector is accepted as it is */
  if (len > 0 && total < GHMM_EPS_PREC) {
    msg = ighmm_mprintf(NULL, 0, "Can't normalize vector. Sum smaller than %g\n"
			, GHMM_EPS_PREC);
    GHMM_LOG(LWARN, msg);
    m_free(msg);
    return (-1);
  }

  for (k = 0; k < len; k++)
    v[k] = v[k] / total;
  return 0;
#undef CUR_PROC
}                               /* ighmm_cvector_normalize */
Exemplo n.º 22
0
/* Approximates the density of N(mean, u) at x by linear interpolation in the
   table pdf_stdnormal, which is set up on first use.  Arguments beyond the
   end of the table yield the scaled last table entry.
   Returns -1.0 when the variance u is not positive. */
double ighmm_rand_normal_density_approx (double x, double mean, double u)
{
# define CUR_PROC "ighmm_rand_normal_density_approx"
#ifdef HAVE_LIBPTHREAD
  /* statically initialised so that the mutex is valid on first use */
  static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
#endif /* HAVE_LIBPTHREAD */
  int i;
  double y, z, scaled, pdf_x;
  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    goto STOP;
  }
  if (!pdf_stdnormal_exists) {
#ifdef HAVE_LIBPTHREAD
    pthread_mutex_lock (&lock); /* Put on a lock, because the clustering is parallel   */
#endif /* HAVE_LIBPTHREAD */
    /* test again while holding the lock: another thread may have set up the
       table in the meantime (assumes randvar_init_pdf_stdnormal sets
       pdf_stdnormal_exists - TODO confirm) */
    if (!pdf_stdnormal_exists)
      randvar_init_pdf_stdnormal ();
#ifdef HAVE_LIBPTHREAD
    pthread_mutex_unlock (&lock);       /* Take the lock off */
#endif /* HAVE_LIBPTHREAD */
  }
  y = 1 / sqrt (u);
  z = fabs ((x - mean) * y);
  /* table position as double; compared before the conversion to int so that
     huge or NaN arguments never reach the cast */
  scaled = z * X_FAKT_PDF;
  if (!(scaled < PDFLEN - 1)) {
    i = PDFLEN - 1;
    pdf_x = y * pdf_stdnormal[i];
  }
  else {
    i = (int) scaled;
    /* linear interpolation: */
    pdf_x = y * (pdf_stdnormal[i] +
                 (z - i * X_STEP_PDF) *
                 (pdf_stdnormal[i + 1] - pdf_stdnormal[i]) / X_STEP_PDF);
  }
  return (pdf_x);
STOP:
  return (-1.0);
# undef CUR_PROC
}                               /* double ighmm_rand_normal_density_approx */
Exemplo n.º 23
0
/*============================================================================*/
/* Density of N(mean, u) at x.
   Returns -1.0 when the variance u is not positive. */
double ighmm_rand_normal_density (double x, double mean, double u)
{
# define CUR_PROC "ighmm_rand_normal_density"
#ifndef DO_WITH_GSL
  double kernel;
#endif
  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    return (-1.0);
  }
  /* The denominator is possibly < EPS??? Check that ? */
#ifdef DO_WITH_GSL
  /* double gsl_ran_gaussian_pdf (double x, double sigma) is centred at 0 */
  return gsl_ran_gaussian_pdf (x - mean, sqrt (u));
#else
  /* exponential part first, then the normalising factor 1/sqrt(2*PI*u) */
  kernel = exp (-1 * m_sqr (mean - x) / (2 * u));
  return (1 / (sqrt (2 * PI * u)) * kernel);
#endif
# undef CUR_PROC
}                               /* double ighmm_rand_normal_density */
Exemplo n.º 24
0
/* ========================================================================= */
/* Writes the contents of state sNo of continuous model moNo: a "mixture"
   element with one child per density component and, if both coordinates are
   positive, a "position" element.
   Returns 0 on success and -1 on error. */
static int writeContinuousStateContents(xmlTextWriterPtr writer, ghmm_xmlfile* f,
                                      int moNo, int sNo) {
#define CUR_PROC "writeContinuousStateContents"

  ghmm_cstate *st = f->model.c[moNo]->s + sNo;
  ghmm_c_emission *dens;
  int k;

  /* ---- mixture of densities ---- */
  if (xmlTextWriterStartElement(writer, BAD_CAST "mixture") < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mixture)");
    goto STOP;
  }

  for (k = 0; k < st->M; k++) {
    dens = st->e + k;

    /* open the element of the density and write its own parameters */
    switch (dens->type) {
      case normal:
        if (xmlTextWriterStartElement(writer, BAD_CAST "normal") < 0) {
          GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normal)");
          goto STOP;
        }
        WRITE_DOUBLE_ATTRIBUTE(writer, "mean", dens->mean.val);
        WRITE_DOUBLE_ATTRIBUTE(writer, "variance", dens->variance.val);
        break;
      case multinormal:
        if (xmlTextWriterStartElement(writer, BAD_CAST "multinormal") < 0) {
          GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (multinormal)");
          goto STOP;
        }
        if (xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "dimension",
                                              "%d", dens->dimension) < 0) {
          GHMM_LOG(LERROR, "failed to write dimension attribute");
          goto STOP;
        }
        break;
      case normal_left:
        if (xmlTextWriterStartElement(writer, BAD_CAST "normalLeftTail") < 0) {
          GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normalLeftTail)");
          goto STOP;
        }
        WRITE_DOUBLE_ATTRIBUTE(writer, "mean", dens->mean.val);
        WRITE_DOUBLE_ATTRIBUTE(writer, "variance", dens->variance.val);
        /* NOTE(review): attribute "max" is filled from the field min;
           looks swapped, confirm against the reader before changing */
        WRITE_DOUBLE_ATTRIBUTE(writer, "max", dens->min);
        break;
      case normal_right:
        if (xmlTextWriterStartElement(writer, BAD_CAST "normalRightTail") < 0) {
          GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normalRightTail)");
          goto STOP;
        }
        WRITE_DOUBLE_ATTRIBUTE(writer, "mean", dens->mean.val);
        WRITE_DOUBLE_ATTRIBUTE(writer, "variance", dens->variance.val);
        /* NOTE(review): attribute "min" is filled from the field max;
           looks swapped, confirm against the reader before changing */
        WRITE_DOUBLE_ATTRIBUTE(writer, "min", dens->max);
        break;
      case uniform:
        if (xmlTextWriterStartElement(writer, BAD_CAST "uniform") < 0) {
          GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (uniform)");
          goto STOP;
        }
        WRITE_DOUBLE_ATTRIBUTE(writer, "min", dens->min);
        WRITE_DOUBLE_ATTRIBUTE(writer, "max", dens->max);
        break;
      default:
        GHMM_LOG_PRINTF(LERROR, LOC, "invalid density %d at position %d", dens->type, k);
        goto STOP;
    }

    /* optional attributes: a fixed state marks all its densities as fixed */
    if (st->fix || dens->fixed) {
      if (xmlTextWriterWriteAttribute(writer, BAD_CAST "fixed", BAD_CAST "1") < 0) {
        GHMM_LOG(LERROR, "failed to set fixed attribute");
        goto STOP;
      }
    }
    /* the weight is only written for real mixtures */
    if (st->M > 1) {
      WRITE_DOUBLE_ATTRIBUTE(writer, "prior", st->c[k]);
    }

    /* multinormals carry mean vector and covariance matrix as children */
    if (dens->type == multinormal) {
      if (writeMultiNormal(writer, dens) < 0) {
        GHMM_LOG(LERROR, "failed to write mean and covariance childs");
        goto STOP;
      }
    }

    /* close the density element */
    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (all densities)");
      goto STOP;
    }
  }

  /* close the mixture element */
  if (xmlTextWriterEndElement(writer) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (mixture)");
    goto STOP;
  }

  /* ---- position, only if both coordinates are positive ---- */
  if ((st->xPosition > 0) && (st->yPosition > 0)) {
    if (xmlTextWriterStartElement(writer, BAD_CAST "position") < 0) {
      GHMM_LOG(LERROR, "failed to start position element (position)");
      goto STOP;
    }
    if (xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "x", "%d",
                                          st->xPosition) < 0) {
      GHMM_LOG(LERROR, "failed to write x position");
      goto STOP;
    }
    if (xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "y", "%d",
                                          st->yPosition) < 0) {
      GHMM_LOG(LERROR, "failed to write y position");
      goto STOP;
    }
    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (position)");
      goto STOP;
    }
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
Exemplo n.º 25
0
/*===========================================================================*/
/**
   Parses one state node and stores its contents in model number modelNo of f.
   Reads the attributes "id", "initial" and "desc" of cur and then dispatches
   on the name of every child element: "silent", "discrete", "mixture",
   "pair" (currently ignored), "backgroundKey", "class", "tiedTo" and
   "position".
   @return            0 on success, -1 on error
   @param doc         not used by this function
   @param cur         the state node to parse
   @param f           file structure whose models are filled
   @param inDegree    per-state value handed to ghmm_cstate_alloc for
                      continuous states (presumably the number of incoming
                      transitions - TODO confirm against the caller)
   @param outDegree   per-state value handed to ghmm_cstate_alloc
                      (presumably the number of outgoing transitions)
   @param modelNo     index of the model inside f->model
 */
static int parseState(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int * inDegree, int * outDegree, int modelNo) {
#define CUR_PROC "parseState"

  int i, error, order=0, state=-1442, fixed=-985, tied=-9354, M, aprox, label;
  int curX=0, curY=0;
  double pi, prior;
  double *emissions = NULL;
  char *desc = NULL;
  char *s = NULL, *estr;
  int rev, stateFixed=1;
  /* set once a model state stores desc; from then on the model owns it */
  int descTaken = 0;
  ghmm_cstate *newcstate;
  ghmm_c_emission *emission;

  xmlNodePtr elem, child, multichild;

  /* the id is used as array index everywhere below, so it is required */
  state = getIntAttribute(cur, "id", &error);
  if (error) {
    GHMM_LOG(LERROR, "can't read required state id");
    goto STOP;
  }

  pi = getDoubleAttribute(cur, "initial", &error);
  if (error) {
    estr = ighmm_mprintf(NULL, 0, "can't read required intial probability for"
                         " state %d", state);
    GHMM_LOG(LERROR, estr);
    m_free(estr);
    goto STOP;
  }

  /* optional description, NULL if the attribute is missing */
  desc = (char *)xmlGetProp(cur, BAD_CAST "desc");

  elem = cur->children;
  while (elem!=NULL) {
    /* ======== silent state ============================================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "silent"))) {
      switch (f->modelType & PTR_TYPE_MASK) {
      case (GHMM_kDiscreteHMM):
        f->model.d[modelNo]->silent[state] = 1;
        break;
      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
        f->model.ds[modelNo]->silent[state] = 1;
        break;
      case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
      case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
        f->model.dp[modelNo]->silent[state] = 1;
        break;
      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
    }

    /* ======== discrete state (possible higher order) ==================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "discrete"))) {
      assert((f->modelType & GHMM_kDiscreteHMM) && ((f->modelType & GHMM_kPairHMM) == 0));

      /* fixed is a property of the distribution and optional */
      fixed = getIntAttribute(elem, "fixed", &error);
      if (error)
        fixed = 0;

      /* order is optional for discrete */
      if (f->modelType & GHMM_kHigherOrderEmissions) {
        order = getIntAttribute(elem, "order", &error);
        if (error)
          order = 0;
      }

      /* NOTE(review): "rev" is read from the state node (cur), not from the
         discrete element (elem) - confirm that this is intended */
      rev = getIntAttribute(cur, "rev", &error);
      if (error)
        rev = 0;

      /* parsing emission probabilities */
      s = (char *)xmlNodeGetContent(elem);

      switch (f->modelType & PTR_TYPE_MASK) {

      case (GHMM_kDiscreteHMM):
        f->model.d[modelNo]->s[state].desc = desc;
        descTaken = 1;
        f->model.d[modelNo]->s[state].pi = pi;
        f->model.d[modelNo]->s[state].fix = fixed;
        if (f->modelType & GHMM_kHigherOrderEmissions) {
          f->model.d[modelNo]->order[state] = order;
          if (f->model.d[modelNo]->maxorder < order) {
            f->model.d[modelNo]->maxorder = order;
            estr = ighmm_mprintf(NULL, 0, "Updated maxorder to %d\n",
                                 f->model.d[modelNo]->maxorder);
            GHMM_LOG(LDEBUG, estr);
            m_free(estr);
          }
        }
        ARRAY_MALLOC(emissions, pow(f->model.d[modelNo]->M, order+1));
        /* NOTE(review): the result of parseCSVList is ignored here, unlike
           for the multinormal lists below - confirm whether that is wanted */
        parseCSVList(s, pow(f->model.d[modelNo]->M, order+1), emissions, rev);
        free(f->model.d[modelNo]->s[state].b);
        f->model.d[modelNo]->s[state].b = emissions;
        /* the model owns the array now, it must not be freed at STOP */
        emissions = NULL;
        break;

      case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
        f->model.ds[modelNo]->s[state].desc = desc;
        descTaken = 1;
        f->model.ds[modelNo]->s[state].pi = pi;
        f->model.ds[modelNo]->s[state].fix = fixed;
        if (f->modelType & GHMM_kHigherOrderEmissions)
          f->model.ds[modelNo]->order[state] = order;
        ARRAY_MALLOC(emissions, pow(f->model.ds[modelNo]->M, order+1));
        parseCSVList(s, pow(f->model.ds[modelNo]->M, order+1), emissions, rev);
        f->model.ds[modelNo]->s[state].b = emissions;
        /* the model owns the array now, it must not be freed at STOP */
        emissions = NULL;
        break;

      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
      free(s);
      s = NULL;
    }

    /* ======== continuous state ========================================== */
    if ((!xmlStrcmp(elem->name, BAD_CAST "mixture"))) {
      assert(f->modelType & GHMM_kContinuousHMM);

      /* first pass: count the density children to get the mixture size */
      M = 0;
      child = elem->children;
      while (child != NULL) {
        if ((!xmlStrcmp(child->name, BAD_CAST "normal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "multinormal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "uniform"))){
          M ++;

        }
        child = child->next;
      }
      ghmm_cstate_alloc(f->model.c[modelNo]->s + state, M, inDegree[state], outDegree[state], f->model.c[modelNo]->cos);
      newcstate = f->model.c[modelNo]->s + state;

      newcstate->desc = desc;
      descTaken = 1;
      newcstate->M = M;
      newcstate->pi = pi;

      /* the model keeps the largest mixture size over all states */
      if( f->model.c[modelNo]->M < M)
        f->model.c[modelNo]->M = M;

      /* second pass: read every density into emission slot i */
      child = elem->children;

      i = 0;
      while (child != NULL) {

        emission = newcstate->e+i;

        /* common attributes */
        if ((!xmlStrcmp(child->name, BAD_CAST "normal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) ||
            (!xmlStrcmp(child->name, BAD_CAST "multinormal")) ||
            (!xmlStrcmp(child->name, BAD_CAST "uniform"))){
          fixed = getIntAttribute(child, "fixed", &error);
          if (error)
            fixed = 0;
          /* the state is fixed only if every density is fixed */
          stateFixed = fixed && stateFixed;
          emission->fixed = fixed;

          prior = getDoubleAttribute(child, "prior", &error);
          if (error)
            prior = 1.0;
          newcstate->c[i] = prior;
        }
        /* child is not a density, continue with the next child */
        else {
          child = child->next;
          continue;
        }

        /* density type dependent attributes */
        if ((!xmlStrcmp(child->name, BAD_CAST "normal"))) {
          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* should the normal distribution be approximated? */
          aprox = getIntAttribute(child, "approx", &error);
          if (error)
            aprox = 0;
          emission->type      = aprox ? normal_approx : normal;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "normalLeftTail"))) {
          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* the "max" attribute is stored in the min field */
          emission->min       = getDoubleAttribute(child, "max", &error);
          emission->type      = normal_left;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "normalRightTail"))) {

          emission->mean.val  = getDoubleAttribute(child, "mean", &error);
          emission->variance.val = getDoubleAttribute(child, "variance", &error);
          /* the "min" attribute is stored in the max field */
          emission->max       = getDoubleAttribute(child, "min", &error);
          emission->type      = normal_right;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "uniform"))) {
          emission->max  = getDoubleAttribute(child, "max", &error);
          emission->min  = getDoubleAttribute(child, "min", &error);
          emission->type = uniform;
          emission->dimension = 1;
          if (f->model.c[modelNo]->dim > 1) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }
        }
        if ((!xmlStrcmp(child->name, BAD_CAST "multinormal"))) {
          emission->type = multinormal;
          emission->dimension = getIntAttribute(child, "dimension", &error);

          /* check that all emissions in all states have same dimension or
             set when first emission is read*/
          if (f->model.c[modelNo]->dim <= 1)
            f->model.c[modelNo]->dim = emission->dimension;
          else if (f->model.c[modelNo]->dim != emission->dimension) {
            GHMM_LOG(LERROR, "All emissions must have same dimension.");
            goto STOP;
          }

          if (0 != ghmm_c_emission_alloc(emission, emission->dimension)) {
            GHMM_LOG(LERROR, "Can not allocate multinormal emission.");
            goto STOP;
          }
          multichild = child->children;
          while (multichild != NULL) {
            if ((!xmlStrcmp(multichild->name, BAD_CAST "mean"))) {
              s = (char *)xmlNodeGetContent(multichild);
              if (-1 == parseCSVList(s, emission->dimension, emission->mean.vec, 0)) {
                GHMM_LOG(LERROR, "Can not parse mean CSV list.");
                goto STOP;
              }
              /* release the node content, it would leak otherwise */
              free(s);
              s = NULL;
            }
            if ((!xmlStrcmp(multichild->name, BAD_CAST "variance"))) {
              s = (char *)xmlNodeGetContent(multichild);
              if (-1 == parseCSVList(s, emission->dimension * emission->dimension,
                                     emission->variance.mat, 0)) {
                GHMM_LOG(LERROR, "Can not parse variance CSV list.");
                goto STOP;
              }
              /* release the node content, it would leak otherwise */
              free(s);
              s = NULL;
              if (0 != ighmm_invert_det(emission->sigmainv, &emission->det,
                                        emission->dimension, emission->variance.mat))
              {
                GHMM_LOG(LERROR, "Can not calculate inverse of covariance matrix.");
                goto STOP;
              }
              if (0 != ighmm_cholesky_decomposition(emission->sigmacd,
                                                    emission->dimension,
                                                    emission->variance.mat))
              {
                GHMM_LOG(LERROR, "Can not calculate cholesky decomposition of covariance matrix.");
                goto STOP;
              }
            }
            multichild = multichild->next;
          }
        }
        i++;
        child = child->next;
      }
      newcstate->fix = stateFixed;
    }

    /* ======== pair hmm state ============================================ */
    if ((!xmlStrcmp(elem->name, BAD_CAST "pair"))) {
    }

    /* -------- background name  ------------------------------------------ */
    if ((!xmlStrcmp(elem->name, BAD_CAST "backgroundKey"))) {

      assert(f->modelType & GHMM_kBackgroundDistributions);

      s = (char *)xmlNodeGetContent(elem);

      /* look the name up in the list of background distributions */
      for (i=0; i<f->model.d[modelNo]->bp->n; i++) {
        if (0 == strcmp(s, f->model.d[modelNo]->bp->name[i])) {
          if (order != f->model.d[modelNo]->bp->order[i]) {
            estr = ighmm_mprintf(NULL, 0, "order of background %s and state %d"
                                 " does not match",
                                 f->model.d[modelNo]->bp->name[i], state);
            GHMM_LOG(LERROR, estr);
            m_free(estr);
            goto STOP;
          } else {
            f->model.d[modelNo]->background_id[state] = i;
            break;
          }
        }
      }
      if (i == f->model.d[modelNo]->bp->n) {
        estr = ighmm_mprintf(NULL, 0, "can't find background with name %s in"
                             " state %d", s, state);
        GHMM_LOG(LERROR, estr);
        m_free(estr);
        goto STOP;
      }
      free(s);
      s = NULL;
    }

    /* -------- class label ----------------------------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "class"))) {

      assert(f->modelType & GHMM_kLabeledStates);

      s = (char *)xmlNodeGetContent(elem);
      label = atoi(s);
      free(s);
      s = NULL;
      if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM) {
        /* only labels inside the label alphabet are accepted */
        if (label >= 0 && f->model.d[modelNo]->label_alphabet->size > label)
          f->model.d[modelNo]->label[state] = label;
        else
          GHMM_LOG(LWARN, "Invalid label");
      }
    }

    /* -------- tied to --------------------------------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "tiedTo"))) {

      assert(f->modelType & GHMM_kTiedEmissions);

      s = (char *)xmlNodeGetContent(elem);
      tied = atoi(s);
      /* a state may only be tied to itself or to an earlier state; negative
         values are rejected because tied is used as an array index */
      if (tied >= 0 && state>=tied) {
        f->model.d[modelNo]->tied_to[state] = tied;
        if (f->model.d[modelNo]->tied_to[tied] != tied) {
          estr = ighmm_mprintf(NULL, 0, "state %d not tied to tie group leader", state);
          GHMM_LOG(LERROR, estr);
          m_free(estr);
          goto STOP;
        }
      } else {
        estr = ighmm_mprintf(NULL, 0, "state %d tiedTo (%d) is invalid", state, tied);
        GHMM_LOG(LERROR, estr);
        m_free(estr);
        goto STOP;
      }
      free(s);
      s = NULL;
    }

    /* -------- position for graphical editing ---------------------------- */
    if ((!xmlStrcmp(elem->name, BAD_CAST "position"))) {
      curX = getIntAttribute(elem, "x", &error);
      if (error)
        GHMM_LOG(LWARN, "failed to read x position");
      curY = getIntAttribute(elem, "y", &error);
      if (error)
        GHMM_LOG(LWARN, "failed to read y position");

      switch (f->modelType & PTR_TYPE_MASK) {
      case GHMM_kDiscreteHMM:
        f->model.d[modelNo]->s[state].xPosition = curX;
        f->model.d[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kDiscreteHMM+GHMM_kTransitionClasses:
        f->model.ds[modelNo]->s[state].xPosition = curX;
        f->model.ds[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kDiscreteHMM+GHMM_kPairHMM:
      case GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses:
        f->model.dp[modelNo]->s[state].xPosition = curX;
        f->model.dp[modelNo]->s[state].yPosition = curY;
        break;
      case GHMM_kContinuousHMM:
      case GHMM_kContinuousHMM+GHMM_kTransitionClasses:
      case (GHMM_kContinuousHMM+GHMM_kMultivariate):
      case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
        f->model.c[modelNo]->s[state].xPosition = curX;
        f->model.c[modelNo]->s[state].yPosition = curY;
        break;
      default:
        GHMM_LOG(LERROR, "invalid modelType");
        goto STOP;
      }
    }

    elem = elem->next;
  }

  /* no model state took the description, so nobody else will free it */
  if (!descTaken)
    free(desc);

  return 0;
STOP:
  /* free only what is still owned here; everything already handed to the
     model stays with the model */
  free(s);
  if (!descTaken)
    free(desc);
  if (emissions) {
    m_free(emissions);
  }
  return -1;
#undef CUR_PROC
}
Exemplo n.º 26
0
/* ========================================================================= */
/**
   Writes state sNo of model moNo as a "state" element: the attributes
   "id", "initial" and (if present) "desc", followed by the model type
   specific state contents.
   Pair HMM states are not implemented yet; for them only the element with
   its id and an initial probability of 0 is written.
   @return          0 on success, -1 on error
   @param writer    XML writer the element is written to
   @param f         file structure holding the models
   @param moNo      index of the model inside f->model
   @param sNo       index of the state inside the model
 */
static int writeState(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo, int sNo) {
#define CUR_PROC "writeState"

  /* defaults cover the pair HMM cases, which do not assign these values */
  int rc = 0;
  double w_pi = 0.0;
  char *w_desc=NULL;

  /* start state */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "state")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (state)");
    goto STOP;
  }

  /* write id attribute */
  if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", sNo))
    GHMM_LOG(LERROR, "failed to write statte id attribute");

  /* read state attribute from different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_pi = f->model.d[moNo]->s[sNo].pi;
    w_desc = f->model.d[moNo]->s[sNo].desc;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_pi = f->model.ds[moNo]->s[sNo].pi;
    w_desc = f->model.ds[moNo]->s[sNo].desc;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented yet, the defaults from above are used */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_pi = f->model.c[moNo]->s[sNo].pi;
    w_desc = f->model.c[moNo]->s[sNo].desc;
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  /* write initial probability as attribute */
  WRITE_DOUBLE_ATTRIBUTE(writer, "initial", w_pi);

  /* write state description; the writer returns the number of bytes
     written, only a negative value signals an error */
  if (w_desc) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "desc", BAD_CAST replaceXMLEntity(w_desc)))
      GHMM_LOG(LERROR, "writing state description failed");
  }

  /* write state contents for different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    rc = writeDiscreteStateContents(writer, f, moNo, sNo);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    rc = writeDiscreteSwitchingStateContents(writer, f, moNo, sNo);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented yet, rc keeps its default of 0 */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    rc = writeContinuousStateContents(writer, f, moNo, sNo);
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  if (rc) {
    GHMM_LOG_PRINTF(LERROR, LOC, "writing state contents failed. model_type = %s",
                    strModeltype(f->modelType & PTR_TYPE_MASK));
    goto STOP;
  }

  /* end state*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (state)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
Exemplo n.º 27
0
/* ========================================================================= */
/**
   Writes one "transition" element for every outgoing transition of state
   sNo in model moNo. Each element carries the attributes "source" and
   "target" and a "probability" child holding one value per transition
   class as a CSV list.
   Pair HMMs are not implemented yet; nothing is written for them.
   @return          0 on success, -1 on error
   @param writer    XML writer the elements are written to
   @param f         file structure holding the models
   @param moNo      index of the model inside f->model
   @param sNo       index of the source state inside the model
 */
static int writeTransition(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo,
                           int sNo) {
#define CUR_PROC "writeTransition"

  /* defaults describe "no transitions" and cover the pair HMM cases,
     which do not assign these values */
  int cos = 1, i, j;
  int out_states = 0, * out_id = NULL;
  double * * out_a = NULL;
  double * w_out_a = NULL;
  char * tmp;

  /* read the transition data from the different model types; out_a is
     always accessed as out_a[class][transition] */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    out_states = f->model.d[moNo]->s[sNo].out_states;
    out_id     = f->model.d[moNo]->s[sNo].out_id;
    /* a single row, wrapped so that it can be indexed like a matrix */
    out_a      = &(f->model.d[moNo]->s[sNo].out_a);
    cos        = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    out_states = f->model.ds[moNo]->s[sNo].out_states;
    out_id     = f->model.ds[moNo]->s[sNo].out_id;
    out_a      = f->model.ds[moNo]->s[sNo].out_a;
    cos        = f->model.ds[moNo]->cos;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented yet, the defaults from above are used */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    out_states = f->model.c[moNo]->s[sNo].out_states;
    out_id     = f->model.c[moNo]->s[sNo].out_id;
    out_a      = f->model.c[moNo]->s[sNo].out_a;
    cos        = f->model.c[moNo]->cos;
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  /* scratch row holding the probabilities of one transition for all classes */
  ARRAY_MALLOC(w_out_a, cos);

  for (i=0; i<out_states; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "transition")) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (transition)");
      goto STOP;
    }

    /* write source id (current state) as attribute */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "source", "%d", sNo))
      GHMM_LOG(LERROR, "failed to write transition source attribute");

    /* write target id as attribute */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "target", "%d", out_id[i]))
      GHMM_LOG(LERROR, "failed to write transition target attribute");

    /* collect the probability of transition i from every class */
    for (j=0; j<cos; j++)
      w_out_a[j] = out_a[j][i];

    tmp = doubleArrayToCSV(w_out_a, cos);
    if (tmp) {
      if (0 > xmlTextWriterWriteElement(writer, BAD_CAST "probability", BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteElement (transition probabilities)");
        m_free(tmp);
        goto STOP;
      }
      m_free(tmp);
    } else {
      GHMM_LOG(LERROR, "converting transition probabilities array to CSV failed");
      goto STOP;
    }

    /* end transition */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (transition)");
      goto STOP;
    }
  }

  m_free(w_out_a);
  return 0;
STOP:
  /* w_out_a is still NULL if the error happened before the allocation */
  if (w_out_a) {
    m_free(w_out_a);
  }
  return -1;
#undef CUR_PROC
}
Exemplo n.º 28
0
/* ========================================================================= */
/**
   Writes model number "number" of f as one "HMM" element: the attributes
   "name", "type", "prior" and "transitionClasses" where applicable, the
   alphabet, label alphabet and background distributions of plain discrete
   models, then all states and finally all transitions.
   Pair HMMs are not implemented yet; for them an "HMM" element without
   attributes, states and transitions is written.
   @return          0 on success, -1 on error
   @param writer    XML writer the element is written to
   @param f         file structure holding the models
   @param number    index of the model inside f->model
 */
static int writeHMM(xmlTextWriterPtr writer, ghmm_xmlfile* f, int number) {
#define CUR_PROC "writeHMM"
  /* defaults cover the pair HMM cases, which do not assign these values */
  int rc=0, i, N=0;
  int w_cos=0;
  double w_prior=-1.0;
  char *w_name=NULL;
  char * w_type=NULL;

  /* start HMM */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "HMM")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (HMM)");
    goto STOP;
  }

  /* write HMM attributes applicable */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_name  = f->model.d[number]->name;
    w_type  = strModeltype(f->model.d[number]->model_type);
    w_prior = f->model.d[number]->prior;
    N       = f->model.d[number]->N;
    w_cos   = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_name  = f->model.ds[number]->name;
    w_type  = strModeltype(f->model.ds[number]->model_type);
    w_prior = f->model.ds[number]->prior;
    N       = f->model.ds[number]->N;
    /* NOTE(review): 0 suppresses the transitionClasses attribute although
       the model has a cos field - confirm that this is intended */
    w_cos   = 0;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented yet, the defaults from above are used */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_name  = f->model.c[number]->name;
    /* fall back to the type of the file if the model has none set */
    if (f->model.c[number]->model_type)
      w_type  = strModeltype(f->model.c[number]->model_type);
    else
      w_type  = strModeltype(f->modelType);
    w_prior = f->model.c[number]->prior;
    N       = f->model.c[number]->N;
    w_cos   = f->model.c[number]->cos;
    break;
  default:
    GHMM_LOG(LERROR, "invalid modelType");
    goto STOP;
  }

  /* the writer returns the number of bytes written, only a negative value
     signals an error */
  if (w_name) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "name", BAD_CAST w_name))
      GHMM_LOG(LERROR, "writing HMM name failed");
  }
  if (w_type) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST w_type))
      GHMM_LOG(LERROR, "writing HMM type failed");
  }

  /* a negative prior is not written */
  if (w_prior >= 0.0) {
    WRITE_DOUBLE_ATTRIBUTE(writer, "prior", w_prior);
  }

  if (w_cos > 1)
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "transitionClasses",
                                              "%d", w_cos))
      GHMM_LOG(LERROR, "failed to write no of transitionClasses");

  /* write alphabet if applicable */
  switch (f->modelType & (GHMM_kDiscreteHMM + GHMM_kTransitionClasses
                          + GHMM_kPairHMM)) {
  case GHMM_kDiscreteHMM:
    rc = writeAlphabet(writer, f->model.d[number]->alphabet, kAlphabet);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    /* not implemented yet */
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented yet */
    break;
  default:
    /* all other model types have no alphabet written here */
    break;
  }

  if (rc) {
    GHMM_LOG_PRINTF(LERROR, LOC, "writing alphabet for HMM %d (type %s) failed",
                    number, strModeltype(f->modelType));
    goto STOP;
  }

  /* write label alphabet if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kLabeledStates) {
    if (writeAlphabet(writer, f->model.d[number]->label_alphabet, kLabelAlphabet))
      GHMM_LOG(LERROR, "writing of label alphabet failed");
  }

  /* write background distributions if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kBackgroundDistributions) {
    if (writeBackground(writer, f->model.d[number]->bp))
      GHMM_LOG(LERROR, "writing of background distributions failed");
  }

  /* write all states */
  for (i=0; i<N; i++)
    if (writeState(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing of state %d in HMM %d failed", i, number);
      goto STOP;
    }

  /* write all outgoing transitions */
  for (i=0; i<N; i++)
    if (writeTransition(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing transitions of state %d in HMM %d failed",
                     i, number);
      goto STOP;
    }

  /*end HMM*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (HMM)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
Exemplo n.º 29
0
/**
   Trains the ghmm_dmodel with a set of annotated sequences till convergence using
   gradient descent.
   Model must not have silent states. (checked in Python wrapper)
   Whenever a training step has to be undone, the model passed in is freed
   and replaced by a copy of the last accepted model. The caller must
   therefore continue with the returned pointer and must not use mo again.
   @return            trained model on success (convergence, eta below
                      GHMM_EPS_PREC or no_steps reached), NULL pointer on error
   @param mo:         pointer to a ghmm_dmodel
   @param sq:         struct of annotated sequences
   @param eta:        initial parameter eta (learning rate)
   @param no_steps    number of training steps
 */
ghmm_dmodel* ghmm_dmodel_label_gradient_descent (ghmm_dmodel* mo, ghmm_dseq * sq, double eta, int no_steps)
{
#define CUR_PROC "ghmm_dmodel_label_gradient_descent"

  char * str;
  int runs = 0;
  double cur_perf, last_perf;
  ghmm_dmodel *last;

  /* last always holds the best model accepted so far */
  last = ghmm_dmodel_copy(mo);
  if (!last) {
    GHMM_LOG(LERROR, "copying the model failed");
    return NULL;
  }
  last_perf = compute_performance (last, sq);

  while (eta > GHMM_EPS_PREC && runs < no_steps) {
    runs++;
    if (-1 == gradient_descent_onestep(mo, sq, eta)) {
      ghmm_dmodel_free(&last);
      return NULL;
    }
    cur_perf = compute_performance(mo, sq);

    if (last_perf < cur_perf) {
      /* if model is degenerated, lower eta and try again */
      if (cur_perf > 0.0) {
        str = ighmm_mprintf(NULL, 0, "current performance = %g", cur_perf);
        GHMM_LOG(LINFO, str);
        m_free(str);
        ghmm_dmodel_free(&mo);
        mo = ghmm_dmodel_copy(last);
        eta *= .5;
      }
      else {
        /* Improvement insignificant, assume convergence */
        if (fabs (last_perf - cur_perf) < cur_perf * (-1e-8)) {
          ghmm_dmodel_free(&last);
          str = ighmm_mprintf(NULL, 0, "convergence after %d steps.", runs);
          GHMM_LOG(LINFO, str);
          m_free(str);
          /* convergence is success: hand back the trained model, NULL is
             reserved for errors */
          return mo;
        }

        /* log every step at the beginning, later only every 50th */
        if (runs < 175 || 0 == runs % 50) {
          str = ighmm_mprintf(NULL, 0, "Performance: %g\t improvement: %g\t step %d", cur_perf,
                              cur_perf - last_perf, runs);
          GHMM_LOG(LINFO, str);
          m_free(str);
        }

        /* significant improvement, next iteration */
        ghmm_dmodel_free(&last);
        last = ghmm_dmodel_copy(mo);
        last_perf = cur_perf;
        eta *= 1.07;
      }
    }
    /* no improvement */
    else {

      if (runs < 175 || 0 == runs % 50) {
        str = ighmm_mprintf(NULL, 0, "Performance: %g\t !IMPROVEMENT: %g\t step %d", cur_perf,
                            cur_perf - last_perf, runs);
        GHMM_LOG(LINFO, str);
        m_free(str);
      }

      /* try another training step */
      runs++;
      eta *= .85;
      if (-1 == gradient_descent_onestep(mo, sq, eta)) {
        ghmm_dmodel_free(&last);
        return NULL;
      }
      cur_perf = compute_performance (mo, sq);
      str = ighmm_mprintf(NULL, 0, "Performance: %g\t ?Improvement: %g\t step %d", cur_perf,
                          cur_perf - last_perf, runs);
      GHMM_LOG(LINFO, str);
      m_free(str);

      /* improvement, save and proceed with next iteration */
      if (last_perf < cur_perf && cur_perf < 0.0) {
        ghmm_dmodel_free (&last);
        last = ghmm_dmodel_copy(mo);
        last_perf = cur_perf;
      }
      /* still no improvement, revert to saved model; the extra step is
         not counted */
      else {
        runs--;
        ghmm_dmodel_free(&mo);
        mo = ghmm_dmodel_copy(last);
        eta *= .9;
      }
    }
  }

  ghmm_dmodel_free(&last);
  return mo;

#undef CUR_PROC
}
Exemplo n.º 30
0
/*============================================================================*/
/**
   Computes the joint log probability of an observation sequence and a
   state path: the log of the initial probability of the first state plus,
   for every visited state, the log of the transition probability into it
   and the log of its emission density at the current observation.
   Processing stops as soon as either the state path or the observation
   sequence is exhausted; an info message is logged if the other one has
   elements left.
   @return          0 on success, -1 on error (empty input, no transition
                    between two consecutive states, or a problem with the
                    class change function)
   @param mo        the model
   @param O         observations; every emission consumes mo->dim values
   @param len       number of values in O
   @param S         state path
   @param slen      number of states in S
   @param log_p     receives the log probability; it may hold a partial sum
                    when -1 is returned
 */
int ghmm_cmodel_logp_joint(ghmm_cmodel *mo, const double *O, int len,
                            const int *S, int slen, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp_joint"
    int prevstate, state, state_pos=0, pos=0, j, osc=0;
    int dim = mo->dim;

    /* the first state and the first observation are read unconditionally */
    if (slen < 1 || len < dim) {
        GHMM_LOG(LERROR, "state path or observation sequence is empty");
        goto STOP;
    }

    prevstate = state = S[0];
    *log_p = log(mo->s[state].pi);
    if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */ )
    {
        *log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
        pos+=dim;
    }

    /* continue only while there is a next state AND a complete next
       observation, otherwise S or O would be read past their end */
    for (state_pos=1; state_pos < slen && pos+dim <= len; state_pos++) {
        state = S[state_pos];

        /* find the index of the transition prevstate -> state */
        for (j=0; j < mo->s[state].in_states; ++j) {
            if (prevstate == mo->s[state].in_id[j])
                break;
        }

        /* determine the transition class for models with several classes */
        if (mo->cos > 1) {
            if (!mo->class_change->get_class) {
                GHMM_LOG(LERROR, "get_class not initialized");
                goto STOP;
            }
            osc = mo->class_change->get_class(mo, O, mo->class_change->k, pos);
            /* osc is used as array index below, reject anything outside
               of [0, cos) */
            if (osc < 0 || osc >= mo->cos) {
                GHMM_LOG_PRINTF(LERROR, LOC, "get_class returned index %d "
                                "but model has only %d classes!", osc, mo->cos);
                goto STOP;
            }
        }

        /* transition missing or with (numerically) zero probability */
        if (j == mo->s[state].in_states ||
            fabs(mo->s[state].in_a[osc][j]) < GHMM_EPS_PREC) {
            GHMM_LOG_PRINTF(LERROR, LOC, "Sequence can't be built. There is no "
                            "transition from state %d to %d.", prevstate, state);
            goto STOP;
        }

        *log_p += log(mo->s[state].in_a[osc][j]);

        if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */) {
            *log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
            pos+=dim;
        }

        prevstate = state;
    }

    if (pos < len)
        GHMM_LOG_PRINTF(LINFO, LOC, "state sequence too short! processed only %d symbols", pos/dim);
    if (state_pos < slen)
        GHMM_LOG_PRINTF(LINFO, LOC, "sequence too short! visited only %d states", state_pos);

    return 0;
  STOP:
    return -1;
# undef CUR_PROC
}