/*===========================================================================*/
/**
 * Reads one background distribution element into model `modelNo` of `f`.
 *
 * The next free slot of the model's background structure is claimed (the
 * counter bp->n is advanced before any validation takes place), then the
 * optional "order" and "rev" attributes, the "key" attribute and the CSV
 * content of the node are read.
 *
 * @param doc      unused here
 * @param cur      XML node of the background distribution
 * @param f        file structure that owns the models
 * @param modelNo  index of the discrete model inside f
 * @return 0 on success, -1 on failure
 */
static int parseBackground(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int modelNo) {
#define CUR_PROC "parseBackground"

  int error, order, rev;
  int slot;
  double entries;          /* m^(order+1), number of values in the CSV list */
  double *probs = NULL;
  char *key = NULL;
  char *content = NULL;

  assert(f->modelType & GHMM_kDiscreteHMM);

  slot = f->model.d[modelNo]->bp->n++;

  /* order is optional and defaults to 0 */
  order = getIntAttribute(cur, "order", &error);
  if (error)
    order = 0;
  if (order && !(f->modelType & GHMM_kHigherOrderEmissions)) {
    GHMM_LOG(LERROR, "background distribution has order > 0, but model is not higher order");
    goto STOP;
  }
  f->model.d[modelNo]->bp->order[slot] = order;

  /* the key string is handed over to the background structure */
  key = (char *)getXMLCharAttribute(cur, "key", &error);
  f->model.d[modelNo]->bp->name[slot] = key;

  /* rev is optional and defaults to 0 */
  rev = getIntAttribute(cur, "rev", &error);
  if (error)
    rev = 0;

  /* the distribution itself */
  content = (char *)xmlNodeGetContent(cur);
  entries = pow(f->model.d[modelNo]->bp->m, order+1);

  ARRAY_MALLOC(probs, entries);
  if (-1 == parseCSVList(content, entries, probs, rev)) {
    GHMM_LOG(LERROR, "Can not parse background CSV list.");
    goto STOP;
  }
  f->model.d[modelNo]->bp->b[slot] = probs;

  free(content);
  return 0;

STOP:
  m_free(probs);
  free(content);
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/**
 * Writes an alphabet as an XML element with one <symbol> child per entry.
 *
 * The element is called "alphabet" when type == kAlphabet and
 * "classAlphabet" otherwise; only the former gets an "id" attribute.
 * Every symbol is written with its index as "code" attribute and its
 * entity-escaped text as content.
 *
 * @param writer  XML writer to emit to
 * @param alfa    alphabet to serialise
 * @param type    kAlphabet or any other value for a class alphabet
 * @return 0 on success, -1 on failure
 */
static int writeAlphabet(xmlTextWriterPtr writer, ghmm_alphabet * alfa, int type) {
#define CUR_PROC "writeAlphabet"

  int i;

  if (0 > xmlTextWriterStartElement(writer, BAD_CAST (type == kAlphabet ? "alphabet" : "classAlphabet"))) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement");
    goto STOP;
  }

  /* A failing id attribute is only logged, writing continues (kept as in
     the original behaviour). */
  if (type == kAlphabet) {
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", alfa->id)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write id-attribute for alphabet "
                      "with id %d", alfa->id);
    }
  }

  for (i=0; i<alfa->size; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "symbol")) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to start symbol-tag no %d", i);
      goto STOP;
    }
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "code", "%d", i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write code-attribute for symbol %s "
                      "with code %d", alfa->symbols[i], i);
      goto STOP;
    }
    /* NOTE(review): ownership of the string returned by replaceXMLEntity is
       not visible here -- confirm it does not need to be freed. */
    if (0 > xmlTextWriterWriteRaw(writer, BAD_CAST replaceXMLEntity(alfa->symbols[i]))) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to write symbol %s with code %d",
                      alfa->symbols[i], i);
      goto STOP;
    }
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "failed to end symbol-tag no %d", i);
      goto STOP;
    }
  }

  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at ending alphabet");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/*============================================================================*/ double ighmm_rand_get_1overa (double x, double mean, double u) { /* Calulates 1/a(x, mean, u), with a = the integral from x til \infty over the Gauss density function */ # define CUR_PROC "ighmm_rand_get_1overa" double erfc_value; if (u <= 0.0) { GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n"); goto STOP; } #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) erfc_value = erfc ((x - mean) / sqrt (u * 2)); #else erfc_value = ighmm_erfc ((x - mean) / sqrt (u * 2)); #endif if (erfc_value <= DBL_MIN) { ighmm_mes (MES_WIN, "a ~= 0.0 critical! (mue = %.2f, u =%.2f)\n", mean, u); return (erfc_value); } else return (2.0 / erfc_value); STOP: return (-1.0); # undef CUR_PROC } /* ighmm_rand_get_1overa */
/* Cumulative distribution function of N(mean, u) truncated on the left at a
   (support [a, infinity)).

   With z(t) = (t - mean) / sqrt(2u) the value is
     F(x) = 1 - erfc(z(x)) / erfc(z(a)) = 1 + (erf(z(x)) - 1) / erfc(z(a)).

   x, mean: evaluation point and mean of the untruncated normal
   u:       variance of the untruncated normal, must be > 0
   a:       truncation point
   Returns 0.0 for x <= a, -1.0 for an invalid variance, F(x) otherwise. */
double ighmm_rand_normal_right_cdf (double x, double mean, double u, double a)
{
# define CUR_PROC "ighmm_rand_normal_right_cdf"

  if (x <= a)
    return (0.0);
  /* The variance has to be positive.  It was previously compared against the
     truncation point a, which rejected valid input (0 < u <= a) and let
     non-positive variances through whenever a was negative. */
  if (u <= 0.0) {
    GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n");
    goto STOP;
  }
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
  /* C99 provides erf/erfc; erfc(x) = 1 - erf(x)
     = 2/\sqrt(\pi) \int_x^\infty \exp(-t^2). */
  return 1.0 + (erf ((x - mean) / sqrt (u * 2)) - 1.0) / erfc ((a - mean) / sqrt (u * 2));
#else
  return 1.0 + (ighmm_erf ((x - mean) / sqrt (u * 2)) - 1.0) / ighmm_erfc ((a - mean) / sqrt (u * 2));
#endif /* Check for ISO C99 */
STOP:
  return (-1.0);
# undef CUR_PROC
}                               /* double ighmm_rand_normal_right_cdf */
/*===========================================================================*/ static int parseModelType(const char * data, unsigned int size) { #define CUR_PROC "parseModelType" int i, noValidMo, modelType=0; const char * end = data; char * str; while ((end = strchr(data, ' '))) { modelType += matchModelType(data, end-data); size -= (end-data)+1; data = end+1; } modelType += matchModelType(data, size); noValidMo = sizeof(validModelTypes)/sizeof(validModelTypes[0]); for (i=0; i<noValidMo; i++) { if (modelType == validModelTypes[i]) break; } if (i == noValidMo) { str = ighmm_mprintf(NULL, 0, "%d is no known valid model type", modelType); GHMM_LOG(LERROR, str); m_free(str); return -1; } return modelType; #undef CUR_PROC }
/* covariance matrix is linearized */ double ighmm_rand_binormal_density(const double *x, double *mean, double *cov) { # define CUR_PROC "ighmm_rand_binormal_density" double rho; #ifndef DO_WITH_GSL double numerator,part1,part2,part3; #endif if (cov[0] <= 0.0 || cov[2 + 1] <= 0.0) { GHMM_LOG(LCONVERTED, "variance <= 0.0 not allowed\n"); goto STOP; } rho = cov[1] / ( sqrt (cov[0]) * sqrt (cov[2 + 1]) ); /* The denominator is possibly < EPS??? Check that ? */ #ifdef DO_WITH_GSL /* double gsl_ran_bivariate_gaussian_pdf (double x, double y, double sigma_x, double sigma_y, double rho) */ return gsl_ran_bivariate_gaussian_pdf (x[0], x[1], sqrt (cov[0]), sqrt (cov[2 + 1]), rho); #else part1 = (x[0] - mean[0]) / sqrt (cov[0]); part2 = (x[1] - mean[1]) / sqrt (cov[2 + 1]); part3 = m_sqr (part1) - 2 * part1 * part2 + m_sqr (part2); numerator = exp ( -1 * (part3) / ( 2 * (1 - m_sqr(rho)) ) ); return (numerator / ( 2 * PI * sqrt(1 - m_sqr(rho)) )); #endif STOP: return (-1.0); # undef CUR_PROC } /* double ighmm_rand_binormal_density */
/* ========================================================================= */ static char * doubleArrayToCSV(double * array, int size) { #define CUR_PROC "doubleArrayToCSV" int i, pos=0; char *csv=NULL; int singlelength = (2 + /* comma and space */ 8 + /* 8 signifcant digits */ 1 + /* sign */ 5 + /* 'E' and signed mantissa */ 3); /* safety */ int maxlength = size * singlelength; ARRAY_MALLOC(csv, maxlength); for (i=0; i < size-1 && pos + singlelength < maxlength; i++) { pos += sprintf(csv+pos, "%.8g, ", array[i]); } if (i < size-1 || pos + singlelength > maxlength) { GHMM_LOG(LERROR, "writing CSV failed"); goto STOP; } else { pos += sprintf(csv+pos, "%.8g", array[i]); } /*printf("%d bytes of %d written\n", pos, maxlength);*/ return csv; STOP: free(csv); return NULL; #undef CUR_PROC }
/*============================================================================*/ double ighmm_rand_normal_density_trunc(double x, double mean, double u, double a) { # define CUR_PROC "ighmm_rand_normal_density_trunc" #ifndef DO_WITH_GSL double c; #endif /* DO_WITH_GSL */ if (u <= 0.0) { GHMM_LOG(LERROR, "u <= 0.0 not allowed"); goto STOP; } if (x < a) return 0.0; #ifdef DO_WITH_GSL /* move mean to the right position */ return gsl_ran_gaussian_tail_pdf(x - mean, a - mean, sqrt(u)); #else if ((c = ighmm_rand_get_1overa(a, mean, u)) == -1) { GHMM_LOG_QUEUED(LERROR); goto STOP; }; return c * ighmm_rand_normal_density(x, mean, u); #endif /* DO_WITH_GSL */ STOP: return -1.0; # undef CUR_PROC } /* double ighmm_rand_normal_density_trunc */
/* Writes the mean vector and the covariance matrix of a multinormal emission
   as child elements <mean> and <variance>, each holding a CSV list.
   Returns 0 on success and -1 on failure. */
static int writeMultiNormal(xmlTextWriterPtr writer, ghmm_c_emission *emission) {
#define CUR_PROC "writeMultiNormal"

  char *csv = NULL;

  /* ---- mean vector ---- */
  if (xmlTextWriterStartElement(writer, BAD_CAST "mean") < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mean)");
    goto STOP;
  }

  csv = doubleArrayToCSV(emission->mean.vec, emission->dimension);
  if (!csv) {
    GHMM_LOG(LERROR, "converting array to CSV failed for mean vector");
    goto STOP;
  }
  if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing mean vector CSV");
    goto STOP;
  }
  m_free(csv);
  csv = NULL;

  if (xmlTextWriterEndElement(writer) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement mean");
    goto STOP;
  }

  /* ---- covariance matrix ---- */
  if (xmlTextWriterStartElement(writer, BAD_CAST "variance") < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (variance)");
    goto STOP;
  }

  csv = doubleArrayToCSV(emission->variance.mat, emission->dimension * emission->dimension);
  if (!csv) {
    GHMM_LOG(LERROR, "converting array to CSV failed for covariance matrix");
    goto STOP;
  }
  if (xmlTextWriterWriteRaw(writer, BAD_CAST csv) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing variance matrix CSV");
    goto STOP;
  }
  m_free(csv);
  csv = NULL;

  if (xmlTextWriterEndElement(writer) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement variance");
    goto STOP;
  }

  return 0;

STOP:
  /* csv is still set only if writing it failed */
  free(csv);
  return -1;
#undef CUR_PROC
}
/*===========================================================================*/ static int parseCSVList(const char * data, unsigned int size, double * array, int reverse) { #define CUR_PROC "parseCSVList" int retval=0; int i; char * * next, * estr; double tmp; ARRAY_CALLOC(next, 1); for (i=0; i<size; i++) { array[i] = strtod(data, next); if (data == *next) { estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. entry %d of %d. (%s)", i, size, *next); GHMM_LOG(LERROR, estr); m_free(estr); retval=-1; break; } if (next) data = *next+1; else break; } if (i != size) { retval=-1; estr = ighmm_mprintf(NULL, 0, "error in parsing CSV. sizes do not match (%d != %d)", i, size); GHMM_LOG(LERROR, estr); m_free(estr); } if (reverse) { for (i=0; i<size/2; i++) { tmp = array[i]; array[i] = array[size-i-1]; array[size-i-1] = tmp; } } STOP: m_free(next); return retval; #undef CUR_PROC }
double ighmm_rand_normal_right (double a, double mue, double u, int seed) { # define CUR_PROC "ighmm_rand_normal_right" double x = -1; double sigma; #ifdef DO_WITH_GSL double s; #else double U, Us, Us1, Feps, t, T; #endif if (u <= 0.0) { GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n"); goto STOP; } sigma = sqrt(u); if (seed != 0) { GHMM_RNG_SET (RNG, seed); } #ifdef DO_WITH_GSL /* move boundary to lower values in order to achieve maximum at mue gsl_ran_gaussian_tail(generator, lower_boundary, sigma) */ return mue + gsl_ran_gaussian_tail(RNG, a - mue, sqrt (u)); #else /* DO_WITH_GSL */ /* Inverse transformation with restricted sampling by Fishman */ U = GHMM_RNG_UNIFORM(RNG); Feps = ighmm_rand_get_PHI((a-mue) / sigma); Us = Feps + (1-Feps) * U; Us1 = 1-Us; t = m_min (Us, Us1); t = sqrt (-log (t * t)); T = sigma * (t - (C0 + t * (C1 + t * C2)) / (1 + t * (D1 + t * (D2 + t * D3)))); if (Us < Us1) x = mue - T; else x = mue + T; #endif /* DO_WITH_GSL */ STOP: return x; # undef CUR_PROC } /* randvar_normal_pos */
/**
   Calculates the logarithm of sum(exp(log_a[j,a_pos])+exp(log_gamma[j,g_pos]))
   which corresponds to the logarithm of the sum of a[j,a_pos]*gamma[j,g_pos].
   The value 1.0 is used throughout as the marker for log(0).
   On allocation failure the process is terminated.
   @return ighmm_log_sum for products of a row from gamma and a row from matrix A
   @param log_a:      row of the transition matrix with logarithmic values (1.0 for log(0))
   @param s:          ghmm_dstate whose gamma-value is calculated
   @param parent:     a pointer to the parent hypothesis
*/
static double ighmm_log_gamma_sum (double *log_a, ghmm_dstate * s, hypoList * parent)
{
#define CUR_PROC "ighmm_log_gamma_sum"
  double *terms;
  double best = 1.0;
  double acc;
  int best_idx = 0;
  int idx, g;

  /* shortcut: the parent carries a single gamma entry */
  if (parent->gamma_states == 1) {
    for (idx = 0; idx < s->in_states; idx++) {
      if (parent->gamma_id[0] == s->in_id[idx])
        return parent->gamma_a[0] + log_a[idx];
    }
  }

  ARRAY_MALLOC (terms, s->in_states);

  /* terms[idx] = log(a * gamma) for every predecessor, tracking the maximum */
  for (idx = 0; idx < s->in_states; idx++) {
    /* look the predecessor up in the gamma list */
    g = 0;
    while (g < parent->gamma_states && parent->gamma_id[g] != s->in_id[idx])
      g++;

    if (g == parent->gamma_states) {
      /* not in the list: contributes log(0) */
      terms[idx] = 1.0;
      continue;
    }

    terms[idx] = log_a[idx] + parent->gamma_a[g];
    if (best == 1.0 || (terms[idx] > best && terms[idx] != 1.0)) {
      best = terms[idx];
      best_idx = idx;
    }
  }

  /* max + log(1 + sum over idx != argmax of exp(terms[idx] - max)) */
  acc = 1.0;
  for (idx = 0; idx < s->in_states; idx++) {
    if (idx != best_idx && terms[idx] != 1.0)
      acc += exp (terms[idx] - best);
  }
  acc = log (acc);
  acc += best;

  free (terms);
  return acc;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_log_gamma_sum failed\n");
  exit (1);
#undef CUR_PROC
}
/*===========================================================================*/
/**
 * Builds an alphabet from the <symbol> children of an XML node.
 *
 * The "code" attributes of the symbols have to be the consecutive numbers
 * 0, 1, 2, ... in document order.  The content of every symbol element
 * becomes the corresponding entry of the alphabet.
 *
 * @param doc  unused here
 * @param cur  XML node whose children are the symbols
 * @param f    unused here
 * @return newly allocated alphabet, or NULL on failure
 */
static ghmm_alphabet * parseAlphabet(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f) {
#define CUR_PROC "parseAlphabet"

  char * str;
  int M, code, error;

  xmlNodePtr symbol;
  ghmm_alphabet * alfa = NULL;

  ARRAY_CALLOC(alfa, 1);

  /* first pass: count the symbols and check their codes */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      code = getIntAttribute(symbol, "code", &error);
      if (error || code!=M) {
        str = ighmm_mprintf(NULL, 0, "non consecutive code %d == %d", code, M);
        GHMM_LOG(LERROR, str);
        m_free(str);
        goto STOP;
      } else
        M++;
    }
    symbol=symbol->next;
  }

  alfa->size = M;
  ARRAY_MALLOC(alfa->symbols, M);

  /* second pass: store the symbol texts */
  symbol = cur->children;
  M=0;
  while (symbol!=NULL) {
    if ((!xmlStrcmp(symbol->name, BAD_CAST "symbol"))) {
      alfa->symbols[M++] = (char *)xmlNodeGetContent(symbol);
    }
    symbol=symbol->next;
  }

  return alfa;
STOP:
  /* alfa is NULL if its own allocation failed, and alfa->symbols is NULL
     whenever we get here before or because of the symbol array allocation;
     neither may be dereferenced or released in that state. */
  if (alfa) {
    if (alfa->symbols) {
      m_free(alfa->symbols);
    }
    m_free(alfa);
  }
  return NULL;
#undef CUR_PROC
}
/* ========================================================================= */ static char * strModeltype(int modelType) { #define CUR_PROC "strModeltype" int end; char * mt; ARRAY_CALLOC(mt, 200); if (modelType > 0) { if (modelType & GHMM_kLeftRight) strcat(mt, "left-right "); if (modelType & GHMM_kSilentStates) strcat(mt, "silent "); if (modelType & GHMM_kTiedEmissions) strcat(mt, "tied "); if (modelType & GHMM_kHigherOrderEmissions) strcat(mt, "higher-order "); if (modelType & GHMM_kBackgroundDistributions) strcat(mt, "background "); if (modelType & GHMM_kLabeledStates) strcat(mt, "labeled "); if (modelType & GHMM_kTransitionClasses) strcat(mt, "transition-classes "); if (modelType & GHMM_kDiscreteHMM) strcat(mt, "discrete "); if (modelType & GHMM_kContinuousHMM) strcat(mt, "continuous "); if (modelType & GHMM_kPairHMM) strcat(mt, "pair "); if (modelType & GHMM_kMultivariate) strcat(mt, "multivariate "); } else { GHMM_LOG(LERROR, "can't write models with unspecified modeltype"); goto STOP; } /* overwrite the last space */ end = strlen(mt); mt[end-1] = '\0'; return mt; STOP: m_free(mt); return NULL; #undef CUR_PROC }
/* cumalative distribution function of N(mean, u) */ double ighmm_rand_normal_cdf (double x, double mean, double u) { # define CUR_PROC "ighmm_rand_normal_cdf" if (u <= 0.0) { GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n"); goto STOP; } #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* PHI(x)=erf(x/sqrt(2))/2+0.5 */ return (erf ((x - mean) / sqrt (u * 2.0)) + 1.0) / 2.0; #else return (ighmm_erf ((x - mean) / sqrt (u * 2.0)) + 1.0) / 2.0; #endif /* Check for ISO C99 */ STOP: return (-1.0); # undef CUR_PROC } /* double ighmm_rand_normal_cdf */
/* cumalative distribution function of a uniform distribution in the range [min,max] */ double ighmm_rand_uniform_cdf (double x, double max, double min) { # define CUR_PROC "ighmm_rand_uniform_cdf" if (max <= min) { GHMM_LOG(LCONVERTED, "max <= min not allowed\n"); goto STOP; } if (x < min) { return 0.0; } if (x >= max) { return 1.0; } return (x-min)/(max-min); STOP: return (-1.0); # undef CUR_PROC } /* ighmm_rand_uniform_cdf */
/*============================================================================*/
/* Density of the uniform distribution on [min, max] at x: 1/(max-min)
   inside the closed interval and 0 outside.  Note the parameter order: max
   comes before min.  Returns -1.0 if max <= min. */
double ighmm_rand_uniform_density (double x, double max, double min)
{
# define CUR_PROC "ighmm_rand_uniform_density"
  if (max <= min) {
    GHMM_LOG(LCONVERTED, "max <= min not allowed \n");
    goto STOP;
  }

  return ((x >= min) && (x <= max)) ? 1.0/(max-min) : 0.0;

STOP:
  return (-1.0);
# undef CUR_PROC
}                               /* double ighmm_rand_uniform_density */
/* ========================================================================= */
/**
 * Writes all background distributions as <background> elements.
 *
 * Every element gets a "key" attribute (the stored name, or "bg_<index>"
 * if no name is available), an "order" attribute if the order is greater
 * than 0, and the distribution as CSV content.  Failures while writing the
 * attributes are logged only; all other failures abort the function.
 *
 * @param writer  XML writer to emit to
 * @param bg      background distributions
 * @return 0 on success, -1 on failure
 */
static int writeBackground(xmlTextWriterPtr writer, ghmm_dbackground* bg) {
#define CUR_PROC "writeBackground"

  int i;
  char * tmp=NULL;

  for (i=0; i<bg->n; i++) {

    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "background")) {
      GHMM_LOG_PRINTF(LERROR, LOC, "Error at starting background %d", i);
      return -1;
    }

    /* Fall back to a generated key if there is no name array or this
       particular name is missing (parseBackground stores the key attribute
       without checking that it was present). */
    if (bg->name && bg->name[i]) {
      if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "key", BAD_CAST (bg->name[i])))
        GHMM_LOG(LERROR, "Error at writing background key");
    }
    else {
      if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "key", "bg_%d", i))
        GHMM_LOG(LERROR, "Error at writing background key");
    }

    /* order 0 is the default and not written */
    if (0 < bg->order[i]) {
      if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "order", "%d", bg->order[i]))
        GHMM_LOG(LERROR, "can't write background order attribute");
    }

    /* the distribution has m^(order+1) entries */
    tmp = doubleArrayToCSV(bg->b[i], pow(bg->m, bg->order[i]+1));
    if (tmp) {
      if (0 > xmlTextWriterWriteRaw(writer, BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteRaw while writing "
                 "background distribution CSV");
        m_free(tmp);
        return -1;
      }
      m_free(tmp);
    } else {
      GHMM_LOG(LERROR, "converting array to CSV failed for background distribution");
      return -1;
    }

    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement while ending "
               "background distribution");
      return -1;
    }
  }
  return 0;
#undef CUR_PROC
}
/* Prepends a new hypothesis to the list *plist.
   plist:   address of the list head; it is updated to the new element
   newhyp:  value stored in the new element
   parlist: parent hypothesis (may be NULL); its reference count is
            increased by one
   The process is terminated if the allocation fails. */
static void ighmm_hlist_insert (hypoList ** plist, int newhyp,
                                hypoList * parlist)
{
#define CUR_PROC "ighmm_hlist_insert"
  hypoList *node;

  ARRAY_CALLOC (node, 1);

  node->hyp_c = newhyp;
  node->parent = parlist;
  if (parlist != NULL)
    parlist->refcount += 1;

  /* link in front of the current head */
  node->next = *plist;
  *plist = node;
  return;

STOP:     /* Label STOP from ARRAY_[CM]ALLOC */
  GHMM_LOG(LCONVERTED, "ighmm_hlist_insert failed\n");
  exit (1);
#undef CUR_PROC
}
/*===========================================================================*/ double ighmm_rand_uniform_cont (int seed, double max, double min) { # define CUR_PROC "ighmm_rand_uniform_cont" if (max <= min) { GHMM_LOG(LCONVERTED, "max <= min not allowed\n"); goto STOP; } if (seed != 0) { GHMM_RNG_SET (RNG, seed); } #ifdef DO_WITH_GSL return (double)(((double)gsl_rng_uniform (RNG)*(max-min)) + min); #else return (double)((GHMM_RNG_UNIFORM (RNG))*(max-min) + min ); #endif STOP: return (-1.0); # undef CUR_PROC } /* ighmm_rand_uniform_cont */
/* Scales the vector v in place so that its entries sum to one.
   Returns 0 on success and -1 if the vector is not empty and its sum is
   smaller than GHMM_EPS_PREC (v is left untouched in that case).
   PROBLEM: Entries can get very small and be rounded to 0 */
int ighmm_cvector_normalize (double *v, int len)
{
#define CUR_PROC "ighmm_cvector_normalize"
  int k;
  double total = 0.0;
  char * msg;

  for (k = 0; k < len; k++)
    total += v[k];

  /* an empty vector is accepted as is */
  if (len > 0 && total < GHMM_EPS_PREC) {
    msg = ighmm_mprintf(NULL, 0, "Can't normalize vector. Sum smaller than %g\n", GHMM_EPS_PREC);
    GHMM_LOG(LWARN, msg);
    m_free(msg);
    return (-1);
  }

  for (k = 0; k < len; k++)
    v[k] /= total;

  return 0;
#undef CUR_PROC
}                               /* ighmm_cvector_normalize */
double ighmm_rand_normal_density_approx (double x, double mean, double u) { # define CUR_PROC "ighmm_rand_normal_density_approx" #ifdef HAVE_LIBPTHREAD static pthread_mutex_t lock; #endif /* HAVE_LIBPTHREAD */ int i; double y, z, pdf_x; if (u <= 0.0) { GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n"); goto STOP; } if (!pdf_stdnormal_exists) { #ifdef HAVE_LIBPTHREAD pthread_mutex_lock (&lock); /* Put on a lock, because the clustering is parallel */ #endif /* HAVE_LIBPTHREAD */ randvar_init_pdf_stdnormal (); #ifdef HAVE_LIBPTHREAD pthread_mutex_unlock (&lock); /* Take the lock off */ #endif /* HAVE_LIBPTHREAD */ } y = 1 / sqrt (u); z = fabs ((x - mean) * y); i = (int) (z * X_FAKT_PDF); /* linear interpolation: */ if (i >= PDFLEN - 1) { i = PDFLEN - 1; pdf_x = y * pdf_stdnormal[i]; } else pdf_x = y * (pdf_stdnormal[i] + (z - i * X_STEP_PDF) * (pdf_stdnormal[i + 1] - pdf_stdnormal[i]) / X_STEP_PDF); return (pdf_x); STOP: return (-1.0); # undef CUR_PROC } /* double ighmm_rand_normal_density_approx */
/*============================================================================*/ double ighmm_rand_normal_density (double x, double mean, double u) { # define CUR_PROC "ighmm_rand_normal_density" #ifndef DO_WITH_GSL double expo; #endif if (u <= 0.0) { GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n"); goto STOP; } /* The denominator is possibly < EPS??? Check that ? */ #ifdef DO_WITH_GSL /* double gsl_ran_gaussian_pdf (double x, double sigma) */ return gsl_ran_gaussian_pdf (x - mean, sqrt (u)); #else expo = exp (-1 * m_sqr (mean - x) / (2 * u)); return (1 / (sqrt (2 * PI * u)) * expo); #endif STOP: return (-1.0); # undef CUR_PROC } /* double ighmm_rand_normal_density */
/* ========================================================================= */
/**
 * Writes the contents of one state of a continuous model: the <mixture>
 * element with one child per density and, if both coordinates are
 * positive, the <position> element.
 *
 * @param writer  XML writer to emit to
 * @param f       file structure that owns the models
 * @param moNo    index of the continuous model inside f
 * @param sNo     index of the state inside the model
 * @return 0 on success, -1 on failure
 *
 * NOTE(review): only normal, multinormal, normal_left, normal_right and
 * uniform are handled; any other density type (parseState in this file can
 * assign normal_approx) ends up in the default branch and makes the write
 * fail -- confirm whether that is intended.
 */
static int writeContinuousStateContents(xmlTextWriterPtr writer, ghmm_xmlfile* f,
                                        int moNo, int sNo) {
#define CUR_PROC "writeContinuousStateContents"

  int k;
  ghmm_cstate *state = f->model.c[moNo]->s + sNo;
  /* a fixed state marks all of its densities as fixed */
  int allFixed = (state->fix != 0);
  ghmm_c_emission *dens;

  /* ---- mixture of densities ---- */
  if (xmlTextWriterStartElement(writer, BAD_CAST "mixture") < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (mixture)");
    goto STOP;
  }

  for (k = 0; k < state->M; k++) {
    dens = state->e + k;

    /* element name and the attributes specific to the density type */
    switch (dens->type) {
    case normal:
      if (xmlTextWriterStartElement(writer, BAD_CAST "normal") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normal)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "mean", dens->mean.val);
      WRITE_DOUBLE_ATTRIBUTE(writer, "variance", dens->variance.val);
      break;

    case multinormal:
      if (xmlTextWriterStartElement(writer, BAD_CAST "multinormal") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (multinormal)");
        goto STOP;
      }
      if (xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "dimension",
                                            "%d", dens->dimension) < 0) {
        GHMM_LOG(LERROR, "failed to write dimension attribute");
        goto STOP;
      }
      break;

    case normal_left:
      if (xmlTextWriterStartElement(writer, BAD_CAST "normalLeftTail") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normalLeftTail)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "mean", dens->mean.val);
      WRITE_DOUBLE_ATTRIBUTE(writer, "variance", dens->variance.val);
      /* the "max" attribute is taken from the min field */
      WRITE_DOUBLE_ATTRIBUTE(writer, "max", dens->min);
      break;

    case normal_right:
      if (xmlTextWriterStartElement(writer, BAD_CAST "normalRightTail") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (normalRightTail)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "mean", dens->mean.val);
      WRITE_DOUBLE_ATTRIBUTE(writer, "variance", dens->variance.val);
      /* the "min" attribute is taken from the max field */
      WRITE_DOUBLE_ATTRIBUTE(writer, "min", dens->max);
      break;

    case uniform:
      if (xmlTextWriterStartElement(writer, BAD_CAST "uniform") < 0) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (uniform)");
        goto STOP;
      }
      WRITE_DOUBLE_ATTRIBUTE(writer, "min", dens->min);
      WRITE_DOUBLE_ATTRIBUTE(writer, "max", dens->max);
      break;

    default:
      GHMM_LOG_PRINTF(LERROR, LOC, "invalid density %d at position %d",
                      dens->type, k);
      goto STOP;
    }

    /* optional attributes */
    if (allFixed || dens->fixed) {
      if (xmlTextWriterWriteAttribute(writer, BAD_CAST "fixed", BAD_CAST "1") < 0) {
        GHMM_LOG(LERROR, "failed to set fixed attribute");
        goto STOP;
      }
    }

    /* the prior is written only for real mixtures */
    if (state->M > 1) {
      WRITE_DOUBLE_ATTRIBUTE(writer, "prior", state->c[k]);
    }

    /* mean vector and covariance matrix are children of a multinormal */
    if (dens->type == multinormal) {
      if (writeMultiNormal(writer, dens) < 0) {
        GHMM_LOG(LERROR, "failed to write mean and covariance childs");
        goto STOP;
      }
    }

    /* close the density element */
    if (xmlTextWriterEndElement(writer) < 0) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (all densities)");
      goto STOP;
    }
  }

  /* close the mixture element */
  if (xmlTextWriterEndElement(writer) < 0) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (mixture)");
    goto STOP;
  }

  /* ---- position, only if both coordinates are positive ---- */
  if ((state->xPosition > 0) && (state->yPosition > 0)) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "position")) {
      GHMM_LOG(LERROR, "failed to start position element (position)");
      goto STOP;
    }
    if (xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "x", "%d",
                                          state->xPosition) < 0) {
      GHMM_LOG(LERROR, "failed to write x position");
      goto STOP;
    }
    if (xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "y", "%d",
                                          state->yPosition) < 0) {
      GHMM_LOG(LERROR, "failed to write y position");
      goto STOP;
    }
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (position)");
      goto STOP;
    }
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/*===========================================================================*/ static int parseState(xmlDocPtr doc, xmlNodePtr cur, ghmm_xmlfile* f, int * inDegree, int * outDegree, int modelNo) { #define CUR_PROC "parseState" int i, error, order=0, state=-1442, fixed=-985, tied=-9354, M, aprox, label; int curX=0, curY=0; double pi, prior; double *emissions = NULL; char *desc = NULL; char *s = NULL, *estr; int rev, stateFixed=1; ghmm_cstate *newcstate; ghmm_c_emission *emission; xmlNodePtr elem, child, multichild; state = getIntAttribute(cur, "id", &error); pi = getDoubleAttribute(cur, "initial", &error); if (error) { estr = ighmm_mprintf(NULL, 0, "can't read required intial probability for" "state %d", state); GHMM_LOG(LERROR, estr); goto STOP; } else desc = xmlGetProp(cur, BAD_CAST "desc"); elem = cur->children; while (elem!=NULL) { /* ======== silent state ============================================== */ if ((!xmlStrcmp(elem->name, BAD_CAST "silent"))) { switch (f->modelType & PTR_TYPE_MASK) { case (GHMM_kDiscreteHMM): f->model.d[modelNo]->silent[state] = 1; break; case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses): f->model.ds[modelNo]->silent[state] = 1; break; case (GHMM_kDiscreteHMM+GHMM_kPairHMM): case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses): f->model.dp[modelNo]->silent[state] = 1; break; default: GHMM_LOG(LERROR, "invalid modelType"); goto STOP; } } /* ======== discrete state (possible higher order) ==================== */ if ((!xmlStrcmp(elem->name, BAD_CAST "discrete"))) { assert((f->modelType & GHMM_kDiscreteHMM) && ((f->modelType & GHMM_kPairHMM) == 0)); /* fixed is a propety of the distribution and optional */ fixed = getIntAttribute(elem, "fixed", &error); if (error) fixed = 0; /* order is optional for discrete */ if (f->modelType & GHMM_kHigherOrderEmissions) { order = getIntAttribute(elem, "order", &error); if (error) order = 0; } rev = getIntAttribute(cur, "rev", &error); if (error) rev = 0; /* parsing emission probabilities */ 
s = (char *)xmlNodeGetContent(elem); switch (f->modelType & PTR_TYPE_MASK) { case (GHMM_kDiscreteHMM): f->model.d[modelNo]->s[state].desc = desc; f->model.d[modelNo]->s[state].pi = pi; f->model.d[modelNo]->s[state].fix = fixed; if (f->modelType & GHMM_kHigherOrderEmissions) { f->model.d[modelNo]->order[state] = order; if (f->model.d[modelNo]->maxorder < order) { f->model.d[modelNo]->maxorder = order; estr = ighmm_mprintf(NULL, 0, "Updated maxorder to %d\n", f->model.d[modelNo]->maxorder); GHMM_LOG(LDEBUG, estr); m_free(estr); } } ARRAY_MALLOC(emissions, pow(f->model.d[modelNo]->M, order+1)); parseCSVList(s, pow(f->model.d[modelNo]->M, order+1), emissions, rev); free(f->model.d[modelNo]->s[state].b); f->model.d[modelNo]->s[state].b = emissions; break; case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses): f->model.ds[modelNo]->s[state].desc = desc; f->model.ds[modelNo]->s[state].pi = pi; f->model.ds[modelNo]->s[state].fix = fixed; if (f->modelType & GHMM_kHigherOrderEmissions) f->model.ds[modelNo]->order[state] = order; ARRAY_MALLOC(emissions, pow(f->model.ds[modelNo]->M, order+1)); parseCSVList(s, pow(f->model.ds[modelNo]->M, order+1), emissions, rev); f->model.ds[modelNo]->s[state].b = emissions; break; default: GHMM_LOG(LERROR, "invalid modelType"); goto STOP; } m_free(s); } /* ======== continuous state ========================================== */ if ((!xmlStrcmp(elem->name, BAD_CAST "mixture"))) { assert(f->modelType & GHMM_kContinuousHMM); M = 0; child = elem->children; while (child != NULL) { if ((!xmlStrcmp(child->name, BAD_CAST "normal")) || (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) || (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) || (!xmlStrcmp(child->name, BAD_CAST "multinormal")) || (!xmlStrcmp(child->name, BAD_CAST "uniform"))){ M ++; } child = child->next; } ghmm_cstate_alloc(f->model.c[modelNo]->s + state, M, inDegree[state], outDegree[state], f->model.c[modelNo]->cos); newcstate = f->model.c[modelNo]->s + state; newcstate->desc = desc; 
newcstate->M = M; newcstate->pi = pi; if( f->model.c[modelNo]->M < M) f->model.c[modelNo]->M = M; child = elem->children; i = 0; while (child != NULL) { emission = newcstate->e+i; /* common attributes */ if ((!xmlStrcmp(child->name, BAD_CAST "normal")) || (!xmlStrcmp(child->name, BAD_CAST "normalLeftTail")) || (!xmlStrcmp(child->name, BAD_CAST "normalRightTail")) || (!xmlStrcmp(child->name, BAD_CAST "multinormal")) || (!xmlStrcmp(child->name, BAD_CAST "uniform"))){ fixed = getIntAttribute(child, "fixed", &error); if (error) fixed = 0; stateFixed = fixed && stateFixed; /* allocate emission */ emission->fixed = fixed; prior = getDoubleAttribute(child, "prior", &error); if (error) prior = 1.0; newcstate->c[i] = prior; } /* child is not a density, continue with the next child */ else { child = child->next; continue; } /* density type dependent attributes */ if ((!xmlStrcmp(child->name, BAD_CAST "normal"))) { emission->mean.val = getDoubleAttribute(child, "mean", &error); emission->variance.val = getDoubleAttribute(child, "variance", &error); /* should the normal distribution be approximated? */ aprox = getIntAttribute(child, "approx", &error); if (error) aprox = 0; emission->type = aprox ? 
normal_approx : normal; emission->dimension = 1; if (f->model.c[modelNo]->dim > 1) { GHMM_LOG(LERROR, "All emissions must have same dimension."); goto STOP; } } if ((!xmlStrcmp(child->name, BAD_CAST "normalLeftTail"))) { emission->mean.val = getDoubleAttribute(child, "mean", &error); emission->variance.val = getDoubleAttribute(child, "variance", &error); emission->min = getDoubleAttribute(child, "max", &error); emission->type = normal_left; emission->dimension = 1; if (f->model.c[modelNo]->dim > 1) { GHMM_LOG(LERROR, "All emissions must have same dimension."); goto STOP; } } if ((!xmlStrcmp(child->name, BAD_CAST "normalRightTail"))) { emission->mean.val = getDoubleAttribute(child, "mean", &error); emission->variance.val = getDoubleAttribute(child, "variance", &error); emission->max = getDoubleAttribute(child, "min", &error); emission->type = normal_right; emission->dimension = 1; if (f->model.c[modelNo]->dim > 1) { GHMM_LOG(LERROR, "All emissions must have same dimension."); goto STOP; } } if ((!xmlStrcmp(child->name, BAD_CAST "uniform"))) { emission->max = getDoubleAttribute(child, "max", &error); emission->min = getDoubleAttribute(child, "min", &error); emission->type = uniform; emission->dimension = 1; if (f->model.c[modelNo]->dim > 1) { GHMM_LOG(LERROR, "All emissions must have same dimension."); goto STOP; } } if ((!xmlStrcmp(child->name, BAD_CAST "multinormal"))) { emission->type = multinormal; emission->dimension = getIntAttribute(child, "dimension", &error); /* check that all emissions in all states have same dimension or set when first emission is read*/ if (f->model.c[modelNo]->dim <= 1) f->model.c[modelNo]->dim = emission->dimension; else if (f->model.c[modelNo]->dim != emission->dimension) { GHMM_LOG(LERROR, "All emissions must have same dimension."); goto STOP; } if (0 != ghmm_c_emission_alloc(emission, emission->dimension)) { GHMM_LOG(LERROR, "Can not allocate multinormal emission."); goto STOP; } multichild = child->children; while (multichild != 
NULL) { if ((!xmlStrcmp(multichild->name, BAD_CAST "mean"))) { s = (char *)xmlNodeGetContent(multichild); if (-1 == parseCSVList(s, emission->dimension, emission->mean.vec, 0)) { GHMM_LOG(LERROR, "Can not parse mean CSV list."); goto STOP; } } if ((!xmlStrcmp(multichild->name, BAD_CAST "variance"))) { s = (char *)xmlNodeGetContent(multichild); if (-1 == parseCSVList(s, emission->dimension * emission->dimension, emission->variance.mat, 0)) { GHMM_LOG(LERROR, "Can not parse variance CSV list."); goto STOP; } if (0 != ighmm_invert_det(emission->sigmainv, &emission->det, emission->dimension, emission->variance.mat)) { GHMM_LOG(LERROR, "Can not calculate inverse of covariance matrix."); goto STOP; } if (0 != ighmm_cholesky_decomposition(emission->sigmacd, emission->dimension, emission->variance.mat)) { GHMM_LOG(LERROR, "Can not calculate cholesky decomposition of covariance matrix."); goto STOP; } } multichild = multichild->next; } } i++; child = child->next; } newcstate->fix = stateFixed; } /* ======== pair hmm state ============================================ */ if ((!xmlStrcmp(elem->name, BAD_CAST "pair"))) { } /* -------- background name ------------------------------------------ */ if ((!xmlStrcmp(elem->name, BAD_CAST "backgroundKey"))) { assert(f->modelType & GHMM_kBackgroundDistributions); s = (char *)xmlNodeGetContent(elem); for (i=0; i<f->model.d[modelNo]->bp->n; i++) { if (0 == strcmp(s, f->model.d[modelNo]->bp->name[i])) { if (order != f->model.d[modelNo]->bp->order[i]) { estr = ighmm_mprintf(NULL, 0, "order of background %s and state %d" " does not match", f->model.d[modelNo]->bp->name[i], state); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } else { f->model.d[modelNo]->background_id[state] = i; break; } } } if (i == f->model.d[modelNo]->bp->n) { estr = ighmm_mprintf(NULL, 0, "can't find background with name %s in" " state %d", s, state); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } m_free(s); } /* -------- tied to 
--------------------------------------------------- */ if ((!xmlStrcmp(elem->name, BAD_CAST "class"))) { assert(f->modelType & GHMM_kLabeledStates); s = (char *)xmlNodeGetContent(elem); label = atoi(s); m_free(s); if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM) { if (f->model.d[modelNo]->label_alphabet->size > label) f->model.d[modelNo]->label[state] = label; else GHMM_LOG(LWARN, "Invalid label"); } } /* -------- tied to --------------------------------------------------- */ if ((!xmlStrcmp(elem->name, BAD_CAST "tiedTo"))) { assert(f->modelType & GHMM_kTiedEmissions); s = (char *)xmlNodeGetContent(elem); tied = atoi(s); if (state>=tied) { f->model.d[modelNo]->tied_to[state] = tied; if (f->model.d[modelNo]->tied_to[tied] != tied) { estr = ighmm_mprintf(NULL, 0, "state %d not tied to tie group leader", state); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } } else { estr = ighmm_mprintf(NULL, 0, "state %d tiedTo (%d) is invalid", state, tied); GHMM_LOG(LERROR, estr); m_free(estr); goto STOP; } m_free(s); } /* -------- position for graphical editing ---------------------------- */ if ((!xmlStrcmp(elem->name, BAD_CAST "position"))) { curX = getIntAttribute(elem, "x", &error); if (error) GHMM_LOG(LWARN, "failed to read x position"); curY = getIntAttribute(elem, "y", &error); if (error) GHMM_LOG(LWARN, "failed to read y position"); switch (f->modelType & PTR_TYPE_MASK) { case GHMM_kDiscreteHMM: f->model.d[modelNo]->s[state].xPosition = curX; f->model.d[modelNo]->s[state].yPosition = curY; break; case GHMM_kDiscreteHMM+GHMM_kTransitionClasses: f->model.ds[modelNo]->s[state].xPosition = curX; f->model.ds[modelNo]->s[state].yPosition = curY; break; case GHMM_kDiscreteHMM+GHMM_kPairHMM: case GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses: f->model.dp[modelNo]->s[state].xPosition = curX; f->model.dp[modelNo]->s[state].yPosition = curY; break; case GHMM_kContinuousHMM: case GHMM_kContinuousHMM+GHMM_kTransitionClasses: case 
(GHMM_kContinuousHMM+GHMM_kMultivariate): case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses): f->model.c[modelNo]->s[state].xPosition = curX; f->model.c[modelNo]->s[state].yPosition = curY; break; default: GHMM_LOG(LERROR, "invalid modelType"); goto STOP; } } elem = elem->next; } return 0; STOP: m_free(s); m_free(desc); m_free(emissions) return -1; #undef CUR_PROC }
/* ========================================================================= */
/**
 * Writes one "state" element for state sNo of model moNo: the id attribute,
 * the initial probability, the optional description and finally the state
 * contents that depend on the model type.
 *
 * Pair HMM branches are not implemented (their code is commented out); for
 * those model types the element is written with an initial probability of 0
 * and without contents.
 *
 * @param writer  XML text writer the element is written to
 * @param f       xml file structure holding the models and their type
 * @param moNo    index of the model within f
 * @param sNo     index of the state within the model
 * @return 0 on success, -1 on error
 */
static int writeState(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo,
                      int sNo) {
#define CUR_PROC "writeState"
  /* Defined defaults are required: the pair HMM branches below assign
     nothing, and these values are read unconditionally afterwards. */
  int rc = 0;
  double w_pi = 0.0;
  char *w_desc = NULL;

  /* start state */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "state")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (state)");
    goto STOP;
  }

  /* write id attribute */
  if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "id", "%d", sNo))
    GHMM_LOG(LERROR, "failed to write statte id attribute");

  /* read state attribute from different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_pi = f->model.d[moNo]->s[sNo].pi;
    w_desc = f->model.d[moNo]->s[sNo].desc;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_pi = f->model.ds[moNo]->s[sNo].pi;
    w_desc = f->model.ds[moNo]->s[sNo].desc;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented:
       w_pi = f->model.d[moNo]->s[sNo].pi;
       w_desc = f->model.d[moNo]->s[sNo]; */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_pi = f->model.c[moNo]->s[sNo].pi;
    w_desc = f->model.c[moNo]->s[sNo].desc;
    break;
  default:
    /* bail out right away instead of writing attributes for a model type
       that the contents switch below rejects anyway */
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  /* write initial probability as attribute */
  WRITE_DOUBLE_ATTRIBUTE(writer, "initial", w_pi);

  /* write state description; the writer returns the number of bytes written
     on success, only a negative value signals an error */
  if (w_desc) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "desc",
                                        BAD_CAST replaceXMLEntity(w_desc)))
      GHMM_LOG(LERROR, "writing state description failed");
  }

  /* write state contents for different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    rc = writeDiscreteStateContents(writer, f, moNo, sNo);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    rc = writeDiscreteSwitchingStateContents(writer, f, moNo, sNo);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented:
       rc = writeDiscretePairStateContents(writer, f, moNo, sNo); */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    rc = writeContinuousStateContents(writer, f, moNo, sNo);
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  if (rc) {
    GHMM_LOG_PRINTF(LERROR, LOC, "writing state contents failed. model_type = %s",
                    strModeltype(f->modelType & PTR_TYPE_MASK));
    goto STOP;
  }

  /* end state*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (state)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/**
 * Writes one "transition" element for every outgoing transition of state sNo
 * in model moNo. Each element carries the source and target state ids as
 * attributes and a "probability" child holding one value per transition
 * class as a CSV list.
 *
 * Pair HMM branches are not implemented (their code is commented out); for
 * those model types no transition is written and 0 is returned.
 *
 * @param writer  XML text writer the elements are written to
 * @param f       xml file structure holding the models and their type
 * @param moNo    index of the model within f
 * @param sNo     index of the source state within the model
 * @return 0 on success, -1 on error
 */
static int writeTransition(xmlTextWriterPtr writer, ghmm_xmlfile* f, int moNo,
                           int sNo) {
#define CUR_PROC "writeTransition"
  /* Defined defaults: the pair HMM branches assign nothing, so without them
     the allocation size and the loop bound would be indeterminate. */
  int cos = 1, i, j;
  int out_states = 0, *out_id = NULL;
  double **out_a = NULL;
  double *w_out_a = NULL;   /* scratch row: one probability per class */
  char *tmp;

  /* collect the transition data for the different model types */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    out_states = f->model.d[moNo]->s[sNo].out_states;
    out_id     = f->model.d[moNo]->s[sNo].out_id;
    /* a single class: address the one probability row like a 1-row matrix */
    out_a      = &(f->model.d[moNo]->s[sNo].out_a);
    cos        = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    out_states = f->model.ds[moNo]->s[sNo].out_states;
    out_id     = f->model.ds[moNo]->s[sNo].out_id;
    out_a      = f->model.ds[moNo]->s[sNo].out_a;
    cos        = f->model.ds[moNo]->cos;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented:
       out_states = f->model.dp[moNo]->s[sNo].out_states;
       out_id     = f->model.dp[moNo]->s[sNo].out_id;
       out_a      = f->model.dp[moNo]->s[sNo].out_a;
       cos        = f->model.dp[moNo]->cos; */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    out_states = f->model.c[moNo]->s[sNo].out_states;
    out_id     = f->model.c[moNo]->s[sNo].out_id;
    out_a      = f->model.c[moNo]->s[sNo].out_a;
    cos        = f->model.c[moNo]->cos;
    break;
  default:
    GHMM_LOG(LCRITIC, "invalid modelType");
    goto STOP;
  }

  ARRAY_MALLOC(w_out_a, cos);

  for (i=0; i<out_states; i++) {
    if (0 > xmlTextWriterStartElement(writer, BAD_CAST "transition")) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (transition)");
      goto STOP;
    }

    /* write source id (current state) as attribute */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "source", "%d", sNo))
      GHMM_LOG(LERROR, "failed to write transition source attribute");

    /* write target id as attribute */
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "target", "%d", out_id[i]))
      GHMM_LOG(LERROR, "failed to write transition target attribute");

    /* gather the probabilities of transition i over all classes */
    for (j=0; j<cos; j++)
      w_out_a[j] = out_a[j][i];

    tmp = doubleArrayToCSV(w_out_a, cos);
    if (tmp) {
      if (0 > xmlTextWriterWriteElement(writer, BAD_CAST "probability", BAD_CAST tmp)) {
        GHMM_LOG(LERROR, "Error at xmlTextWriterWriteElement (transition probabilities)");
        m_free(tmp);
        goto STOP;
      }
      m_free(tmp);
    }
    else {
      GHMM_LOG(LERROR, "converting transition probabilities array to CSV failed");
      goto STOP;
    }

    /* end transition */
    if (0 > xmlTextWriterEndElement(writer)) {
      GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (transition)");
      goto STOP;
    }
  }

  /* release the scratch row, it is not needed beyond this function */
  m_free(w_out_a);
  return 0;
STOP:
  /* the scratch row is still NULL when we fail before it was allocated */
  if (w_out_a) {
    m_free(w_out_a);
  }
  return -1;
#undef CUR_PROC
}
/* ========================================================================= */
/**
 * Writes one complete "HMM" element for model `number` of f: the name, type,
 * prior and transitionClasses attributes, the alphabet(s), the optional
 * label alphabet and background distributions, then all states followed by
 * all outgoing transitions.
 *
 * Pair HMM branches are not implemented (their code is commented out); for
 * those model types no attributes can be collected and no states are written.
 *
 * @param writer  XML text writer the element is written to
 * @param f       xml file structure holding the models and their type
 * @param number  index of the model within f
 * @return 0 on success, -1 on error
 */
static int writeHMM(xmlTextWriterPtr writer, ghmm_xmlfile* f, int number) {
#define CUR_PROC "writeHMM"
  /* Defined defaults: the pair HMM branches assign nothing, so without them
     every value below would be read uninitialised for those model types.
     A negative prior means "do not write the prior attribute". */
  int rc=0, i, N=0;
  int w_cos=0;
  double w_prior=-1.0;
  char *w_name=NULL;
  char *w_type=NULL;

  /* start HMM */
  if (0 > xmlTextWriterStartElement(writer, BAD_CAST "HMM")) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterStartElement (HMM)");
    goto STOP;
  }

  /* collect the HMM attributes applicable to the model type */
  switch (f->modelType & PTR_TYPE_MASK) {
  case GHMM_kDiscreteHMM:
    w_name  = f->model.d[number]->name;
    w_type  = strModeltype(f->model.d[number]->model_type);
    w_prior = f->model.d[number]->prior;
    N       = f->model.d[number]->N;
    w_cos   = 1;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    w_name  = f->model.ds[number]->name;
    w_type  = strModeltype(f->model.ds[number]->model_type);
    w_prior = f->model.ds[number]->prior;
    N       = f->model.ds[number]->N;
    w_cos   = 0;
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /* not implemented:
       w_name  = f->model.dp[number]->name;
       w_type  = strModeltype(f->model.dp[number]->model_type);
       w_prior = f->model.dp[number]->prior;
       N       = f->model.dp[number]->N;
       w_cos   = 0; */
    break;
  case GHMM_kContinuousHMM:
  case (GHMM_kContinuousHMM+GHMM_kMultivariate):
  case (GHMM_kContinuousHMM+GHMM_kTransitionClasses):
  case (GHMM_kContinuousHMM+GHMM_kMultivariate+GHMM_kTransitionClasses):
    w_name  = f->model.c[number]->name;
    /* fall back to the file-wide type if the model carries none itself */
    if (f->model.c[number]->model_type)
      w_type = strModeltype(f->model.c[number]->model_type);
    else
      w_type = strModeltype(f->modelType);
    w_prior = f->model.c[number]->prior;
    N       = f->model.c[number]->N;
    w_cos   = f->model.c[number]->cos;
    break;
  default:
    GHMM_LOG(LERROR, "invalid modelType");
    goto STOP;
  }

  /* The attribute writer returns the number of bytes written on success;
     only a negative value signals an error. */
  if (w_name) {
    if (0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "name", BAD_CAST w_name))
      GHMM_LOG(LERROR, "writing HMM name failed");
  }

  /* never hand a NULL string to the writer, report it as a failed write */
  if (!w_type
      || 0 > xmlTextWriterWriteAttribute(writer, BAD_CAST "type", BAD_CAST w_type))
    GHMM_LOG(LERROR, "writing HMM type failed");

  if (w_prior >= 0.0) {
    WRITE_DOUBLE_ATTRIBUTE(writer, "prior", w_prior);
  }

  if (w_cos > 1)
    if (0 > xmlTextWriterWriteFormatAttribute(writer, BAD_CAST "transitionClasses",
                                              "%d", w_cos))
      GHMM_LOG(LERROR, "failed to write no of transitionClasses");

  /* write alphabet if applicable */
  switch (f->modelType & (GHMM_kDiscreteHMM + GHMM_kTransitionClasses
                          + GHMM_kPairHMM)) {
  case GHMM_kDiscreteHMM:
    rc = writeAlphabet(writer, f->model.d[number]->alphabet, kAlphabet);
    break;
  case (GHMM_kDiscreteHMM+GHMM_kTransitionClasses):
    /*rc = writeAlphabet(writer, f->model.ds[number]->alphabet, kAlphabet);*/
    break;
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM):
  case (GHMM_kDiscreteHMM+GHMM_kPairHMM+GHMM_kTransitionClasses):
    /*rc = writeAlphabet(writer, f->model.dp[number]->alphabets[0], kAlphabet);
    if (rc) {
      GHMM_LOG(LERROR, "writing first alphabet of discrete pair HMM failed");
      goto STOP;
    }
    rc = writeAlphabet(writer, f->model.dp[number]->alphabets[1], kAlphabet);*/
    break;
  default:
    /* model types without an alphabet */
    break;
  }

  if (rc) {
    GHMM_LOG_PRINTF(LERROR, LOC, "writing alphabet for HMM %d (type %s) failed",
                    number, strModeltype(f->modelType));
    goto STOP;
  }

  /* write label alphabet if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kLabeledStates) {
    if (writeAlphabet(writer, f->model.d[number]->label_alphabet, kLabelAlphabet))
      GHMM_LOG(LERROR, "writing of label alphabet failed");
  }

  /* write background distributions if applicable */
  if ((f->modelType & PTR_TYPE_MASK) == GHMM_kDiscreteHMM
      && f->modelType & GHMM_kBackgroundDistributions) {
    if (writeBackground(writer, f->model.d[number]->bp))
      GHMM_LOG(LERROR, "writing of background distributions failed");
  }

  /* write all states */
  for (i=0; i<N; i++)
    if (writeState(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing of state %d in HMM %d failed", i, number);
      goto STOP;
    }

  /* write all outgoing transitions */
  for (i=0; i<N; i++)
    if (writeTransition(writer, f, number, i)) {
      GHMM_LOG_PRINTF(LERROR, LOC, "writing transitions of state %d in HMM %d failed",
                      i, number);
      goto STOP;
    }

  /*end HMM*/
  if (0 > xmlTextWriterEndElement(writer)) {
    GHMM_LOG(LERROR, "Error at xmlTextWriterEndElement (HMM)");
    goto STOP;
  }

  return 0;
STOP:
  return -1;
#undef CUR_PROC
}
/**
   Trains the ghmm_dmodel with a set of annotated sequences till convergence
   using gradient descent.
   Model must not have silent states. (checked in Python wrapper)

   The learning rate is adapted while training: it is increased after a
   significant improvement and decreased after a step without improvement.
   Whenever a step is rejected, mo is handed to ghmm_dmodel_free and replaced
   by a copy of the last accepted model, so the caller must continue with the
   returned pointer and not with the one passed in.

   @return            trained model on success (also when convergence is
                      detected before no_steps is reached), NULL pointer if a
                      training step failed
   @param mo:         pointer to a ghmm_dmodel
   @param sq:         struct of annotated sequences
   @param eta:        initial parameter eta (learning rate)
   @param no_steps    number of training steps
 */
ghmm_dmodel* ghmm_dmodel_label_gradient_descent (ghmm_dmodel* mo, ghmm_dseq * sq,
                                                  double eta, int no_steps)
{
#define CUR_PROC "ghmm_dmodel_label_gradient_descent"
  char * str;
  int runs = 0;
  double cur_perf, last_perf;
  ghmm_dmodel *last;   /* last accepted model, used to undo a bad step */

  last = ghmm_dmodel_copy(mo);
  last_perf = compute_performance (last, sq);

  while (eta > GHMM_EPS_PREC && runs < no_steps) {
    runs++;
    if (-1 == gradient_descent_onestep(mo, sq, eta)) {
      ghmm_dmodel_free(&last);
      return NULL;
    }
    cur_perf = compute_performance(mo, sq);

    if (last_perf < cur_perf) {
      /* if model is degenerated, lower eta and try again */
      if (cur_perf > 0.0) {
        str = ighmm_mprintf(NULL, 0, "current performance = %g", cur_perf);
        GHMM_LOG(LINFO, str);
        m_free(str);
        ghmm_dmodel_free(&mo);
        mo = ghmm_dmodel_copy(last);
        eta *= .5;
      }
      else {
        /* Improvement insignificant, assume convergence */
        if (fabs (last_perf - cur_perf) < cur_perf * (-1e-8)) {
          ghmm_dmodel_free(&last);
          str = ighmm_mprintf(NULL, 0, "convergence after %d steps.", runs);
          GHMM_LOG(LINFO, str);
          m_free(str);
          /* convergence is a success: hand back the trained model, a NULL
             pointer is reserved for errors */
          return mo;
        }

        if (runs < 175 || 0 == runs % 50) {
          str = ighmm_mprintf(NULL, 0, "Performance: %g\t improvement: %g\t step %d",
                              cur_perf, cur_perf - last_perf, runs);
          GHMM_LOG(LINFO, str);
          m_free(str);
        }

        /* significant improvement, next iteration */
        ghmm_dmodel_free(&last);
        last = ghmm_dmodel_copy(mo);
        last_perf = cur_perf;
        eta *= 1.07;
      }
    }
    /* no improvement */
    else {
      if (runs < 175 || 0 == runs % 50) {
        str = ighmm_mprintf(NULL, 0, "Performance: %g\t !IMPROVEMENT: %g\t step %d",
                            cur_perf, cur_perf - last_perf, runs);
        GHMM_LOG(LINFO, str);
        m_free(str);
      }

      /* try another training step */
      runs++;
      eta *= .85;
      if (-1 == gradient_descent_onestep(mo, sq, eta)) {
        ghmm_dmodel_free(&last);
        return NULL;
      }
      cur_perf = compute_performance (mo, sq);
      str = ighmm_mprintf(NULL, 0, "Performance: %g\t ?Improvement: %g\t step %d",
                          cur_perf, cur_perf - last_perf, runs);
      GHMM_LOG(LINFO, str);
      m_free(str);

      /* improvement, save and proceed with next iteration */
      if (last_perf < cur_perf && cur_perf < 0.0) {
        ghmm_dmodel_free (&last);
        last = ghmm_dmodel_copy(mo);
        last_perf = cur_perf;
      }
      /* still no improvement, revert to saved model */
      else {
        /* the extra step is not counted when it gets discarded */
        runs--;
        ghmm_dmodel_free(&mo);
        mo = ghmm_dmodel_copy(last);
        eta *= .9;
      }
    }
  }

  ghmm_dmodel_free(&last);
  return mo;
#undef CUR_PROC
}
/*============================================================================*/
/**
 * Computes the joint log probability of an observation sequence and a state
 * path: the log of the initial probability of the first state plus, for every
 * visited state, the log of the transition into it and the log of its
 * emission density at the current observation.
 *
 * Every state consumes mo->dim values of O. Processing stops as soon as either
 * the state path or the observation sequence is used up; a leftover on the
 * other side is reported with an info message and is not an error.
 *
 * @param mo     continuous model
 * @param O      observation sequence
 * @param len    number of values in O
 * @param S      state path
 * @param slen   number of states in S
 * @param log_p  receives the joint log probability
 * @return 0 on success, -1 if the inputs are too short for a first emission,
 *         the path uses a transition the model does not have, or the
 *         transition class can not be determined
 */
int ghmm_cmodel_logp_joint(ghmm_cmodel *mo, const double *O, int len,
                           const int *S, int slen, double *log_p)
{
# define CUR_PROC "ghmm_cmodel_logp_joint"
  int prevstate, state, state_pos=0, pos=0, j, osc=0;
  int dim = mo->dim;

  /* S[0] and the first emission at O are read unconditionally below */
  if (slen < 1 || len < dim) {
    GHMM_LOG(LERROR, "state path is empty or observation sequence holds "
             "less than one emission");
    goto STOP;
  }

  prevstate = state = S[0];
  *log_p = log(mo->s[state].pi);
  if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */ ) {
    *log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
    pos+=dim;
  }

  /* Both a further state and a further emission must be available; going on
     while only one of them is left would read past the end of the other. */
  for (state_pos=1; state_pos < slen && pos+dim <= len; state_pos++) {
    state = S[state_pos];

    /* look up the incoming transition that starts in the previous state */
    for (j=0; j < mo->s[state].in_states; ++j) {
      if (prevstate == mo->s[state].in_id[j])
        break;
    }

    if (mo->cos > 1) {
      if (!mo->class_change->get_class) {
        GHMM_LOG(LERROR, "get_class not initialized");
        goto STOP;
      }
      osc = mo->class_change->get_class(mo, O, mo->class_change->k, pos);
      /* the class is used as an array index, so reject negative values too */
      if (osc < 0 || osc >= mo->cos) {
        GHMM_LOG_PRINTF(LERROR, LOC, "get_class returned index %d "
                        "but model has only %d classes!", osc, mo->cos);
        goto STOP;
      }
    }

    if (j == mo->s[state].in_states
        || fabs(mo->s[state].in_a[osc][j]) < GHMM_EPS_PREC) {
      GHMM_LOG_PRINTF(LERROR, LOC, "Sequence can't be built. There is no "
                      "transition from state %d to %d.", prevstate, state);
      goto STOP;
    }

    *log_p += log(mo->s[state].in_a[osc][j]);

    if (!(mo->model_type & GHMM_kSilentStates) || 1 /* XXX !mo->silent[state] */) {
      *log_p += log(ghmm_cmodel_calc_b(mo->s+state, O+pos));
      pos+=dim;
    }

    prevstate = state;
  }

  if (pos < len)
    GHMM_LOG_PRINTF(LINFO, LOC, "state sequence too short! processed only %d symbols",
                    pos/dim);
  if (state_pos < slen)
    GHMM_LOG_PRINTF(LINFO, LOC, "sequence too short! visited only %d states",
                    state_pos);

  return 0;
 STOP:
  return -1;
# undef CUR_PROC
}