/*============================================================================*/ double ighmm_rand_std_normal (int seed) { double r2, theta; # define CUR_PROC "ighmm_rand_std_normal" if (seed != 0) { GHMM_RNG_SET (RNG, seed); } #ifdef DO_WITH_GSL return (gsl_ran_gaussian (RNG, 1.0)); #else /* Use the polar Box-Mueller transform */ /* double x, y, r2; do { x = 2.0 * GHMM_RNG_UNIFORM(RNG) - 1.0; y = 2.0 * GHMM_RNG_UNIFORM(RNG) - 1.0; r2 = (x * x) + (y * y); } while (r2 >= 1.0); return x * sqrt((-2.0 * log(r2)) / r2); */ r2 = -2.0 * log (GHMM_RNG_UNIFORM (RNG)); /* r2 ~ chi-square(2) */ theta = 2.0 * PI * GHMM_RNG_UNIFORM (RNG); /* theta ~ uniform(0, 2 \pi) */ return sqrt (r2) * cos (theta); #endif # undef CUR_PROC } /* ighmm_rand_std_normal */
//only uses first sequence int* ghmm_bayes_hmm_fbgibbs(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq, int burnIn, int seed){ #define CUR_PROC "ghmm_cmodel_fbgibbs" //XXX seed GHMM_RNG_SET (RNG, seed); int max_seq = ghmm_cseq_max_len(seq); double **alpha = ighmm_cmatrix_alloc(max_seq,mo->N); double ***pmats = ighmm_cmatrix_3d_alloc(max_seq, mo->N, mo->N); int **Q; ARRAY_CALLOC(Q, seq->seq_number); int seq_iter; for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){ ARRAY_CALLOC(Q[seq_iter], seq->seq_len[seq_iter]); } ghmm_sample_data data; ghmm_alloc_sample_data(bayes, &data); ghmm_clear_sample_data(&data, bayes);//XXX swap parameter for(; burnIn > 0; burnIn--){ for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){ ghmm_cmodel_fbgibbstep(mo,seq->seq[seq_iter],seq->seq_len[seq_iter], Q[seq_iter], alpha, pmats, NULL); ghmm_get_sample_data(&data, bayes, Q[seq_iter], seq->seq[seq_iter], seq->seq_len[seq_iter]); ghmm_update_model(mo, bayes, &data); ghmm_clear_sample_data(&data, bayes); } } ighmm_cmatrix_free(&alpha, max_seq); ighmm_cmatrix_3d_free(&pmats, max_seq,mo->N); return Q; STOP: return NULL; //XXX error handle #undef CUR_PROC }
/*============================================================================*/
/* Program entry point.
 *
 * When built with GHMM_OBSOLETE: expects three file arguments and an optional
 * integer seed. The generator is initialised, seeded either from argv[4] or
 * from the clock, and smix_hmm_run() does the work. The exit code stays -1
 * when the argument count is wrong; otherwise it is whatever smix_hmm_run()
 * returned. A closing message is emitted in both cases.
 *
 * Without GHMM_OBSOLETE: prints a notice to stderr and returns 0.
 */
int main(int argc, char* argv[])
{
#define CUR_PROC "smix_hmm_main"
#ifdef GHMM_OBSOLETE
  int exitcode = -1;

  if (argc == 4 || argc == 5) {
    ghmm_rng_init();
    if (argc == 5) {
      /* explicit seed given on the command line */
      GHMM_RNG_SET(RNG,atoi(argv[4]));
    }
    else {
      ghmm_rng_timeseed(RNG);
    }
    exitcode = smix_hmm_run(argc, argv);
  }
  else {
    printf("Insufficient arguments. Usage: \n");
    printf("mix_hmm [Seq.File] [InitModel File] [Out File] <seed>\n");
  }

  /*------------------------------------------------------------------------*/
  ighmm_mes(MES_WIN, "\n(%2.2T): Program finished with exitcode %d.\n", exitcode );
  ighmm_mes_exit();
  return(exitcode);
#else  /* GHMM_OBSOLETE */
  fprintf (stderr, "cluster is obsolete. If you need it rebuild the GHMM with \"GHMM_OBSOLETE\"\n");
  return 0;
#endif /* GHMM_OBSOLETE */
# undef CUR_PROC
}                               /* main */
/*============================================================================*/ int ighmm_rand_multivariate_normal (int dim, double *x, double *mue, double *sigmacd, int seed) { # define CUR_PROC "ighmm_rand_multivariate_normal" /* generate random vector of multivariate normal * * dim number of dimensions * x space to store resulting vector in * mue vector of means * sigmacd linearized cholesky decomposition of cov matrix * seed RNG seed * * see Barr & Slezak, A Comparison of Multivariate Normal Generators */ int i, j; #ifdef DO_WITH_GSL gsl_vector *y = gsl_vector_alloc(dim); gsl_vector *xgsl = gsl_vector_alloc(dim); gsl_matrix *cd = gsl_matrix_alloc(dim, dim); #endif if (seed != 0) { GHMM_RNG_SET (RNG, seed); /* do something here */ return 0; } else { #ifdef DO_WITH_GSL /* cholesky decomposition matrix */ for (i=0;i<dim;i++) { for (j=0;j<dim;j++) { gsl_matrix_set(cd, i, j, sigmacd[i*dim+j]); } } /* generate a random vector N(O,I) */ for (i=0;i<dim;i++) { gsl_vector_set(y, i, ighmm_rand_std_normal(seed)); } /* multiply cd with y */ gsl_blas_dgemv(CblasNoTrans, 1.0, cd, y, 0.0, xgsl); for (i=0;i<dim;i++) { x[i] = gsl_vector_get(xgsl, i) + mue[i]; } gsl_vector_free(y); gsl_vector_free(xgsl); gsl_matrix_free(cd); #else /* multivariate random numbers without gsl */ double randuni; for (i=0;i<dim;i++) { randuni = ighmm_rand_std_normal(seed); for (j=0;j<dim;j++) { if (i==0) x[j] = mue[j]; x[j] += randuni * sigmacd[j*dim+i]; } } #endif return 0; } # undef CUR_PROC } /* ighmm_rand_multivariate_normal */
/* linear interpolation: */ if (i >= PDFLEN - 1) { i = PDFLEN - 1; pdf_x = y * pdf_stdnormal[i]; } else pdf_x = y * (pdf_stdnormal[i] + (z - i * X_STEP_PDF) * (pdf_stdnormal[i + 1] - pdf_stdnormal[i]) / X_STEP_PDF); return (pdf_x); STOP: return (-1.0); # undef CUR_PROC } /* double ighmm_rand_normal_density_approx */ double ighmm_rand_dirichlet(int seed, int len, double *alpha, double *theta){ if (seed != 0) { GHMM_RNG_SET(RNG, seed); } #ifdef DO_WITH_GSL gsl_ran_dirichlet(RNG, len, alpha, theta); #else printf("not implemted without gsl. Compile with gsl to use dirichlet"); #endif }
double ighmm_rand_normal_right (double a, double mue, double u, int seed) { # define CUR_PROC "ighmm_rand_normal_right" double x = -1; double sigma; #ifdef DO_WITH_GSL double s; #else double U, Us, Us1, Feps, t, T; #endif if (u <= 0.0) { GHMM_LOG(LCONVERTED, "u <= 0.0 not allowed\n"); goto STOP; } sigma = sqrt(u); if (seed != 0) { GHMM_RNG_SET (RNG, seed); } #ifdef DO_WITH_GSL /* move boundary to lower values in order to achieve maximum at mue gsl_ran_gaussian_tail(generator, lower_boundary, sigma) */ return mue + gsl_ran_gaussian_tail(RNG, a - mue, sqrt (u)); #else /* DO_WITH_GSL */ /* Inverse transformation with restricted sampling by Fishman */ U = GHMM_RNG_UNIFORM(RNG); Feps = ighmm_rand_get_PHI((a-mue) / sigma); Us = Feps + (1-Feps) * U; Us1 = 1-Us; t = m_min (Us, Us1); t = sqrt (-log (t * t)); T = sigma * (t - (C0 + t * (C1 + t * C2)) / (1 + t * (D1 + t * (D2 + t * D3)))); if (Us < Us1) x = mue - T; else x = mue + T; #endif /* DO_WITH_GSL */ STOP: return x; # undef CUR_PROC } /* randvar_normal_pos */
/*============================================================================*/
/* Draw a uniformly distributed integer and return it as a double.
 *
 *   seed  when non-zero, the shared generator RNG is re-seeded first
 *   K     range argument: passed straight to gsl_rng_uniform_int() with GSL,
 *         otherwise used to scale a uniform variate which is then truncated
 *         to int
 */
double ighmm_rand_uniform_int (int seed, int K)
{
# define CUR_PROC "ighmm_rand_uniform_int"
  double drawn;

  if (seed != 0) {
    GHMM_RNG_SET (RNG, seed);
  }

#ifdef DO_WITH_GSL
  /* GSL provides the integer draw directly */
  drawn = (double) gsl_rng_uniform_int (RNG, K);
#else
  /* scale a uniform variate by K and truncate */
  drawn = (double) ((int) (((double) K) * GHMM_RNG_UNIFORM (RNG)));
#endif

  return drawn;
# undef CUR_PROC
}                               /* ighmm_rand_uniform_int */
/*============================================================================*/ double ighmm_rand_normal(double mue, double u, int seed) { double x; # define CUR_PROC "ighmm_rand_normal" if (seed != 0) { GHMM_RNG_SET(RNG, seed); } #ifdef DO_WITH_GSL return gsl_ran_gaussian(RNG, sqrt (u)) + mue; #else x = sqrt(u) * ighmm_rand_std_normal(seed) + mue; return x; #endif # undef CUR_PROC } /* ighmm_rand_normal */
/*============================================================================*/ double randvar_normal (double mue, double u, int seed) { # define CUR_PROC "randvar_normal" if (seed != 0) { GHMM_RNG_SET (RNG, seed); return (1.0 * sqrt (u) + mue); } else { #ifdef DO_WITH_GSL return (gsl_ran_gaussian (RNG, sqrt (u)) + mue); #else double x; x = sqrt (u) * randvar_std_normal (seed) + mue; return (x); #endif } # undef CUR_PROC } /* randvar_normal */
/* Forward-backward Gibbs sampling on a block-compressed view of the first
 * sequence in `seq`.
 *
 *   bayes              Bayesian model description for the sample-data helpers
 *   mo                 continuous model; updated in place by ghmm_update_model()
 *   seq                observation sequences; only seq->seq[0] is used
 *   burnIn             number of sweeps to run
 *   seed               value the shared generator RNG is seeded with
 *   width, delta       forwarded to compress_observations() (as width*delta
 *                      and delta) and merge_observations() (width)
 *   max_len_permitted  forwarded to merge_observations()
 *
 * Returns the sampled state path (allocated with seq->seq_len[0] entries), or
 * NULL when an allocation fails. On failure every buffer obtained so far is
 * released before returning.
 *
 * NOTE(review): `data` is set up by ghmm_alloc_sample_data() and never
 * released here; no matching release routine is visible from this function.
 * NOTE(review): the NULL checks assume the allocators and the compression
 * helpers signal failure by returning NULL -- confirm.
 */
int* ghmm_bayes_hmm_fbgibbs_compressed(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq,
         int burnIn, int seed, double width, double delta, int max_len_permitted)
{
#define CUR_PROC "ghmm_cmodel_fbgibbs"
    block_stats *stats = NULL;
    double ***b = NULL;
    double **alpha = NULL;
    double ***pmats = NULL;
    int *Q = NULL;
    int max_block_len;
    int i;
    ghmm_sample_data data;

    GHMM_RNG_SET (RNG, seed);

    /* build the block structure of the observations */
    stats = compress_observations(seq, width*delta, delta);
    stats = merge_observations(seq, width, max_len_permitted, stats);
    if (!stats)
        goto STOP;
    print_stats(stats, seq->seq_len[0]);

    /* longest block, needed for the emission precomputation */
    max_block_len = stats->length[0];
    for (i = 1; i < stats->total; i++) {
        if (max_block_len < stats->length[i])
            max_block_len = stats->length[i];
    }

    b = ighmm_cmatrix_3d_alloc(stats->total, mo->N, 2);
    alpha = ighmm_cmatrix_alloc(seq->seq_len[0], mo->N);
    pmats = ighmm_cmatrix_3d_alloc(seq->seq_len[0], mo->N, mo->N);
    if (!b || !alpha || !pmats)
        goto STOP;

    /* ARRAY_CALLOC jumps to STOP on failure */
    ARRAY_CALLOC(Q, seq->seq_len[0]);

    ghmm_alloc_sample_data(bayes, &data);
    ghmm_clear_sample_data(&data, bayes);

    for (; burnIn > 0; burnIn--) {
        /* block emissions depend on the current model, so redo them each sweep */
        precompute_block_emission(mo, stats, max_block_len, b);
        ghmm_cmodel_fbgibbstep(mo, seq->seq[0], stats->total, Q, alpha, pmats, b);
        ghmm_get_sample_data_compressed(&data, bayes, Q, seq->seq[0],
                                        stats->total, stats);
        ghmm_update_model(mo, bayes, &data);
        ghmm_clear_sample_data(&data, bayes);
    }

    ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0], mo->N);
    ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
    free_block_stats(&stats);
    return Q;

STOP:
    /* release whatever was obtained before the failure; Q is NULL here */
    if (alpha) {
        ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    }
    if (pmats) {
        ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0], mo->N);
    }
    if (stats) {
        if (b) {
            ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
        }
        free_block_stats(&stats);
    }
    return NULL;
#undef CUR_PROC
}
/*===========================================================================*/ double ighmm_rand_uniform_cont (int seed, double max, double min) { # define CUR_PROC "ighmm_rand_uniform_cont" if (max <= min) { GHMM_LOG(LCONVERTED, "max <= min not allowed\n"); goto STOP; } if (seed != 0) { GHMM_RNG_SET (RNG, seed); } #ifdef DO_WITH_GSL return (double)(((double)gsl_rng_uniform (RNG)*(max-min)) + min); #else return (double)((GHMM_RNG_UNIFORM (RNG))*(max-min) + min ); #endif STOP: return (-1.0); # undef CUR_PROC } /* ighmm_rand_uniform_cont */
ghmm_cseq *ghmm_sgenerate_extensions (ghmm_cmodel * smo, ghmm_cseq * sqd_short, int seed, int global_len, sgeneration_mode_t mode) { #define CUR_PROC "ghmm_sgenerate_extensions" ghmm_cseq *sq = NULL; int i, j, t, n, m, len = global_len, short_len, max_short_len = 0, up = 0; #ifdef bausparkasse int tilgphase = 0; #endif /* int *v_path = NULL; */ double log_p, *initial_distribution, **alpha, *scale, p, sum; /* aicj */ int class = -1; int pos; /* TEMP */ if (mode == all_viterbi || mode == viterbi_viterbi || mode == viterbi_all) { GHMM_LOG(LCONVERTED, "Error: mode not implemented yet\n"); goto STOP; } if (len <= 0) /* no global length; model should have a final state */ len = (int) GHMM_MAX_SEQ_LEN; max_short_len = ghmm_cseq_max_len (sqd_short); /*---------------alloc-------------------------------------------------*/ sq = ghmm_cseq_calloc (sqd_short->seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (initial_distribution, smo->N); /* is needed in cfoba_forward() */ alpha = ighmm_cmatrix_alloc (max_short_len, smo->N); if (!alpha) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (scale, max_short_len); ghmm_rng_init (); GHMM_RNG_SET (RNG, seed); /*---------------main loop over all seqs-------------------------------*/ for (n = 0; n < sqd_short->seq_number; n++) { ARRAY_CALLOC (sq->seq[n], len*(smo->dim)); short_len = sqd_short->seq_len[n]; if (len < short_len) { GHMM_LOG(LCONVERTED, "Error: given sequence is too long\n"); goto STOP; } ghmm_cseq_copy (sq->seq[n], sqd_short->seq[n], short_len); #ifdef GHMM_OBSOLETE sq->seq_label[n] = sqd_short->seq_label[n]; #endif /* GHMM_OBSOLETE */ /* Initial distribution */ /* 1. 
Viterbi-state */ #if 0 /* wieder aktivieren, wenn ghmm_cmodel_viterbi realisiert */ if (mode == viterbi_all || mode == viterbi_viterbi) { v_path = cviterbi (smo, sqd_short->seq[n], short_len, &log_p); if (v_path[short_len - 1] < 0 || v_path[short_len - 1] >= smo->N) { GHMM_LOG(LCONVERTED, "Warning:Error: from viterbi()\n"); sq->seq_len[n] = short_len; m_realloc (sq->seq[n], short_len); continue; } m_memset (initial_distribution, 0, smo->N); initial_distribution[v_path[short_len - 1]] = 1.0; /* all other 0 */ m_free (v_path); } #endif /* 2. Initial Distribution ??? Pi(i) = alpha_t(i)/P(O|lambda) */ if (mode == all_all || mode == all_viterbi) { if (short_len > 0) { if (ghmm_cmodel_forward (smo, sqd_short->seq[n], short_len, NULL /* ?? */ , alpha, scale, &log_p)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } sum = 0.0; for (i = 0; i < smo->N; i++) { /* alpha ist skaliert! */ initial_distribution[i] = alpha[short_len - 1][i]; sum += initial_distribution[i]; } /* nicht ok.? auf eins skalieren? */ for (i = 0; i < smo->N; i++) initial_distribution[i] /= sum; } else { for (i = 0; i < smo->N; i++) initial_distribution[i] = smo->s[i].pi; } } /* if short_len > 0: Initial state == final state from sqd_short; no output here else choose inittial state according to pi and do output */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (i = 0; i < smo->N; i++) { sum += initial_distribution[i]; if (sum >= p) break; } /* error due to incorrect normalization ?? */ if (i == smo->N) { i--; while (i > 0 && initial_distribution[i] == 0.0) i--; } t = 0; pos = t * smo->dim; if (short_len == 0) { /* Output in state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < smo->M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } /* error due to incorrect normalization ?? 
*/ if (m == smo->M) { m--; while (m > 0 && smo->s[i].c[m] == 0.0) m--; } ghmm_cmodel_get_random_var(smo, i, m, sq->seq[n]+pos); if (smo->cos == 1) { class = 0; } else { if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); goto STOP; } /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/ class = smo->class_change->get_class (smo, sq->seq[n], n, t); } t++; pos += smo->dim; }
double randvar_normal_pos (double mue, double u, int seed) { # define CUR_PROC "randvar_normal_pos" double x = -1; double sigma; #ifdef DO_WITH_GSL double s; #else double U, Us, Us1, Feps, Feps1, t, T; #endif if (u <= 0.0) { mes_prot ("u <= 0.0 not allowed\n"); goto STOP; } sigma = sqrt (u); if (seed != 0) { GHMM_RNG_SET (RNG, seed); return (1.0); } #ifdef DO_WITH_GSL /* up to version 0.8 gsl_ran_gaussian_tail can not handle negative cutoff */ #define GSL_RAN_GAUSSIAN_TAIL_BUG 1 #ifdef GSL_RAN_GAUSSIAN_TAIL_BUG s = (-mue) / sigma; if (s < 1) { do { x = gsl_ran_gaussian (RNG, 1.0); } while (x < s); return x * sigma + mue; } #endif /* GSL_RAN_GAUSSIAN_TAIL_BUG */ /* move boundary to lower values in order to achieve maximum at mue gsl_ran_gaussian_tail(generator, lower_boundary, sigma) */ return gsl_ran_gaussian_tail (RNG, -mue, sqrt (u)) + mue; #else /* DO_WITH_GSL */ /* Method: Generate Gauss-distributed random nunbers (with GSL-lib.), until a positive one is found -> not very effective if mue << 0 while (x < 0.0) { x = sigma * randvar_std_normal(seed) + mue; } */ /* Inverse transformation with restricted sampling by Fishman */ U = GHMM_RNG_UNIFORM (RNG); Feps = randvar_get_PHI (-(EPS_NDT + mue) / sigma); Us = Feps + (1 - Feps) * U; /* Numerically better: 1-Us = 1-Feps - (1-Feps)*U, therefore: Feps1 = 1-Feps, Us1 = 1-Us */ Feps1 = randvar_get_PHI ((EPS_NDT + mue) / sigma); Us1 = Feps1 - Feps1 * U; t = m_min (Us, Us1); t = sqrt (-log (t * t)); T = sigma * (t - (C0 + t * (C1 + t * C2)) / (1 + t * (D1 + t * (D2 + t * D3)))); if (Us - 0.5 < 0) x = mue - T; else x = mue + T; #endif /* DO_WITH_GSL */ STOP: return (x); # undef CUR_PROC } /* randvar_normal_pos */