//assumes normal dist //precomputes b for forward algo void precompute_block_emission(ghmm_cmodel *mo, block_stats *stats, int max_block_len, double ***b){ #define CUR_PROC "precalculate_block_emission" //precompute intermediate values double **mean2, **std, **transition; mean2 = ighmm_cmatrix_alloc(mo->N, max_block_len+1); std = ighmm_cmatrix_alloc(mo->N, max_block_len+1); transition = ighmm_cmatrix_alloc(mo->N, max_block_len+1); precompute_blocks(mo, mean2, std, transition, max_block_len+1); int t, i; double exponent; for(t = 0; t < stats->total; t++){ for(i = 0; i < mo->N; i++){//flip order //printf("sumsqrs %e\n", stats->moment2[t]); //printf("transition %e\n", transition[i][stats->length[t]]); exponent = -1 * ( stats->moment2[t] - 2*stats->moment1[t] * (mo->s+i)->e->mean.val + mean2[i][stats->length[t]] ) / (2 * (mo->s+i)->e->variance.val); b[t][i][1] = transition[i][stats->length[t]] * exp( exponent ) / std[i][stats->length[t]]; //printf("exp = %e\n", exponent); //printf("b %d %d = %e\n", t, i, b[t][i][1]); } } ighmm_cmatrix_free(&mean2, mo->N); ighmm_cmatrix_free(&std, mo->N); ighmm_cmatrix_free(&transition, mo->N); STOP: //XXX ERROR return; #undef CUR_PROC }
//only uses first sequence int* ghmm_bayes_hmm_fbgibbs(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq, int burnIn, int seed){ #define CUR_PROC "ghmm_cmodel_fbgibbs" //XXX seed GHMM_RNG_SET (RNG, seed); int max_seq = ghmm_cseq_max_len(seq); double **alpha = ighmm_cmatrix_alloc(max_seq,mo->N); double ***pmats = ighmm_cmatrix_3d_alloc(max_seq, mo->N, mo->N); int **Q; ARRAY_CALLOC(Q, seq->seq_number); int seq_iter; for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){ ARRAY_CALLOC(Q[seq_iter], seq->seq_len[seq_iter]); } ghmm_sample_data data; ghmm_alloc_sample_data(bayes, &data); ghmm_clear_sample_data(&data, bayes);//XXX swap parameter for(; burnIn > 0; burnIn--){ for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){ ghmm_cmodel_fbgibbstep(mo,seq->seq[seq_iter],seq->seq_len[seq_iter], Q[seq_iter], alpha, pmats, NULL); ghmm_get_sample_data(&data, bayes, Q[seq_iter], seq->seq[seq_iter], seq->seq_len[seq_iter]); ghmm_update_model(mo, bayes, &data); ghmm_clear_sample_data(&data, bayes); } } ighmm_cmatrix_free(&alpha, max_seq); ighmm_cmatrix_3d_free(&pmats, max_seq,mo->N); return Q; STOP: return NULL; //XXX error handle #undef CUR_PROC }
ghmm_dpseq * ghmm_dpseq_init(int length, int number_of_alphabets, int number_of_d_seqs) { #define CUR_PROC "ghmm_dpseq_init" ghmm_dpseq * seq; ARRAY_MALLOC (seq, 1); seq->length = length; seq->number_of_alphabets = number_of_alphabets; seq->number_of_d_seqs = number_of_d_seqs; seq->seq = NULL; seq->d_value = NULL; if (number_of_alphabets > 0) { seq->seq = ighmm_dmatrix_alloc(number_of_alphabets, length); if (!(seq->seq)) goto STOP; } if (number_of_d_seqs > 0) { seq->d_value = ighmm_cmatrix_alloc(number_of_d_seqs, length); if (!(seq->d_value)) goto STOP; } return seq; STOP: /* Label STOP from ARRAY_[CM]ALLOC */ ghmm_dpseq_free(seq); return NULL; #undef CUR_PROC }
static int smix_hmm_run(int argc, char* argv[]) { #define CUR_PROC "smix_hmm_run" int k, exitcode = -1, smo_number, sqd_fields; ghmm_cseq **sqd = NULL; ghmm_cmodel **smo = NULL; double **cp = NULL; FILE *outfile = NULL; /* read sequences and initial models */ sqd = ghmm_cseq_read(argv[1], &sqd_fields); if (!sqd) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} if (sqd_fields > 1) printf("Warning: Seq. File contains multiple Seq. Fields; use only the first one\n"); smo = ghmm_cmodel_read(argv[2], &smo_number); if (!smo) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} /* open output file */ if(!(outfile = ighmm_mes_fopen(argv[3], "wt"))) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} /* matrix for component probs., */ cp = ighmm_cmatrix_alloc(sqd[0]->seq_number, smo_number); if (!cp) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP;} /* set last arg in ghmm_smixturehmm_init() : 1 = strict random partition; cp = 0/1 2. ghmm_smap_bayes from initial models 3. cp = 1 for best model, cp = 0 for other models 4. open 5. no start partition == equal cp for each model */ if (ghmm_smixturehmm_init(cp, sqd[0], smo, smo_number, 5) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* clustering */ if (ghmm_smixturehmm_cluster(outfile, cp, sqd[0], smo, smo_number) == -1) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } /* print trained models */ for (k = 0; k < smo_number; k++) ghmm_cmodel_print(outfile, smo[k]); if (outfile) fclose(outfile); exitcode = 0; STOP: return exitcode; # undef CUR_PROC }
/**
 * Allocate the buffers of a ghmm_sample_data for the model `mo`.
 *
 * Allocates data->transition as an mo->N x mo->N matrix and
 * data->state_data as mo->N rows, row i holding mo->M[i] entries.
 *
 * @param mo    model supplying the state count N and per-state sizes M[i]
 * @param data  structure whose `transition` and `state_data` members are set
 * @return 0 on success, -1 on allocation failure. On failure everything
 *         allocated here is released again and both members are left NULL.
 */
int ghmm_alloc_sample_data(ghmm_bayes_hmm *mo, ghmm_sample_data *data){
#define CUR_PROC "ghmm_alloc_sample_data"
    //XXX must do alloc matrices for dim >1
    int i;

    /* Known starting state so the error path frees exactly what exists. */
    data->transition = NULL;
    data->state_data = NULL;

    data->transition = ighmm_cmatrix_alloc(mo->N, mo->N);
    if (!data->transition)
        goto STOP;

    /* Zero-filled row table: rows not yet reached are NULL during cleanup. */
    ARRAY_CALLOC(data->state_data, mo->N);
    for(i = 0; i < mo->N; i++){
        ARRAY_MALLOC(data->state_data[i], mo->M[i]);
        /*for(i = 0; i < mo->M[i]; i++){//only needed for dim >1
            ghmm_alloc_emission_data(data->state_data[i][j], ghmm_bayes_hmm->params[i][j])
        }*/
    }
    return 0;

STOP:
    /* Also reached from ARRAY_[CM]ALLOC on allocation failure. */
    if (data->state_data) {
        for(i = 0; i < mo->N; i++)
            free(data->state_data[i]);
        free(data->state_data);
        data->state_data = NULL;
    }
    if (data->transition) {
        ighmm_cmatrix_free(&data->transition, mo->N);
        data->transition = NULL;
    }
    return -1;
#undef CUR_PROC
}
/**
 * Forward-backward Gibbs sampling on a block-compressed observation sequence.
 *
 * Only sequence 0 of `seq` is sampled. The observations are first compressed
 * into blocks (compress_observations, then merge_observations). Each of the
 * `burnIn` sweeps recomputes the block emissions for the current model,
 * samples a state path over the blocks, collects the sample statistics and
 * updates `mo`.
 *
 * @param bayes              Bayesian model description for collecting/updating
 * @param mo                 continuous model; modified in place by every update
 * @param seq                sequences; only seq->seq[0] / seq->seq_len[0] are used
 * @param burnIn             number of sampling sweeps
 * @param seed               seed passed to GHMM_RNG_SET
 * @param width, delta       compression parameters: width*delta and delta go
 *                           to compress_observations, width to
 *                           merge_observations
 * @param max_len_permitted  forwarded to merge_observations
 * @return the sampled path buffer (seq->seq_len[0] zero-initialised ints, of
 *         which the sampler is given stats->total positions), or NULL on
 *         failure. On NULL all work buffers allocated here are released.
 */
int* ghmm_bayes_hmm_fbgibbs_compressed(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq,
        int burnIn, int seed, double width, double delta, int max_len_permitted){
#define CUR_PROC "ghmm_bayes_hmm_fbgibbs_compressed"
    block_stats *stats = NULL;
    double ***b = NULL;
    double **alpha = NULL;
    double ***pmats = NULL;
    int *Q = NULL;
    ghmm_sample_data data;
    int max_block_len;
    int i;

    //XXX seed
    GHMM_RNG_SET (RNG, seed);

    stats = compress_observations(seq, width*delta, delta);
    if (!stats)
        goto STOP;
    stats = merge_observations(seq, width, max_len_permitted, stats);
    if (!stats)
        goto STOP;
    print_stats(stats, seq->seq_len[0]);

    //get max_block_len
    /* Guard the first read: length[0] only exists when there is a block. */
    max_block_len = stats->total > 0 ? stats->length[0] : 0;
    for(i = 1; i < stats->total; i++){
        if(max_block_len < stats->length[i])
            max_block_len = stats->length[i];
    }

    b = ighmm_cmatrix_3d_alloc(stats->total, mo->N, 2);
    alpha = ighmm_cmatrix_alloc(seq->seq_len[0], mo->N);
    pmats = ighmm_cmatrix_3d_alloc(seq->seq_len[0], mo->N, mo->N);
    if (!b || !alpha || !pmats)
        goto STOP;

    ARRAY_CALLOC(Q, seq->seq_len[0]);//XXX extra length for compressed

    /* NOTE(review): no release routine for `data` is visible from here, so
     * its buffers are not freed on any path -- confirm whether one exists. */
    if (ghmm_alloc_sample_data(bayes, &data) != 0)
        goto STOP;
    ghmm_clear_sample_data(&data, bayes);//XXX swap parameter

    for(; burnIn > 0; burnIn--){
        //XXX only using seq 0
        /* Emissions depend on the current model, so recompute every sweep. */
        precompute_block_emission(mo, stats, max_block_len, b);//XXX maxlen
        ghmm_cmodel_fbgibbstep(mo, seq->seq[0], stats->total, Q, alpha, pmats, b);
        ghmm_get_sample_data_compressed(&data, bayes, Q, seq->seq[0],
                stats->total, stats);
        ghmm_update_model(mo, bayes, &data);
        ghmm_clear_sample_data(&data, bayes);
    }

    ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0], mo->N);
    ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
    free_block_stats(&stats);
    return Q;

STOP:
    /* Also reached from ARRAY_CALLOC on allocation failure. */
    free(Q);
    if (alpha)
        ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    if (pmats)
        ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0], mo->N);
    if (b)  /* b non-NULL implies stats is valid */
        ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
    if (stats)
        free_block_stats(&stats);
    return NULL;
#undef CUR_PROC
}
ghmm_cseq *ghmm_sgenerate_extensions (ghmm_cmodel * smo, ghmm_cseq * sqd_short, int seed, int global_len, sgeneration_mode_t mode) { #define CUR_PROC "ghmm_sgenerate_extensions" ghmm_cseq *sq = NULL; int i, j, t, n, m, len = global_len, short_len, max_short_len = 0, up = 0; #ifdef bausparkasse int tilgphase = 0; #endif /* int *v_path = NULL; */ double log_p, *initial_distribution, **alpha, *scale, p, sum; /* aicj */ int class = -1; int pos; /* TEMP */ if (mode == all_viterbi || mode == viterbi_viterbi || mode == viterbi_all) { GHMM_LOG(LCONVERTED, "Error: mode not implemented yet\n"); goto STOP; } if (len <= 0) /* no global length; model should have a final state */ len = (int) GHMM_MAX_SEQ_LEN; max_short_len = ghmm_cseq_max_len (sqd_short); /*---------------alloc-------------------------------------------------*/ sq = ghmm_cseq_calloc (sqd_short->seq_number); if (!sq) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (initial_distribution, smo->N); /* is needed in cfoba_forward() */ alpha = ighmm_cmatrix_alloc (max_short_len, smo->N); if (!alpha) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } ARRAY_CALLOC (scale, max_short_len); ghmm_rng_init (); GHMM_RNG_SET (RNG, seed); /*---------------main loop over all seqs-------------------------------*/ for (n = 0; n < sqd_short->seq_number; n++) { ARRAY_CALLOC (sq->seq[n], len*(smo->dim)); short_len = sqd_short->seq_len[n]; if (len < short_len) { GHMM_LOG(LCONVERTED, "Error: given sequence is too long\n"); goto STOP; } ghmm_cseq_copy (sq->seq[n], sqd_short->seq[n], short_len); #ifdef GHMM_OBSOLETE sq->seq_label[n] = sqd_short->seq_label[n]; #endif /* GHMM_OBSOLETE */ /* Initial distribution */ /* 1. 
Viterbi-state */ #if 0 /* wieder aktivieren, wenn ghmm_cmodel_viterbi realisiert */ if (mode == viterbi_all || mode == viterbi_viterbi) { v_path = cviterbi (smo, sqd_short->seq[n], short_len, &log_p); if (v_path[short_len - 1] < 0 || v_path[short_len - 1] >= smo->N) { GHMM_LOG(LCONVERTED, "Warning:Error: from viterbi()\n"); sq->seq_len[n] = short_len; m_realloc (sq->seq[n], short_len); continue; } m_memset (initial_distribution, 0, smo->N); initial_distribution[v_path[short_len - 1]] = 1.0; /* all other 0 */ m_free (v_path); } #endif /* 2. Initial Distribution ??? Pi(i) = alpha_t(i)/P(O|lambda) */ if (mode == all_all || mode == all_viterbi) { if (short_len > 0) { if (ghmm_cmodel_forward (smo, sqd_short->seq[n], short_len, NULL /* ?? */ , alpha, scale, &log_p)) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP; } sum = 0.0; for (i = 0; i < smo->N; i++) { /* alpha ist skaliert! */ initial_distribution[i] = alpha[short_len - 1][i]; sum += initial_distribution[i]; } /* nicht ok.? auf eins skalieren? */ for (i = 0; i < smo->N; i++) initial_distribution[i] /= sum; } else { for (i = 0; i < smo->N; i++) initial_distribution[i] = smo->s[i].pi; } } /* if short_len > 0: Initial state == final state from sqd_short; no output here else choose inittial state according to pi and do output */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (i = 0; i < smo->N; i++) { sum += initial_distribution[i]; if (sum >= p) break; } /* error due to incorrect normalization ?? */ if (i == smo->N) { i--; while (i > 0 && initial_distribution[i] == 0.0) i--; } t = 0; pos = t * smo->dim; if (short_len == 0) { /* Output in state i */ p = GHMM_RNG_UNIFORM (RNG); sum = 0.0; for (m = 0; m < smo->M; m++) { sum += smo->s[i].c[m]; if (sum >= p) break; } /* error due to incorrect normalization ?? 
*/ if (m == smo->M) { m--; while (m > 0 && smo->s[i].c[m] == 0.0) m--; } ghmm_cmodel_get_random_var(smo, i, m, sq->seq[n]+pos); if (smo->cos == 1) { class = 0; } else { if (!smo->class_change->get_class) { printf ("ERROR: get_class not initialized\n"); goto STOP; } /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/ class = smo->class_change->get_class (smo, sq->seq[n], n, t); } t++; pos += smo->dim; }