Exemplo n.º 1
0
/*
 * Precompute the block emission terms b[t][i][1] used by the forward
 * algorithm on compressed observations.
 *
 * Assumes normally distributed emissions: for each state only
 * e->mean.val and e->variance.val are read.
 *
 * For every block t in [0, stats->total) and every state i in [0, mo->N),
 * with len = stats->length[t], the function stores
 *
 *   b[t][i][1] = transition[i][len]
 *                * exp( -(moment2[t] - 2*moment1[t]*mean_i + mean2[i][len])
 *                       / (2*variance_i) )
 *                / std[i][len]
 *
 * mean2, std and transition are scratch tables with mo->N rows and
 * max_block_len+1 columns that are filled by precompute_blocks().
 *
 * mo             model supplying N and the per-state mean / variance
 * stats          block statistics (total, length[], moment1[], moment2[])
 * max_block_len  upper bound for the values in stats->length[]; the scratch
 *                tables are indexed by stats->length[t]
 * b              output array; only index [1] of the last dimension is written
 *
 * The function returns void, so it cannot report failure: if a scratch
 * table cannot be allocated, everything that was allocated is released and
 * b is left untouched.
 */
void precompute_block_emission(ghmm_cmodel *mo, block_stats *stats, 
        int max_block_len, double ***b){
#define CUR_PROC "precompute_block_emission"
    double **mean2 = NULL, **std = NULL, **transition = NULL;
    double exponent;
    int t, i, len;

    /* scratch tables for the intermediate values */
    mean2 = ighmm_cmatrix_alloc(mo->N, max_block_len+1);
    std = ighmm_cmatrix_alloc(mo->N, max_block_len+1);
    transition = ighmm_cmatrix_alloc(mo->N, max_block_len+1);
    if (!mean2 || !std || !transition)
        goto STOP;

    precompute_blocks(mo, mean2, std, transition, max_block_len+1);

    for(t = 0; t < stats->total; t++){
        /* the block length selects the column of every scratch table */
        len = stats->length[t];
        for(i = 0; i < mo->N; i++){ /* TODO (from original): flip loop order */
            exponent = -1 * ( stats->moment2[t] - 2*stats->moment1[t] *
                    (mo->s+i)->e->mean.val + mean2[i][len] ) /
                    (2 * (mo->s+i)->e->variance.val);

            b[t][i][1] = transition[i][len] * exp( exponent ) / std[i][len];
        }
    }

STOP:
    /* single exit: release whichever scratch tables were obtained */
    if (mean2)
        ighmm_cmatrix_free(&mean2, mo->N);
    if (std)
        ighmm_cmatrix_free(&std, mo->N);
    if (transition)
        ighmm_cmatrix_free(&transition, mo->N);
    return;
#undef CUR_PROC
}
Exemplo n.º 2
0
/*
 * Run burnIn sweeps of forward-backward Gibbs sampling over all sequences
 * in seq.
 *
 * In every sweep, for each sequence k:
 *   - ghmm_cmodel_fbgibbstep() is called with Q[k] as its state buffer
 *     (alpha and pmats are shared work space, the last argument is NULL),
 *   - ghmm_get_sample_data() collects statistics from Q[k] and the sequence,
 *   - ghmm_update_model() updates mo from those statistics,
 *   - ghmm_clear_sample_data() resets the statistics.
 *
 * bayes   prior / hyper-parameter model handed to the sample-data helpers
 * mo      model that is updated in place
 * seq     sequences to sample on
 * burnIn  number of sweeps
 * seed    value passed to GHMM_RNG_SET
 *
 * Returns an array of seq->seq_number int arrays, entry k holding
 * seq->seq_len[k] ints, or NULL on failure. The arrays are not freed here;
 * they are handed to the caller.
 *
 * NOTE(review): the declared return type is int* although the value is an
 * int**; the signature is kept for existing callers and the conversion is
 * made explicit below. Callers must treat the result as int**.
 */
int* ghmm_bayes_hmm_fbgibbs(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq,
         int burnIn, int seed){
#define CUR_PROC "ghmm_bayes_hmm_fbgibbs"
    double **alpha = NULL;
    double ***pmats = NULL;
    int **Q = NULL;
    int max_seq, seq_iter;
    ghmm_sample_data data;

    //XXX seed
    GHMM_RNG_SET (RNG, seed);

    /* work space sized for the longest sequence, reused for all of them */
    max_seq = ghmm_cseq_max_len(seq);
    alpha = ighmm_cmatrix_alloc(max_seq,mo->N);
    pmats = ighmm_cmatrix_3d_alloc(max_seq, mo->N, mo->N);
    if (!alpha || !pmats)
        goto STOP;

    /* ghmm_alloc_sample_data() returns 0 on success */
    if (ghmm_alloc_sample_data(bayes, &data) != 0)
        goto STOP;
    ghmm_clear_sample_data(&data, bayes);//XXX swap parameter

    /* one state buffer per sequence; ARRAY_CALLOC jumps to STOP on failure */
    ARRAY_CALLOC(Q, seq->seq_number);
    for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){
        ARRAY_CALLOC(Q[seq_iter], seq->seq_len[seq_iter]);
    }

    for(; burnIn > 0; burnIn--){
        for(seq_iter = 0; seq_iter < seq->seq_number; seq_iter++){
            ghmm_cmodel_fbgibbstep(mo,seq->seq[seq_iter],seq->seq_len[seq_iter], Q[seq_iter],
                    alpha, pmats, NULL);
            ghmm_get_sample_data(&data, bayes, Q[seq_iter], seq->seq[seq_iter], 
                    seq->seq_len[seq_iter]); 
            ghmm_update_model(mo, bayes, &data);
            ghmm_clear_sample_data(&data, bayes);
        }
    }
    /* NOTE(review): data is never released in this function; confirm
       whether a matching release routine exists. */
    ighmm_cmatrix_free(&alpha, max_seq);
    ighmm_cmatrix_3d_free(&pmats, max_seq,mo->N);
    return (int *) Q;
STOP:
    /* release the work space on every failure path */
    if (alpha)
        ighmm_cmatrix_free(&alpha, max_seq);
    if (pmats)
        ighmm_cmatrix_3d_free(&pmats, max_seq,mo->N);
    /* NOTE(review): Q and any rows of Q already allocated are still leaked
       here; the deallocator matching ARRAY_CALLOC is not visible in this
       part of the file. */
    return NULL;
#undef CUR_PROC
}
Exemplo n.º 3
0
/*
 * Create a ghmm_dpseq and store length, number_of_alphabets and
 * number_of_d_seqs in it.
 *
 * seq is obtained from ighmm_dmatrix_alloc(number_of_alphabets, length)
 * when number_of_alphabets > 0 and stays NULL otherwise; d_value is
 * obtained from ighmm_cmatrix_alloc(number_of_d_seqs, length) when
 * number_of_d_seqs > 0 and stays NULL otherwise.
 *
 * Returns the new object, or NULL if an allocation fails (the partially
 * built object is passed to ghmm_dpseq_free() first).
 */
ghmm_dpseq * ghmm_dpseq_init(int length, int number_of_alphabets, int number_of_d_seqs) {
#define CUR_PROC "ghmm_dpseq_init"
  ghmm_dpseq * dpseq;

  ARRAY_MALLOC (dpseq, 1);

  /* both matrices start out absent, so the error path always hands a
     fully initialised object to ghmm_dpseq_free() */
  dpseq->d_value = NULL;
  dpseq->seq = NULL;
  dpseq->number_of_d_seqs = number_of_d_seqs;
  dpseq->number_of_alphabets = number_of_alphabets;
  dpseq->length = length;

  if (number_of_alphabets > 0) {
    dpseq->seq = ighmm_dmatrix_alloc(number_of_alphabets, length);
    if (dpseq->seq == NULL)
      goto STOP;
  }

  if (number_of_d_seqs > 0) {
    dpseq->d_value = ighmm_cmatrix_alloc(number_of_d_seqs, length);
    if (dpseq->d_value == NULL)
      goto STOP;
  }

  return dpseq;

STOP:     /* also the jump target of ARRAY_MALLOC */
  ghmm_dpseq_free(dpseq);
  return NULL;
#undef CUR_PROC
}
Exemplo n.º 4
0
/*
 * Read sequences and initial models, cluster the sequences with a mixture
 * of HMMs and write the trained models to an output file.
 *
 * argv[1]  sequence file (passed to ghmm_cseq_read); only the first
 *          sequence field is used
 * argv[2]  model file (passed to ghmm_cmodel_read)
 * argv[3]  output file, opened with mode "wt"
 *
 * Returns 0 on success and -1 on any failure, including too few arguments
 * and a failing fclose() on the output file. The output file is closed on
 * every path.
 *
 * NOTE(review): sqd, smo and cp are not released here; presumably they are
 * reclaimed at process exit - confirm against the caller.
 */
static int smix_hmm_run(int argc, char* argv[]) {
#define CUR_PROC "smix_hmm_run"

  int k, exitcode = -1, smo_number, sqd_fields;
  ghmm_cseq **sqd = NULL;
  ghmm_cmodel **smo = NULL;
  double **cp = NULL;
  FILE *outfile = NULL;

  /* argv[1], argv[2] and argv[3] are all dereferenced below */
  if (argc < 4) {
    fprintf(stderr, "smix_hmm_run: sequence file, model file and output file are required\n");
    goto STOP;
  }

  /* read sequences and initial models */
  sqd = ghmm_cseq_read(argv[1], &sqd_fields);
  if (!sqd) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
  if (sqd_fields > 1)
    printf("Warning: Seq. File contains multiple Seq. Fields; use only the first one\n");
  smo = ghmm_cmodel_read(argv[2], &smo_number);
  if (!smo) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}

  /* open output file */
  if(!(outfile = ighmm_mes_fopen(argv[3], "wt"))) {GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}
  
  /* matrix for component probs.: one row per sequence, one column per model */
  cp = ighmm_cmatrix_alloc(sqd[0]->seq_number, smo_number);
  if (!cp) { GHMM_LOG_QUEUED(LCONVERTED); goto STOP;}

  /* last argument of ghmm_smixturehmm_init():
     1. strict random partition; cp = 0/1
     2. ghmm_smap_bayes from initial models
     3. cp = 1 for best model, cp = 0 for other models
     4. open
     5. no start partition == equal cp for each model
  */
  if (ghmm_smixturehmm_init(cp, sqd[0], smo, smo_number, 5) == -1) {
    GHMM_LOG_QUEUED(LCONVERTED); goto STOP;
  }
  /* clustering */
  if (ghmm_smixturehmm_cluster(outfile, cp, sqd[0], smo, smo_number) == -1) {
    GHMM_LOG_QUEUED(LCONVERTED); goto STOP;
  }

  /* print trained models */
  for (k = 0; k < smo_number; k++)
    ghmm_cmodel_print(outfile, smo[k]);  

  exitcode = 0;
 STOP:
  /* close on every path; a failed close of a write stream means the
     output may be incomplete, so it is reported as a failure */
  if (outfile && fclose(outfile) != 0)
    exitcode = -1;
  return exitcode;

# undef CUR_PROC
}
Exemplo n.º 5
0
/*
 * Allocate the buffers of a ghmm_sample_data object for model mo.
 *
 * data->transition becomes an mo->N x mo->N matrix, data->state_data an
 * array of mo->N entries, and data->state_data[i] an array of mo->M[i]
 * entries.
 *
 * Returns 0 on success and -1 if any allocation fails.
 *
 * NOTE(review): on failure the buffers obtained so far stay attached to
 * data and are not released here.
 */
int ghmm_alloc_sample_data(ghmm_bayes_hmm *mo, ghmm_sample_data *data){
#define CUR_PROC "ghmm_alloc_sample_data"
//XXX must do alloc matrices for dim >1
    int i;

    data->transition = ighmm_cmatrix_alloc(mo->N, mo->N);
    /* without this check a failed allocation was reported as success */
    if (!data->transition)
        goto STOP;

    /* ARRAY_MALLOC jumps to STOP on failure */
    ARRAY_MALLOC(data->state_data, mo->N);
    for(i = 0; i < mo->N; i++){
        ARRAY_MALLOC(data->state_data[i], mo->M[i]);
        /* TODO: for dim > 1 every data->state_data[i][j] additionally needs
           its own emission data allocated from the matching bayes
           parameters. */
    }
    return 0;
STOP:
    return -1;
#undef CUR_PROC
}
Exemplo n.º 6
0
/*
 * Run burnIn sweeps of forward-backward Gibbs sampling on a compressed
 * (block-wise) representation of the observations. Only sequence 0 of seq
 * is used.
 *
 * The observations are first turned into block statistics with
 * compress_observations(seq, width*delta, delta) followed by
 * merge_observations(seq, width, max_len_permitted, stats). Every sweep
 * then
 *   - recomputes the block emissions b from the current model with
 *     precompute_block_emission(),
 *   - calls ghmm_cmodel_fbgibbstep() over stats->total blocks with Q as
 *     its state buffer,
 *   - collects statistics with ghmm_get_sample_data_compressed(),
 *   - updates mo with ghmm_update_model() and clears the statistics.
 *
 * bayes              prior / hyper-parameter model for the sample-data helpers
 * mo                 model that is updated in place
 * seq                sequences; only seq->seq[0] / seq->seq_len[0] are read
 * burnIn             number of sweeps
 * seed               value passed to GHMM_RNG_SET
 * width, delta       compression parameters, forwarded as shown above
 * max_len_permitted  forwarded to merge_observations()
 *
 * Returns an int array with seq->seq_len[0] entries that the caller
 * receives, or NULL on failure. All work buffers and the block statistics
 * are released on every path.
 */
int* ghmm_bayes_hmm_fbgibbs_compressed(ghmm_bayes_hmm *bayes, ghmm_cmodel *mo, ghmm_cseq* seq,
         int burnIn, int seed, double width, double delta, int max_len_permitted){
#define CUR_PROC "ghmm_bayes_hmm_fbgibbs_compressed"
    block_stats *stats = NULL;
    double ***b = NULL;
    double **alpha = NULL;
    double ***pmats = NULL;
    int *Q = NULL;
    int *result = NULL;
    int max_block_len, i;
    ghmm_sample_data data;

    //XXX seed
    GHMM_RNG_SET (RNG, seed);

    stats = compress_observations(seq, width*delta, delta);
    if (!stats)
        goto STOP;
    stats = merge_observations(seq, width, max_len_permitted, stats);
    if (!stats)
        goto STOP;
    print_stats(stats, seq->seq_len[0]);

    /* longest block; do not read length[0] when there are no blocks */
    max_block_len = stats->total > 0 ? stats->length[0] : 0;
    for(i = 1; i < stats->total; i++){
        if(max_block_len < stats->length[i])
            max_block_len = stats->length[i];
    }

    b = ighmm_cmatrix_3d_alloc(stats->total, mo->N, 2);
    alpha = ighmm_cmatrix_alloc(seq->seq_len[0],mo->N);
    pmats = ighmm_cmatrix_3d_alloc(seq->seq_len[0], mo->N, mo->N);
    if (!b || !alpha || !pmats)
        goto STOP;

    /* ghmm_alloc_sample_data() returns 0 on success */
    if (ghmm_alloc_sample_data(bayes, &data) != 0)
        goto STOP;
    ghmm_clear_sample_data(&data, bayes);//XXX swap parameter 

    /* Allocated last so that no earlier failure can leak it.
       ARRAY_CALLOC jumps to STOP on failure. */
    ARRAY_CALLOC(Q, seq->seq_len[0]);//XXX extra length for compressed

    for(; burnIn > 0; burnIn--){
        //XXX only using seq 0
        /* mo changes every sweep, so b has to be recomputed each time */
        precompute_block_emission(mo, stats, max_block_len, b);//XXX maxlen
        ghmm_cmodel_fbgibbstep(mo,seq->seq[0], stats->total, Q, alpha, pmats, b);
        ghmm_get_sample_data_compressed(&data, bayes, Q, seq->seq[0], 
                stats->total, stats); 
        ghmm_update_model(mo, bayes, &data);
        ghmm_clear_sample_data(&data, bayes);
    }
    /* NOTE(review): data is never released in this function; confirm
       whether a matching release routine exists. */
    result = Q;

STOP:
    /* shared cleanup for success and failure; b must be released before
       stats because its size is taken from stats->total */
    if (alpha)
        ighmm_cmatrix_free(&alpha, seq->seq_len[0]);
    if (pmats)
        ighmm_cmatrix_3d_free(&pmats, seq->seq_len[0],mo->N);
    if (b)
        ighmm_cmatrix_3d_free(&b, stats->total, mo->N);
    if (stats)
        free_block_stats(&stats);
    return result;
#undef CUR_PROC
}
Exemplo n.º 7
0
ghmm_cseq *ghmm_sgenerate_extensions (ghmm_cmodel * smo, ghmm_cseq * sqd_short,
                                    int seed, int global_len,
                                    sgeneration_mode_t mode)
{
#define CUR_PROC "ghmm_sgenerate_extensions"
  ghmm_cseq *sq = NULL;
  int i, j, t, n, m, len = global_len, short_len, max_short_len = 0, up = 0;
#ifdef bausparkasse
  int tilgphase = 0;
#endif
  /* int *v_path = NULL; */
  double log_p, *initial_distribution, **alpha, *scale, p, sum;
  /* aicj */
  int class = -1;
  int pos;

  /* TEMP */
  if (mode == all_viterbi || mode == viterbi_viterbi || mode == viterbi_all) {
    GHMM_LOG(LCONVERTED, "Error: mode not implemented yet\n");
    goto STOP;
  }

  if (len <= 0)
    /* no global length; model should have a final state */
    len = (int) GHMM_MAX_SEQ_LEN;
  max_short_len = ghmm_cseq_max_len (sqd_short);

  /*---------------alloc-------------------------------------------------*/
  sq = ghmm_cseq_calloc (sqd_short->seq_number);
  if (!sq) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (initial_distribution, smo->N);
  /* is needed in cfoba_forward() */
  alpha = ighmm_cmatrix_alloc (max_short_len, smo->N);
  if (!alpha) {
    GHMM_LOG_QUEUED(LCONVERTED);
    goto STOP;
  }
  ARRAY_CALLOC (scale, max_short_len);
  ghmm_rng_init ();
  GHMM_RNG_SET (RNG, seed);

  /*---------------main loop over all seqs-------------------------------*/
  for (n = 0; n < sqd_short->seq_number; n++) {
    ARRAY_CALLOC (sq->seq[n], len*(smo->dim));
    short_len = sqd_short->seq_len[n];
    if (len < short_len) {
      GHMM_LOG(LCONVERTED, "Error: given sequence is too long\n");
      goto STOP;
    }
    ghmm_cseq_copy (sq->seq[n], sqd_short->seq[n], short_len);
#ifdef GHMM_OBSOLETE
    sq->seq_label[n] = sqd_short->seq_label[n];
#endif /* GHMM_OBSOLETE */

    /* Initial distribution */
    /* 1. Viterbi-state */
#if 0
    /* wieder aktivieren, wenn ghmm_cmodel_viterbi realisiert */
    if (mode == viterbi_all || mode == viterbi_viterbi) {
      v_path = cviterbi (smo, sqd_short->seq[n], short_len, &log_p);
      if (v_path[short_len - 1] < 0 || v_path[short_len - 1] >= smo->N) {
        GHMM_LOG(LCONVERTED, "Warning:Error: from viterbi()\n");
        sq->seq_len[n] = short_len;
        m_realloc (sq->seq[n], short_len);
        continue;
      }
      m_memset (initial_distribution, 0, smo->N);
      initial_distribution[v_path[short_len - 1]] = 1.0;        /* all other 0 */
      m_free (v_path);
    }
#endif

    /* 2. Initial Distribution ???
       Pi(i) = alpha_t(i)/P(O|lambda) */
    if (mode == all_all || mode == all_viterbi) {
      if (short_len > 0) {
        if (ghmm_cmodel_forward (smo, sqd_short->seq[n], short_len, NULL /* ?? */ ,
                           alpha, scale, &log_p)) {
          GHMM_LOG_QUEUED(LCONVERTED);
          goto STOP;
        }
        sum = 0.0;
        for (i = 0; i < smo->N; i++) {
          /* alpha ist skaliert! */
          initial_distribution[i] = alpha[short_len - 1][i];
          sum += initial_distribution[i];
        }
        /* nicht ok.? auf eins skalieren? */
        for (i = 0; i < smo->N; i++)
          initial_distribution[i] /= sum;
      }
      else {
        for (i = 0; i < smo->N; i++)
          initial_distribution[i] = smo->s[i].pi;
      }
    }
    /* if short_len > 0:
       Initial state == final state from sqd_short; no output here
       else
       choose inittial state according to pi and do output
     */
    p = GHMM_RNG_UNIFORM (RNG);
    sum = 0.0;
    for (i = 0; i < smo->N; i++) {
      sum += initial_distribution[i];
      if (sum >= p)
        break;
    }
    /* error due to incorrect normalization ?? */
    if (i == smo->N) {
      i--;
      while (i > 0 && initial_distribution[i] == 0.0)
        i--;
    }
    t = 0;
    pos = t * smo->dim;
    if (short_len == 0) {
      /* Output in state i */
      p = GHMM_RNG_UNIFORM (RNG);
      sum = 0.0;
      for (m = 0; m < smo->M; m++) {
        sum += smo->s[i].c[m];
        if (sum >= p)
          break;
      }
      /* error due to incorrect normalization ?? */
      if (m == smo->M) {
        m--;
        while (m > 0 && smo->s[i].c[m] == 0.0)
          m--;
      }
      ghmm_cmodel_get_random_var(smo, i, m, sq->seq[n]+pos);

      if (smo->cos == 1) {
        class = 0;
      }
      else {
        if (!smo->class_change->get_class) {
          printf ("ERROR: get_class not initialized\n");
          goto STOP;
        }
        /*printf("1: cos = %d, k = %d, t = %d\n",smo->cos,smo->class_change->k,t);*/
        class = smo->class_change->get_class (smo, sq->seq[n], n, t);
      }


      t++;
      pos += smo->dim;
    }