int
fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
                      int state, float32 silprob)
{
    int32 logsilp;
    int n_trans, silwid, src;

    E_INFO("Adding silence transitions for %s to FSG\n", silword);

    silwid = fsg_model_word_add(fsg, silword);
    logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
    if (fsg->silwords == NULL)
        fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
    bitvec_set(fsg->silwords, silwid);

    n_trans = 0;
    if (state == -1) {
        for (src = 0; src < fsg->n_state; src++) {
            fsg_model_trans_add(fsg, src, src, logsilp, silwid);
            ++n_trans;
        }
    }
    else {
        fsg_model_trans_add(fsg, state, state, logsilp, silwid);
        ++n_trans;
    }

    E_INFO("Added %d silence word transitions\n", n_trans);
    return n_trans;
}
Example #2
0
int
fsg_model_add_alt(fsg_model_t * fsg, char const *baseword,
                  char const *altword)
{
    int i, basewid, altwid;
    int ntrans;

    /* FIXME: This will get slow, eventually... */
    for (basewid = 0; basewid < fsg->n_word; ++basewid)
        if (0 == strcmp(fsg->vocab[basewid], baseword))
            break;
    if (basewid == fsg->n_word) {
        E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword);
        return -1;
    }
    altwid = fsg_model_word_add(fsg, altword);
    if (fsg->altwords == NULL)
        fsg->altwords = bitvec_alloc(fsg->n_word_alloc);
    bitvec_set(fsg->altwords, altwid);

    E_DEBUG(2,("Adding alternate word transitions (%s,%s) to FSG\n",
               baseword, altword));

    /* Look for all transitions involving baseword and duplicate them. */
    /* FIXME: This will also get slow, eventually... */
    ntrans = 0;
    for (i = 0; i < fsg->n_state; ++i) {
        hash_iter_t *itor;
        if (fsg->trans[i].trans == NULL)
            continue;
        for (itor = hash_table_iter(fsg->trans[i].trans); itor;
             itor = hash_table_iter_next(itor)) {
            glist_t trans;
            gnode_t *gn;

            trans = hash_entry_val(itor->ent);
            for (gn = trans; gn; gn = gnode_next(gn)) {
                fsg_link_t *fl = gnode_ptr(gn);
                if (fl->wid == basewid) {
                    fsg_link_t *link;

                    /* Create transition object */
                    link = listelem_malloc(fsg->link_alloc);
                    link->from_state = fl->from_state;
                    link->to_state = fl->to_state;
                    link->logs2prob = fl->logs2prob; /* FIXME!!!??? */
                    link->wid = altwid;

                    trans =
                        glist_add_ptr(trans, (void *) link);
                    ++ntrans;
                }
            }
            hash_entry_val(itor->ent) = trans;
        }
    }

    E_DEBUG(2,("Added %d alternate word transitions\n", ntrans));
    return ntrans;
}
static void
get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win)
{
    int32 f, sf, ef;
    ps_latnode_t *node;

    if (!ngs->fwdtree) {
        ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
        return;
    }

    sf = frm - win;
    if (sf < 0)
        sf = 0;
    ef = frm + win;
    if (ef > ngs->n_frame)
        ef = ngs->n_frame;

    bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
    ngs->n_expand_words = 0;

    for (f = sf; f < ef; f++) {
        for (node = ngs->frm_wordlist[f]; node; node = node->next) {
            if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) {
                ngs->expand_word_list[ngs->n_expand_words++] = node->wid;
                bitvec_set(ngs->expand_word_flag, node->wid);
            }
        }
    }
    ngs->expand_word_list[ngs->n_expand_words] = -1;
    ngs->st.n_fwdflat_word_transition += ngs->n_expand_words;
}
Example #4
0
int handle_credfork(struct provmsg *msg, void *arg)
{
    struct provmsg_credfork *m = (struct provmsg_credfork *) msg;
    struct bitvec *alive = (struct bitvec *) arg;
    bitvec_set(alive, m->forked_cred);
    return 0;
}
static void
ngram_fwdflat_expand_all(ngram_search_t *ngs)
{
    int n_words, i;

    /* For all "real words" (not fillers or <s>/</s>) in the dictionary,
     *
     * 1) Add the ones which are in the LM to the fwdflat wordlist
     * 2) And to the expansion list (since we are expanding all)
     */
    ngs->n_expand_words = 0;
    n_words = ps_search_n_words(ngs);
    bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs));
    for (i = 0; i < n_words; ++i) {
        if (!ngram_model_set_known_wid(ngs->lmset,
                                       dict_basewid(ps_search_dict(ngs),i)))
            continue;
        ngs->fwdflat_wordlist[ngs->n_expand_words] = i;
        ngs->expand_word_list[ngs->n_expand_words] = i;
        bitvec_set(ngs->expand_word_flag, i);
        ngs->n_expand_words++;
    }
    E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words);
    ngs->expand_word_list[ngs->n_expand_words] = -1;
    ngs->fwdflat_wordlist[ngs->n_expand_words] = -1;
}
int main(int argc, char **argv) {
    bitvec_t *b = NULL;
    assert(!bitvec_alloc(&b, 1));

    bitvec_set(b, 0);
    assert(b->storage[0] == 0x1);

    return 0;
}
int
main(int argc, char *argv[])
{
	bitvec_t *bv;
	int i, j;
	clock_t c;

	TEST_ASSERT(bv = bitvec_alloc(199));
	bitvec_set(bv,198);
	bitvec_set(bv,0);
	bitvec_set(bv,42);
	bitvec_set(bv,43);
	bitvec_set(bv,44);
	TEST_ASSERT(bitvec_is_set(bv,198));
	TEST_ASSERT(bitvec_is_set(bv,0));
	TEST_ASSERT(bitvec_is_set(bv,42));
	TEST_ASSERT(bitvec_is_set(bv,43));
	TEST_ASSERT(bitvec_is_set(bv,44));
	TEST_EQUAL(5, bitvec_count_set(bv, 199));
	bitvec_clear(bv, 43);
	TEST_EQUAL(0, bitvec_is_set(bv,43));

	c = clock();
	for (j = 0; j < 1000000; ++j)
		bitvec_count_set(bv, 199);
	c = clock() - c;
	printf("1000000 * 199 bitvec_count_set in %.2f sec\n",
	       (double)c / CLOCKS_PER_SEC);
	bitvec_free(bv);

	bv = bitvec_alloc(1314);
	c = clock();
	for (j = 0; j < 50000; ++j)
		for (i = 0; i < 1314; ++i)
			bitvec_set(bv, i);
	c = clock() - c;
	printf("50000 * 1314 bitvec_set in %.2f sec\n",
	       (double)c / CLOCKS_PER_SEC);
	bitvec_free(bv);

	/* Test realloc */
	bv = bitvec_alloc(13);
	for (i = 1; i < 13; i+=2)
	    bitvec_set(bv, i);
	printf("Bits set %d\n", bitvec_count_set(bv, 13));
        TEST_EQUAL(6, bitvec_count_set(bv, 13));
	bv = bitvec_realloc(bv, 13, 2000);
	for (i = 0; i < 2000; i++) {
          /* printf("%d %d\n", i, bitvec_is_set(bv, i) != 0); */
	}
	printf("Bits set after realloc %d\n", bitvec_count_set(bv, 2000));
        TEST_EQUAL(6, bitvec_count_set(bv, 2000));
	bitvec_free(bv);

	return 0;
}
Example #8
0
static error make_collapsible_walk(ntree_t *t, void *opaque)
{
  error       err;
  treeview_t *tr = opaque;
  int         i;
  txtfmt_t   *tx;

  i = tr->walk.index + 1; /* index of node (0..N-1) */

  if (ntree_first_child(t))
  {
    err = bitvec_set(tr->openable, i);
    if (err)
      return err;

    err = bitvec_set(tr->open, i);
    if (err)
      return err;
  }

  // this bit might be temporary

  tx = ntree_get_data(t);

  // always wrap to ensure that \ns in the input are handled
  err = txtfmt_wrap(tx, tr->text_width);
  if (err)
    return err;

  if (txtfmt_get_height(tx) > 1)
  {
    err = bitvec_set(tr->multiline, i);
    if (err)
      return err;
  }

  tr->walk.index = i;

  return error_OK;
}
Example #9
0
static int32 senactive_to_mgauactive (acoustic_t *am)
{
    int32 n, s;
    gauden_t *gau;
    senone_t *sen;

    sen = am->sen;
    gau = am->gau;

    bitvec_clear_all(am->gauden_active, gau->n_mgau);
    n = 0;
    for (s = 0; s < sen->n_sen; s++) {
        if (bitvec_is_set (am->sen_active, s)) {
            bitvec_set (am->gauden_active, sen->sen2mgau[s]);
            n++;
        }
    }

    return n;
}
Example #10
0
static error select_bobs(filerwin    *fw,
                         int          x,
                         int          y,
                         int          c,
                         unsigned int flags,
                         void        *opaque)
{
  os_box b;

  NOT_USED(x);
  NOT_USED(y);
  NOT_USED(flags);
  NOT_USED(opaque);

  bitvec_set(fw->selection, c);

  index_to_area(fw, c, &b);
  wimp_force_redraw(fw->w, b.x0, b.y0, b.x1, b.y1);

  return error_OK;
}
Example #11
0
int DBSCAN(void *data, unsigned int *d, unsigned int dlen,
            float eps, unsigned int minpoints,
            unsigned int (*neighbours_search)(bitvec_t *out,
            void *, unsigned int, float, unsigned int *)
    ) {

    unsigned int cluster = 1;
    unsigned int count, i, j, k;

    bitvec_t *visited, *clustered, *neighbours, *neighbours2;

    bitvec_alloc(&visited, dlen);
    bitvec_alloc(&clustered, dlen);
    bitvec_alloc(&neighbours, dlen);
    bitvec_alloc(&neighbours2, dlen);

    // d is a list of identifiers
    for (i = 0; i < dlen; i++) {
        count = 0;

        // Already visited this point
        if (bitvec_check(visited, i)) continue;
        fprintf(stderr, "Cluster: %.2f\n", 100.0*i/dlen);

        // Mark this point as visited
        bitvec_set(visited, i);

        bitvec_clear_all(neighbours);

        // Get the first set of neighbours
        if(neighbours_search(neighbours, data, i, eps, &count)) {
            return 1;
        }
        if (count < minpoints) {
            *(d + i) = 0; // Noise
            continue;
        }

        *(d + i) = cluster;
        bitvec_set(clustered, i);

        // Expand the cluster
        for (j = 0; j < dlen; j++) {
            if(!bitvec_check(neighbours, j)) continue;

            if(!bitvec_check(visited, j)) {
                bitvec_set(visited, j);
                count = 0;
                bitvec_clear_all(neighbours2);
                if (neighbours_search(neighbours2, data, j, eps, &count)) {
                    return 1;
                }
                if (count >= minpoints) {
                    // Merge two bitarrays
                    bitvec_union(neighbours, neighbours2);
                    j = 0;
                }
            }
            if (!bitvec_check(clustered, j)) {
                *(d + j) = cluster;
                bitvec_set(clustered, j);
            }
        }

        cluster++;
    }

    bitvec_free(visited);
    bitvec_free(clustered);
    bitvec_free(neighbours);
    bitvec_free(neighbours2);

    return 0;

}
Example #12
0
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      cmd_ln_str("-feat"),
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-lmctlfn"),
			      cmd_ln_str("-lmdumpdir"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_str("-senmgau"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-gs"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
      E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
	E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
      }
    }else if(lm){
      if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
	lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

	for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
	for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;

      }
    }else if(lm){ /* No LM is set at this point*/
      lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
      lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
      for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
      for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;

    }
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));


    if(lmset){
      kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
      /* Just allocate pointers*/
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

      for(i=0;i<kbcore_nlm(kbcore);i++){
	E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
	n=0;
	for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */
	  wp[j].wid=-1;
	  wp[j].prob=-1;
	}
	n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
	E_INFO("Size of word table after unigram + words in class: %d.\n",n);
	if (n < 1)
	  E_FATAL("%d active words in %s\n", n,lmset[i].name);
	n = wid_wordprob2alt(dict,wp,n);
	E_INFO("Size of word table after adding alternative prons: %d.\n",n);
	if (cmd_ln_int32("-treeugprob") == 0) {
	  for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
	}

	for (j = 0; j < kb->n_lextree; j++) {
	  kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
	  lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
	  E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
		 kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
	}
      }

    }else if (lm){
      E_INFO("Creating Unigram Table\n");
      n=0;
      n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
      E_INFO("Size of word table after unigram + words in class: %d\n",n);
      if (n < 1)
	E_FATAL("%d active words\n", n);
      n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
      
      /* Retain or remove unigram probs from lextree, depending on option */
      if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	  wp[i].prob = -1;    	/* Flatten all initial probabilities */
      }
      
      /* Create the desired no. of unigram lextrees */
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
      for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
      }
      E_INFO("Lextrees(%d), %d nodes(ug)\n",
	     kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }



    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }


    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);


    E_INFO("Lextrees(%d), %d nodes(filler)\n",
	     kb->n_lextree, 
	     lextree_n_node(kb->fillertree[0]));
    

    if (cmd_ln_int32("-lextreedump")) {
      if(lmset){
	E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "UGTREE %d\n", i);
	lextree_dump (kb->ugtree[i], dict, stderr);
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "FILLERTREE %d\n", i);
	lextree_dump (kb->fillertree[i], dict, stderr);
      }
      fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);
    
    kb->rec_bstcid=-1;
    kb->skip_count=0;
    
    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);
    
    if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
      E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }
    
    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

	kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) 
      ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));
  

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if(lmset)
      n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
      n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
Example #13
0
dict2pid_t *
dict2pid_build(bin_mdef_t * mdef, dict_t * dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t ***rdiph_rc;
    bitvec_t *ldiph, *rdiph, *single;
    int32 pronlen;
    int32 b, l, r, w, p;

    E_INFO("Building PID tables for dictionary\n");
    assert(mdef);
    assert(dict);

    dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t));
    dict2pid->refcount = 1;
    dict2pid->mdef = bin_mdef_retain(mdef);
    dict2pid->dict = dict_retain(dict);
    E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n",
           mdef->n_ciphone, sizeof(s3ssid_t),
           mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024);
    dict2pid->ldiph_lc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));
    /* Only used internally to generate rssid */
    rdiph_rc =
        (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone,
                                     mdef->n_ciphone, sizeof(s3ssid_t));

    dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone,
                                                       mdef->n_ciphone,
                                                       mdef->n_ciphone,
                                                       sizeof
                                                       (s3ssid_t));
    /* Actually could use memset for this, if BAD_S3SSID is guaranteed
     * to be 65535... */
    for (b = 0; b < mdef->n_ciphone; ++b) {
        for (r = 0; r < mdef->n_ciphone; ++r) {
            for (l = 0; l < mdef->n_ciphone; ++l) {
                dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
                dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID;
                rdiph_rc[b][l][r] = BAD_S3SSID;
            }
        }
    }

    /* Track which diphones / ciphones have been seen. */
    ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone);
    single = bitvec_alloc(mdef->n_ciphone);

    for (w = 0; w < dict_size(dict2pid->dict); w++) {
        pronlen = dict_pronlen(dict, w);

        if (pronlen >= 2) {
            b = dict_first_phone(dict, w);
            r = dict_second_phone(dict, w);
            /* Populate ldiph_lc */
            if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) {
                /* Mark this diphone as done */
                bitvec_set(ldiph, b * mdef->n_ciphone + r);

                /* Record all possible ssids for b(?,r) */
                for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                              (s3cipid_t) l, (s3cipid_t) r,
                                              WORD_POSN_BEGIN);
                    dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p);
                }
            }


            /* Populate rdiph_rc */
            l = dict_second_last_phone(dict, w);
            b = dict_last_phone(dict, w);
            if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) {
                /* Mark this diphone as done */
                bitvec_set(rdiph, b * mdef->n_ciphone + l);

                for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) {
                    p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b,
                                              (s3cipid_t) l, (s3cipid_t) r,
                                              WORD_POSN_END);
                    rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p);
                }
            }
        }
        else if (pronlen == 1) {
            b = dict_pron(dict, w, 0);
            E_DEBUG(1,("Building tables for single phone word %s phone %d = %s\n",
                       dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)));
            /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */
            if (bitvec_is_clear(single, b)) {
                populate_lrdiph(dict2pid, rdiph_rc, b);
                bitvec_set(single, b);
            }
        }
    }

    bitvec_free(ldiph);
    bitvec_free(rdiph);
    bitvec_free(single);

    /* Try to compress rdiph_rc into rdiph_rc_compressed */
    compress_right_context_tree(dict2pid, rdiph_rc);
    compress_left_right_context_tree(dict2pid);

    ckd_free_3d(rdiph_rc);

    dict2pid_report(dict2pid);
    return dict2pid;
}
Example #14
0
File: kb.c Project: 10v/cmusphinx
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      "1s_c_d_dd",    /* Hack!! Hardwired constant 
						for -feat argument */
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    
    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
    
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    E_INFO("Building lextrees\n");
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), 
							sizeof(int32));
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp);
    if (n < 1)
	E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
    
    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
    }
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;
    
    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
    }
    bitvec_free (lc_active);
    ckd_free ((void *) lc);
    
    /* Create filler lextrees */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }
    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    
    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
	   kb->n_lextree, lextree_n_node(kb->ugtree[0]), 
			lextree_n_node(kb->fillertree[0]));
    
    if (cmd_ln_int32("-lextreedump")) {
	for (i = 0; i < kb->n_lextree; i++) {
	    fprintf (stderr, "UGTREE %d\n", i);
	    lextree_dump (kb->ugtree[i], dict, stderr);
	}
	for (i = 0; i < kb->n_lextree; i++) {
	    fprintf (stderr, "FILLERTREE %d\n", i);
	    lextree_dump (kb->fillertree[i], dict, stderr);
	}
	fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
float64 vector_vqgen (float32 **data, int32 rows, int32 cols, int32 vqrows,
		      float64 epsilon, int32 maxiter,
		      float32 **mean, int32 *map)
{
    int32 i, j, r, it;
    static uint32 seed = 1;
    float64 sqerr, prev_sqerr=0, t;
    bitvec_t sel;
    int32 *count;
    float32 *gmean;
    ptmr_t tm;
    
    assert ((rows >= vqrows) && (maxiter >= 0) && (epsilon > 0.0));
    
    sel = bitvec_alloc (rows);
    
    ptmr_init (&tm);
    ptmr_start (&tm);
    
    /* Pick a random initial set of centroids */
#ifndef WIN32			/* RAH */
    srandom (seed);
    seed ^= random();
#else  /* RAH */
      srand ((unsigned) time(NULL)); /* RAH */
#endif
    for (i = 0; i < vqrows; i++) {
	/* Find r = a random, previously unselected row from the input */

#ifndef WIN32			/* RAH */
	r = (random() & (int32)0x7fffffff) % rows;
#else  /* RAH */
	r = (rand() & (int32)0x7fffffff) % rows; /* RAH */
#endif /* RAH */
	while (bitvec_is_set (sel, r)) {	/* BUG: possible infinite loop!! */
	    if (++r >= rows)
		r = 0;
	}
	bitvec_set (sel, r);
	
	memcpy ((void *)(mean[i]), (void *)(data[r]), cols * sizeof(float32));
	/* BUG: What if two randomly selected rows are identical in content?? */
    }
    bitvec_free (sel);
    
    count = (int32 *) ckd_calloc (vqrows, sizeof(int32));
    
    /* In k-means, unmapped means in any iteration are a problem.  Replace them with gmean */
    gmean = (float32 *) ckd_calloc (cols, sizeof(float32));
    vector_mean (gmean, mean, vqrows, cols);

    for (it = 0;; it++) {		/* Iterations of k-means algorithm */
	/* Find the current data->mean mappings (labels) */
	sqerr = 0.0;
	for (i = 0; i < rows; i++) {
	    map[i] = vector_vqlabel (data[i], mean, vqrows, cols, &t);
	    sqerr += t;
	}
	ptmr_stop(&tm);
	
	if (it == 0)
	    E_INFO("Iter %4d: %.1fs CPU; sqerr= %e\n", it, tm.t_cpu, sqerr);
	else
	    E_INFO("Iter %4d: %.1fs CPU; sqerr= %e; delta= %e\n",
		   it, tm.t_cpu, sqerr, (prev_sqerr-sqerr)/prev_sqerr);
	
	/* Check if exit condition satisfied */
	if ((sqerr == 0.0) || (it >= maxiter-1) ||
	    ((it > 0) && ( ((prev_sqerr - sqerr) / prev_sqerr) < epsilon )) )
	    break;
	prev_sqerr = sqerr;
	
	ptmr_start(&tm);
	
	/* Update (reestimate) means */
	for (i = 0; i < vqrows; i++) {
	    for (j = 0; j < cols; j++)
		mean[i][j] = 0.0;
	    count[i] = 0;
	}
	for (i = 0; i < rows; i++) {
	    vector_accum (mean[map[i]], data[i], cols);
	    count[map[i]]++;
	}
	for (i = 0; i < vqrows; i++) {
	    if (count[i] > 1) {
		t = 1.0 / (float64)(count[i]);
		for (j = 0; j < cols; j++)
		  /*		  mean[i][j] *= t; */ /* RAH, compiler was complaining about this,  */
		  mean[i][j] = (float32) ((float64) mean[i][j] * (float64) t); /*  */
	    } else if (count[i] == 0) {
		E_ERROR("Iter %d: mean[%d] unmapped\n", it, i);
		memcpy (mean[i], gmean, cols * sizeof(float32));
	    }
	}
    }
    
    ckd_free (count);
    ckd_free (gmean);
    
    return sqerr;
}
Example #16
0
/*
 * All the heavy lifting happens here.  Checks to see if this inode is a hard
 * link to one we've already encountered, and if not, issues inode_alloc and
 * setattr messages.  Then, regardless of the check, sends an inode_link message
 * to "register" the filename.
 */
static int callback(const char *fpath, const struct stat *sb, int typeflag,
		struct FTW *ftwbuf)
{
	struct provmsg_inode_alloc allocmsg;
	struct provmsg_setattr attrmsg;
	struct provmsg_link linkmsg;
	char *parent_path;
	const char *fname;
	struct stat parent;
	int fname_len;

	/* Set up early so we can copy larger chunks to other messages */
	linkmsg.inode.sb_uuid = uuid;
	linkmsg.inode.ino = sb->st_ino;

	if (!bitvec_test(&inodes, sb->st_ino)) {
		bitvec_set(&inodes, sb->st_ino);

		/* Write allocation info */
		msg_initlen(&allocmsg.msg, sizeof(allocmsg));
		allocmsg.msg.type = PROVMSG_INODE_ALLOC;
		allocmsg.msg.cred_id = 0;
		allocmsg.inode = linkmsg.inode;
		if (gzwrite(loggz, &allocmsg, sizeof allocmsg) < sizeof allocmsg) {
			perror("gzwrite");
			return 1;
		}

		/* Write attribute info */
		msg_initlen(&attrmsg.msg, sizeof(attrmsg));
		attrmsg.msg.type = PROVMSG_SETATTR;
		attrmsg.msg.cred_id = 0;
		attrmsg.inode = linkmsg.inode;
		attrmsg.mode = sb->st_mode;
		attrmsg.uid = sb->st_uid;
		attrmsg.gid = sb->st_gid;
		if (gzwrite(loggz, &attrmsg, sizeof(attrmsg)) < sizeof(attrmsg)) {
			perror("gzwrite");
			return 1;
		}
	}

	/* Send link (i.e. filename) message */
	linkmsg.msg.type = PROVMSG_LINK;
	linkmsg.msg.cred_id = 0;
	/* .inode field was set up earlier */
	if (ftwbuf->level > 0) {
		fname = strndup(fpath, PATH_MAX);
		parent_path = strndup(fpath, PATH_MAX);
		if (stat(dirname(parent_path), &parent)) {
			perror("stat");
			return 1;
		}
		linkmsg.dir = parent.st_ino;
		fname = basename(fname);
		fname_len = strlen(fname);
	} else {
		/* Stick with convention that root dir is "its own parent" */
		linkmsg.dir = linkmsg.inode.ino;
		fname_len = 0;
	}
	msg_initlen(&linkmsg.msg, sizeof(linkmsg) + fname_len);
	if (gzwrite(loggz, &linkmsg, sizeof(linkmsg)) < sizeof(linkmsg)) {
		perror("gzwrite");
		return 1;
	}
	if (fname_len > 0 && gzwrite(loggz, fname, fname_len) < fname_len) {
		perror("gzwrite");
		return 1;
	}

	return 0;
}
static void
fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx)
{
    int32 i, cf, nf, w, pip, newscore, thresh, wordthresh;
    int32 *awl;
    root_chan_t *rhmm;
    chan_t *hmm, *nexthmm;

    cf = frame_idx;
    nf = cf + 1;
    i = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));

    thresh = ngs->best_score + ngs->fwdflatbeam;
    wordthresh = ngs->best_score + ngs->fwdflatwbeam;
    pip = ngs->pip;
    E_DEBUG(3,("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh));

    /* Scan all active words. */
    for (w = *(awl++); i > 0; --i, w = *(awl++)) {
        rhmm = (root_chan_t *) ngs->word_chan[w];
        /* Propagate active root channels */
        if (hmm_frame(&rhmm->hmm) == cf
            && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) {
            hmm_frame(&rhmm->hmm) = nf;
            bitvec_set(ngs->word_active, w);

            /* Transitions out of root channel */
            newscore = hmm_out_score(&rhmm->hmm);
            if (rhmm->next) {
                assert(!dict_is_single_phone(ps_search_dict(ngs), w));

                newscore += pip;
                if (newscore BETTER_THAN thresh) {
                    hmm = rhmm->next;
                    /* Enter all right context phones */
                    if (hmm->info.rc_id >= 0) {
                        for (; hmm; hmm = hmm->next) {
                            if ((hmm_frame(&hmm->hmm) < cf)
                                || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
                                hmm_enter(&hmm->hmm, newscore,
                                          hmm_out_history(&rhmm->hmm), nf);
                            }
                        }
                    }
                    /* Just a normal word internal phone */
                    else {
                        if ((hmm_frame(&hmm->hmm) < cf)
                            || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) {
                                hmm_enter(&hmm->hmm, newscore,
                                          hmm_out_history(&rhmm->hmm), nf);
                        }
                    }
                }
            }
            else {
                assert(dict_is_single_phone(ps_search_dict(ngs), w));

                /* Word exit for single-phone words (where did their
                 * whmms come from?) (either from
                 * ngram_search_fwdtree, or from
                 * ngram_fwdflat_allocate_1ph(), that's where) */
                if (newscore BETTER_THAN wordthresh) {
                    ngram_search_save_bp(ngs, cf, w, newscore,
                                         hmm_out_history(&rhmm->hmm), 0);
                }
            }
        }

        /* Transitions out of non-root channels. */
        for (hmm = rhmm->next; hmm; hmm = hmm->next) {
            if (hmm_frame(&hmm->hmm) >= cf) {
                /* Propagate forward HMMs inside the beam. */
                if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) {
                    hmm_frame(&hmm->hmm) = nf;
                    bitvec_set(ngs->word_active, w);

                    newscore = hmm_out_score(&hmm->hmm);
                    /* Word-internal phones */
                    if (hmm->info.rc_id < 0) {
                        newscore += pip;
                        if (newscore BETTER_THAN thresh) {
                            nexthmm = hmm->next;
                            /* Enter all right-context phones. */
                            if (nexthmm->info.rc_id >= 0) {
                                 for (; nexthmm; nexthmm = nexthmm->next) {
                                    if ((hmm_frame(&nexthmm->hmm) < cf)
                                        || (newscore BETTER_THAN
                                            hmm_in_score(&nexthmm->hmm))) {
                                        hmm_enter(&nexthmm->hmm,
                                                  newscore,
                                                  hmm_out_history(&hmm->hmm),
                                                  nf);
                                    }
                                }
                            }
                            /* Enter single word-internal phone. */
                            else {
                                if ((hmm_frame(&nexthmm->hmm) < cf)
                                    || (newscore BETTER_THAN
                                        hmm_in_score(&nexthmm->hmm))) {
                                    hmm_enter(&nexthmm->hmm, newscore,
                                              hmm_out_history(&hmm->hmm), nf);
                                }
                            }
                        }
                    }
                    /* Right-context phones - apply word beam and exit. */
                    else {
                        if (newscore BETTER_THAN wordthresh) {
                            ngram_search_save_bp(ngs, cf, w, newscore,
                                                 hmm_out_history(&hmm->hmm),
                                                 hmm->info.rc_id);
                        }
                    }
                }
                /* Zero out inactive HMMs. */
                else if (hmm_frame(&hmm->hmm) != nf) {
                    hmm_clear_scores(&hmm->hmm);
                }
            }
        }
    }
}
/**
 * Find all active words in backpointer table and sort by frame.
 */
static void
build_fwdflat_wordlist(ngram_search_t *ngs)
{
    int32 i, f, sf, ef, wid, nwd;
    bptbl_t *bp;
    ps_latnode_t *node, *prevnode, *nextnode;

    /* No tree-search, use statically allocated wordlist. */
    if (!ngs->fwdtree)
        return;

    memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist));

    /* Scan the backpointer table for all active words and record
     * their exit frames. */
    for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) {
        sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1;
        ef = bp->frame;
        wid = bp->wid;

        /* Anything that can be transitioned to in the LM can go in
         * the word list. */
        if (!ngram_model_set_known_wid(ngs->lmset,
                                       dict_basewid(ps_search_dict(ngs), wid)))
            continue;

        /* Look for it in the wordlist. */
        for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid);
             node = node->next);

        /* Update last end frame. */
        if (node)
            node->lef = ef;
        else {
            /* New node; link to head of list */
            node = listelem_malloc(ngs->latnode_alloc);
            node->wid = wid;
            node->fef = node->lef = ef;

            node->next = ngs->frm_wordlist[sf];
            ngs->frm_wordlist[sf] = node;
        }
    }

    /* Eliminate "unlikely" words, for which there are too few end points */
    for (f = 0; f < ngs->n_frame; f++) {
        prevnode = NULL;
        for (node = ngs->frm_wordlist[f]; node; node = nextnode) {
            nextnode = node->next;
            /* Word has too few endpoints */
            if ((node->lef - node->fef < ngs->min_ef_width) ||
                /* Word is </s> and doesn't actually end in last frame */
                ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) {
                if (!prevnode)
                    ngs->frm_wordlist[f] = nextnode;
                else
                    prevnode->next = nextnode;
                listelem_free(ngs->latnode_alloc, node);
            }
            else
                prevnode = node;
        }
    }

    /* Form overall wordlist for 2nd pass */
    nwd = 0;
    bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs));
    for (f = 0; f < ngs->n_frame; f++) {
        for (node = ngs->frm_wordlist[f]; node; node = node->next) {
            if (!bitvec_is_set(ngs->word_active, node->wid)) {
                bitvec_set(ngs->word_active, node->wid);
                ngs->fwdflat_wordlist[nwd++] = node->wid;
            }
        }
    }
    ngs->fwdflat_wordlist[nwd] = -1;
    E_INFO("Utterance vocabulary contains %d words\n", nwd);
}
Example #19
0
File: kb.c Project: wdebeaum/cabot
/* Update kb w/ new dictionary and new LM.
 * assumes: single-LM kbcore (before & after)
 * requires: updating kbcore
 * Lucian Galescu, 08/11/2005
 */
void kb_update_lm(kb_t *kb, char *dictfile, char *lmfile)
{
  kbcore_t *kbcore;
  mdef_t *mdef;
  dict_t *dict;
  dict2pid_t *d2p;
  lm_t *lm;
  s3cipid_t ci;
  s3wid_t w;
  int32 i, n, n_lc;
  wordprob_t *wp;
  s3cipid_t *lc;
  bitvec_t lc_active;
  
  /*** clean up ***/
  vithist_t *vithist = kb->vithist;

  if (kb->fillertree) 
    ckd_free ((void *)kb->fillertree);
  if (kb->hmm_hist) 
    ckd_free ((void *)kb->hmm_hist);
  
  
  /* vithist */
  if (vithist) {
    ckd_free ((void *) vithist->entry);
    ckd_free ((void *) vithist->frame_start);
    ckd_free ((void *) vithist->bestscore);
    ckd_free ((void *) vithist->bestvh);
    ckd_free ((void *) vithist->lms2vh_root);    
    ckd_free ((void *) kb->vithist);
  }
  
  /*** re-initialize ***/
  
  kb->kbcore = kbcore_update_lm(kb->kbcore, 
                                dictfile, 
                                cmd_ln_str("-fdict"),
                                "",	/* Hack!! Hardwired constant for -compsep argument */
                                lmfile,
                                cmd_ln_str("-fillpen"),
                                cmd_ln_float32("-silprob"),
                                cmd_ln_float32("-fillprob"),
                                cmd_ln_float32("-lw"),
                                cmd_ln_float32("-wip"),
                                cmd_ln_float32("-uw"));
  if(kb->kbcore==NULL){
    E_FATAL("Updating kbcore failed\n");
  }
  
  kbcore = kb->kbcore;
  
  mdef = kbcore_mdef(kbcore);
  dict = kbcore_dict(kbcore);
  lm = kbcore_lm(kbcore);
  d2p = kbcore_dict2pid(kbcore);
  
  if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
    E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
  
  if(lm){
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
      E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
  }
  
  /*
   * Unlink <s> and </s> between dictionary and LM, to prevent their 
   * recognition.  They are merely dummy words (anchors) at the beginning 
   * and end of each utterance.
   */
  if(lm){
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
      kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
      kbcore->dict2lmwid[w] = BAD_S3LMWID;
  }
  
  /* Build set of all possible left contexts */
  lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
  lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
  for (w = 0; w < dict_size (dict); w++) {
    ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
    if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
  }
  ci = mdef_silphone(mdef);
  bitvec_set (lc_active, ci);
  for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
    if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
  }
  lc[n_lc] = BAD_S3CIPID;
  
  E_INFO("Building lextrees\n");
  /* Get the number of lexical tree*/
  kb->n_lextree = cmd_ln_int32 ("-Nlextree");
  if (kb->n_lextree < 1) {
    E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
            kb->n_lextree);
    kb->n_lextree = 1;
  }
  
  /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
  /* Build active word list */
  wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
  
  
  if (lm) {
    E_INFO("Creating Unigram Table\n");
    n=0;
    n = lm_ug_wordprob (lm, dict, MAX_NEG_INT32, wp);
    E_INFO("Size of word table after unigram + words in class: %d\n",n);
    if (n < 1)
      E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
    
    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
      for (i = 0; i < n; i++)
        wp[i].prob = -1;    	/* Flatten all initial probabilities */
    }
    
    /* Create the desired no. of unigram lextrees */
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
      kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
      lextree_type (kb->ugtree[i]) = 0;
    }
    E_INFO("Lextrees(%d), %d nodes(ug)\n",
           kb->n_lextree, lextree_n_node(kb->ugtree[0]));
  }

  /* Create filler lextrees */
  /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
  n = 0;
  for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
    if (dict_filler_word(dict, i)) {
      wp[n].wid = i;
      wp[n].prob = fillpen (kbcore->fillpen, i);
      n++;
    }
  }


  kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
  for (i = 0; i < kb->n_lextree; i++) {
    kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
    lextree_type (kb->fillertree[i]) = -1;
  }
  ckd_free ((void *) wp);
  ckd_free ((void *) lc);
  bitvec_free (lc_active);


  E_INFO("Lextrees(%d), %d nodes(filler)\n",
         kb->n_lextree, 
         lextree_n_node(kb->fillertree[0]));

  if (cmd_ln_int32("-lextreedump")) {
    for (i = 0; i < kb->n_lextree; i++) {
      fprintf (stderr, "UGTREE %d\n", i);
      lextree_dump (kb->ugtree[i], dict, stderr);
    }
    for (i = 0; i < kb->n_lextree; i++) {
      fprintf (stderr, "FILLERTREE %d\n", i);
      lextree_dump (kb->fillertree[i], dict, stderr);
    }
    fflush (stderr);
  }

  kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
                        kbcore->dict2pid->n_comstate);

  kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));

  kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

  n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

  n /= kb->hmm_hist_binsize;
  kb->hmm_hist_bins = n+1;
  kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */

}
Example #20
0
subvq_t *subvq_init (char *file)
{
    FILE *fp;
    char line[16384];
    int32 n_sv;
    int32 s, k, n, r, c;
    char *strp;
    subvq_t *vq;
    
    E_INFO("Loading Mixture Gaussian sub-VQ file '%s'\n", file);
    
    vq = (subvq_t *) ckd_calloc (1, sizeof(subvq_t));
    
    fp = myfopen(file, "r");
    
    /* Read until "Sub-vectors" */
    for (;;) {
	if (fgets (line, sizeof(line), fp) == NULL)
	    E_FATAL("Failed to read VQParam header\n");
	if (sscanf (line, "VQParam %d %d -> %d %d",
		    &(vq->origsize.r), &(vq->origsize.c), &(vq->n_sv), &(vq->vqsize)) == 4)
	    break;
    }
    
    n_sv = vq->n_sv;
    
    vq->svsize = (int32 *) ckd_calloc (n_sv, sizeof(int32));
    vq->featdim = (int32 **) ckd_calloc (n_sv, sizeof(int32 *));
    vq->mean = (float32 ***) ckd_calloc (n_sv, sizeof(float32 **));
    vq->var  = (float32 ***) ckd_calloc (n_sv, sizeof(float32 **));
    vq->map = (int32 ***) ckd_calloc_3d (vq->origsize.r, vq->origsize.c, n_sv, sizeof(int32));
    vq->cb_invalid = bitvec_alloc (vq->origsize.r);
    
    /* Read subvector sizes and feature dimension maps */
    for (s = 0; s < n_sv; s++) {
	if ((fgets (line, sizeof(line), fp) == NULL) ||
	    (sscanf (line, "Subvector %d length %d%n", &k, &(vq->svsize[s]), &n) != 2) ||
	    (k != s))
	    E_FATAL("Error reading length(subvector %d)\n", s);
	
	vq->mean[s] = (float32 **) ckd_calloc_2d (vq->vqsize, vq->svsize[s], sizeof(float32));
	vq->var[s]  = (float32 **) ckd_calloc_2d (vq->vqsize, vq->svsize[s], sizeof(float32));
	vq->featdim[s] = (int32 *) ckd_calloc (vq->svsize[s], sizeof(int32));
	
	for (strp = line+n, c = 0; c < vq->svsize[s]; c++) {
	    if (sscanf (strp, "%d%n", &(vq->featdim[s][c]), &n) != 1)
		E_FATAL("Error reading subvector(%d).featdim(%d)\n", s, c);
	    strp += n;
	}
    }
    
    /* Echo info for sanity check */
    E_INFO("Original #codebooks(states)/codewords: %d x %d\n", vq->origsize.r, vq->origsize.c);
    E_INFO("Subvectors: %d, VQsize: %d\n", vq->n_sv, vq->vqsize);
    for (s = 0; s < n_sv; s++) {
	E_INFO("Feature dims(%d): ", s);
	for (c = 0; c < vq->svsize[s]; c++)
	    printf (" %2d", vq->featdim[s][c]);
	printf (" (%d)\n", vq->svsize[s]);
    }
    
    /* Read VQ codebooks and maps for each subvector */
    for (s = 0; s < n_sv; s++) {
	E_INFO("Reading subvq %d\n", s);
	
	E_INFO("Reading codebook\n");
	if ((fgets (line, sizeof(line), fp) == NULL) ||
	    (sscanf (line, "Codebook %d", &k) != 1) || (k != s))
	    E_FATAL("Error reading header\n", s);
	
	for (r = 0; r < vq->vqsize; r++) {
	    if (fgets (line, sizeof(line), fp) == NULL)
		E_FATAL("Error reading row(%d)\n", r);
	    
	    for (strp = line, c = 0; c < vq->svsize[s]; c++) {
		if (sscanf (strp, "%f %f%n", &(vq->mean[s][r][c]), &(vq->var[s][r][c]), &k) != 2)
		    E_FATAL("Error reading row(%d) col(%d)\n", r, c);
		strp += k;
	    }
	}
	
#if 0
	E_INFO("Sanity check: mean[0,%d]:\n", vq->vqsize-1);
	vector_print (stdout, vq->mean[s][0], vq->svsize[s]);
	vector_print (stdout, vq->mean[s][vq->vqsize-1], vq->svsize[s]);
	E_INFO("Sanity check: var[0,%d]:\n", vq->vqsize-1);
	vector_print (stdout, vq->var[s][0], vq->svsize[s]);
	vector_print (stdout, vq->var[s][vq->vqsize-1], vq->svsize[s]);
#endif

	E_INFO("Reading map\n");
	if ((fgets (line, sizeof(line), fp) == NULL) ||
	    (sscanf (line, "Map %d", &k) != 1) || (k != s))
	    E_FATAL("Error reading header\n", s);
	
	for (r = 0; r < vq->origsize.r; r++) {
	    if (fgets (line, sizeof(line), fp) == NULL)
		E_FATAL("Error reading row(%d)\n", r);
	    
	    for (strp = line, c = 0; c < vq->origsize.c; c++) {
		if (sscanf (strp, "%d%n", &(vq->map[r][c][s]), &k) != 1)
		    E_FATAL("Error reading row(%d) col(%d)\n", r, c);
		strp += k;
	    }
	}
	
#if 0
	E_INFO("Sanity check: map[0][0]:\n");
	for (c = 0; c < vq->origsize.c; c++)
	    printf (" %d", vq->map[0][c][s]);
	printf ("\n");
#endif
	fflush (stdout);
    }
    
    if ((fscanf (fp, "%s", line) != 1) || (strcmp (line, "End") != 0))
	E_FATAL("Error reading 'End' token\n");
    
    fclose (fp);

    subvq_ivar_idet_precompute (vq, 0.0001 /* varfloor */);

#if 0    
    E_INFO("Sanity check: var[*,0]:\n");
    for (s = 0; s < n_sv; s++)
	vector_print (stdout, vq->var[s][0], vq->svsize[s]);
#endif

    /* Replace invalid entries in map with duplicate of a valid entry, if possible */
    for (r = 0; r < vq->origsize.r; r++) {
	k = -1;
	for (c = 0; c < vq->origsize.c; c++) {
	    if (vq->map[r][c][0] < 0) {
		/* All ought to be < 0 */
		for (s = 1; s < vq->n_sv; s++) {
		    if (vq->map[r][c][s] >= 0)
			E_FATAL("Partially undefined map[%d][%d]\n", r, c);
		}
	    } else {
		/* All ought to be >= 0 */
		for (s = 1; s < vq->n_sv; s++) {
		    if (vq->map[r][c][s] < 0)
			E_FATAL("Partially undefined map[%d][%d]\n", r, c);
		}
		k = c;	/* A valid codeword found; remember it */
	    }
	}
	
	if (k >= 0) {
	    /* Copy k into invalid rows */
	    for (c = 0; c < vq->origsize.c; c++) {
		if (vq->map[r][c][0] < 0) {
		    for (s = 0; s < vq->n_sv; s++)
			vq->map[r][c][s] = vq->map[r][k][s];
		}
	    }
	    bitvec_clear (vq->cb_invalid, r);
	} else
	    bitvec_set (vq->cb_invalid, r);
    }
    
    return vq;
}
Example #21
0
File: main.c Project: 10v/cmusphinx
main (int32 argc, char *argv[])
{
    kb_t kb;
    kbcore_t *kbcore;
    bitvec_t active;
    int32 w;
    
    cmd_ln_parse (arglist, argc, argv);
    unlimit();
    
    kbcore = kbcore_init (cmd_ln_float32("-logbase"),
			  cmd_ln_str("-feat"),
			  cmd_ln_str("-mdef"),
			  cmd_ln_str("-dict"),
			  cmd_ln_str("-fdict"),
			  cmd_ln_str("-compsep"),
			  cmd_ln_str("-lm"),
			  cmd_ln_str("-fillpen"),
			  cmd_ln_float32("-silprob"),
			  cmd_ln_float32("-fillprob"),
			  cmd_ln_float32("-lw"),
			  cmd_ln_float32("-wip"),
			  cmd_ln_str("-mean"),
			  cmd_ln_str("-var"),
			  cmd_ln_float32("-varfloor"),
			  cmd_ln_str("-senmgau"),
			  cmd_ln_str("-mixw"),
			  cmd_ln_float32("-mixwfloor"),
			  cmd_ln_str("-tmat"),
			  cmd_ln_float32("-tmatfloor"));
    
    /* Here's the perfect candidate for inheritance */
    kb.mdef = kbcore->mdef;
    kb.dict = kbcore->dict;
    kb.lm = kbcore->lm;
    kb.fillpen = kbcore->fillpen;
    kb.tmat = kbcore->tmat;
    kb.dict2lmwid = kbcore->dict2lmwid;
    
    if ((kb.am = acoustic_init (kbcore->fcb, kbcore->gau, kbcore->sen, cmd_ln_float32("-mgaubeam"),
				S3_MAX_FRAMES)) == NULL) {
	E_FATAL("Acoustic models initialization failed\n");
    }
    kb.beam = logs3 (cmd_ln_float64("-beam"));
    kb.wordbeam = logs3 (cmd_ln_float64("-wordbeam"));
    kb.wordmax = cmd_ln_int32("-wordmax");
    
    /* Mark the active words and build lextree */
    active = bitvec_alloc (dict_size (kb.dict));
    bitvec_clear_all (active, dict_size(kb.dict));
    for (w = 0; w < dict_size(kb.dict); w++) {
	if (IS_LMWID(kb.dict2lmwid[w]) || dict_filler_word (kb.dict, w))
	    bitvec_set (active, w);
    }
    kb.lextree_root = lextree_build (kb.dict, kb.mdef, active, cmd_ln_int32("-flatdepth"));
    
    kb.vithist = (glist_t *) ckd_calloc (S3_MAX_FRAMES+2, sizeof(glist_t));
    kb.vithist++;	/* Allow for dummy frame -1 for start word */
    kb.lextree_active = NULL;
    kb.wd_last_sf = (int32 *) ckd_calloc (dict_size(kb.dict), sizeof(int32));
    
    kb.tm = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));
    kb.tm_search = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));
    
    ctl_process (cmd_ln_str("-ctl"), cmd_ln_int32("-ctloffset"), cmd_ln_int32("-ctlcount"),
		 decode_utt, &kb);
    
    exit(0);
}
static void
fwdflat_word_transition(ngram_search_t *ngs, int frame_idx)
{
    int32 cf, nf, b, thresh, pip, i, w, newscore;
    int32 best_silrc_score = 0, best_silrc_bp = 0;      /* FIXME: good defaults? */
    bptbl_t *bp;
    int32 *rcss;
    root_chan_t *rhmm;
    int32 *awl;
    float32 lwf;
    dict_t *dict = ps_search_dict(ngs);
    dict2pid_t *d2p = ps_search_dict2pid(ngs);

    cf = frame_idx;
    nf = cf + 1;
    thresh = ngs->best_score + ngs->fwdflatbeam;
    pip = ngs->pip;
    best_silrc_score = WORST_SCORE;
    lwf = ngs->fwdflat_fwdtree_lw_ratio;

    /* Search for all words starting within a window of this frame.
     * These are the successors for words exiting now. */
    get_expand_wordlist(ngs, cf, ngs->max_sf_win);

    /* Scan words exited in current frame */
    for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) {
        xwdssid_t *rssid;
        int32 silscore;

        bp = ngs->bp_table + b;
        ngs->word_lat_idx[bp->wid] = NO_BP;

        if (bp->wid == ps_search_finish_wid(ngs))
            continue;

        /* DICT2PID location */
        /* Get the mapping from right context phone ID to index in the
         * right context table and the bscore_stack. */
        rcss = ngs->bscore_stack + bp->s_idx;
        if (bp->last2_phone == -1)
            rssid = NULL;
        else
            rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone);

        /* Transition to all successor words. */
        for (i = 0; ngs->expand_word_list[i] >= 0; i++) {
            int32 n_used;

            w = ngs->expand_word_list[i];

            /* Get the exit score we recorded in save_bwd_ptr(), or
             * something approximating it. */
            if (rssid)
                newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]];
            else
                newscore = bp->score;
            if (newscore == WORST_SCORE)
                continue;
            /* FIXME: Floating point... */
            newscore += lwf
                * (ngram_tg_score(ngs->lmset,
                                  dict_basewid(dict, w),
                                  bp->real_wid,
                                  bp->prev_real_wid,
                                  &n_used) >> SENSCR_SHIFT);
            newscore += pip;

            /* Enter the next word */
            if (newscore BETTER_THAN thresh) {
                rhmm = (root_chan_t *) ngs->word_chan[w];
                if ((hmm_frame(&rhmm->hmm) < cf)
                    || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                    hmm_enter(&rhmm->hmm, newscore, b, nf);
                    /* DICT2PID: This is where mpx ssids get introduced. */
                    /* Look up the ssid to use when entering this mpx triphone. */
                    hmm_mpx_ssid(&rhmm->hmm, 0) =
                        dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone,
                                          dict_last_phone(dict, bp->wid));
                    assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0)));
                    E_DEBUG(6,("ssid %d(%d,%d) = %d\n",
                               rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone,
                               hmm_mpx_ssid(&rhmm->hmm, 0)));
                    bitvec_set(ngs->word_active, w);
                }
            }
        }

        /* Get the best exit into silence. */
        if (rssid)
            silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]];
        else
            silscore = bp->score;
        if (silscore BETTER_THAN best_silrc_score) {
            best_silrc_score = silscore;
            best_silrc_bp = b;
        }
    }

    /* Transition to <sil> */
    newscore = best_silrc_score + ngs->silpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        w = ps_search_silence_wid(ngs);
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if ((hmm_frame(&rhmm->hmm) < cf)
            || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
            hmm_enter(&rhmm->hmm, newscore,
                      best_silrc_bp, nf);
            bitvec_set(ngs->word_active, w);
        }
    }
    /* Transition to noise words */
    newscore = best_silrc_score + ngs->fillpen + pip;
    if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) {
        for (w = ps_search_silence_wid(ngs) + 1; w < ps_search_n_words(ngs); w++) {
            rhmm = (root_chan_t *) ngs->word_chan[w];
            /* Noise words that aren't a single phone will have NULL here. */
            if (rhmm == NULL)
                continue;
            if ((hmm_frame(&rhmm->hmm) < cf)
                || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) {
                hmm_enter(&rhmm->hmm, newscore,
                          best_silrc_bp, nf);
                bitvec_set(ngs->word_active, w);
            }
        }
    }

    /* Reset initial channels of words that have become inactive even after word trans. */
    i = ngs->n_active_word[cf & 0x1];
    awl = ngs->active_word_list[cf & 0x1];
    for (w = *(awl++); i > 0; --i, w = *(awl++)) {
        rhmm = (root_chan_t *) ngs->word_chan[w];
        if (hmm_frame(&rhmm->hmm) == cf) {
            hmm_clear_scores(&rhmm->hmm);
        }
    }
}
/**
 * Build net from phone HMMs
 */
static int
phmm_build(allphone_search_t * allphs)
{
    phmm_t *p, **pid2phmm;
    bin_mdef_t *mdef;
    int32 lrc_size;
    uint32 *lc, *rc;
    s3pid_t pid;
    s3cipid_t ci;
    s3cipid_t *filler;
    int n_phmm, n_link;
    int i, nphone;

    mdef = ((ps_search_t *) allphs)->acmod->mdef;
    allphs->ci_phmm =
        (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *));
    pid2phmm =
        (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), sizeof(phmm_t *));

    /* For each unique ciphone/triphone entry in mdef, create a PHMM node */
    n_phmm = 0;
    nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef);
    E_INFO("Building PHMM net of %d phones\n", nphone);
    for (pid = 0; pid < nphone; pid++) {
        if ((p = phmm_lookup(allphs, pid)) == NULL) {
            //not found, should be created
            p = (phmm_t *) ckd_calloc(1, sizeof(*p));
            hmm_init(allphs->hmmctx, &(p->hmm), FALSE,
                     mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat);
            p->pid = pid;
            p->ci = bin_mdef_pid2ci(mdef, pid);
            p->succlist = NULL;
            p->next = allphs->ci_phmm[p->ci];
            allphs->ci_phmm[p->ci] = p;
            n_phmm++;
        }
        pid2phmm[pid] = p;
    }

    /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */
    lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef));
    lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t));
    rc = lc + (n_phmm * lrc_size);
    for (ci = 0; ci < mdef->n_ciphone; ci++) {
        for (p = allphs->ci_phmm[ci]; p; p = p->next) {
            p->lc = lc;
            lc += lrc_size;
            p->rc = rc;
            rc += lrc_size;
        }
    }

    /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) */
    filler =
        (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1,
                                 sizeof(s3cipid_t));

    /* Connect fillers */
    i = 0;
    for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) {
        p = pid2phmm[ci];
        bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef));
        bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef));
        if (mdef->phone[ci].info.ci.filler) {
            filler[i++] = ci;
        }
    }
    filler[i] = BAD_S3CIPID;


    /* Loop over cdphones only if ci_only is not set */
    for (pid = bin_mdef_n_ciphone(mdef); pid < nphone;
         pid++) {
        p = pid2phmm[pid];

        if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) {
            for (i = 0; IS_S3CIPID(filler[i]); i++)
                bitvec_set(p->lc, filler[i]);
        }
        else
            bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]);

        if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) {
            for (i = 0; IS_S3CIPID(filler[i]); i++)
                bitvec_set(p->rc, filler[i]);
        }
        else
            bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]);
    }
    ckd_free(pid2phmm);
    ckd_free(filler);

    /* Create links between PHMM nodes */
    n_link = phmm_link(allphs);

    E_INFO("%d nodes, %d links\n", n_phmm, n_link);
    return 0;
}