Пример #1
0
static int32
interp_read(interp_t * ip, const char *file_name)
{
    FILE *fp;
    int32 byteswap, chksum_present;
    int32 i;
    char eofchk;
    float f;
    char **argname, **argval;
    uint32 chksum;

    E_INFO("Reading interpolation weights: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], INTERP_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], INTERP_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones */
    if (bio_fread(&(ip->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) !=
        1)
        E_FATAL("fread(%s) (arraysize) failed\n", file_name);
    if (ip->n_sen <= 0)
        E_FATAL("%s: arraysize= %d in header\n", file_name, ip->n_sen);

    ip->wt =
        (struct interp_wt_s *) ckd_calloc(ip->n_sen,
                                          sizeof(struct interp_wt_s));

    for (i = 0; i < ip->n_sen; i++) {
        if (bio_fread(&f, sizeof(float32), 1, fp, byteswap, &chksum) != 1)
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        if ((f < 0.0) || (f > 1.0))
            E_FATAL("%s: interpolation weight(%d)= %e\n", file_name, i, f);

        ip->wt[i].cd = (f == 0.0) ? S3_LOGPROB_ZERO : logs3(ip->logmath, f);
        ip->wt[i].ci = (f == 1.0) ? S3_LOGPROB_ZERO : logs3(ip->logmath, 1.0 - f);
    }

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read %d interpolation weights\n", ip->n_sen);

    return 1;
}
Пример #2
0
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      cmd_ln_str("-feat"),
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-lmctlfn"),
			      cmd_ln_str("-lmdumpdir"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_str("-senmgau"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-gs"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
      E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
	E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
      }
    }else if(lm){
      if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
	lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

	for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
	for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;

      }
    }else if(lm){ /* No LM is set at this point*/
      lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
      lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
      for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
      for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;

    }
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));


    if(lmset){
      kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
      /* Just allocate pointers*/
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

      for(i=0;i<kbcore_nlm(kbcore);i++){
	E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
	n=0;
	for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */
	  wp[j].wid=-1;
	  wp[j].prob=-1;
	}
	n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
	E_INFO("Size of word table after unigram + words in class: %d.\n",n);
	if (n < 1)
	  E_FATAL("%d active words in %s\n", n,lmset[i].name);
	n = wid_wordprob2alt(dict,wp,n);
	E_INFO("Size of word table after adding alternative prons: %d.\n",n);
	if (cmd_ln_int32("-treeugprob") == 0) {
	  for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
	}

	for (j = 0; j < kb->n_lextree; j++) {
	  kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
	  lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
	  E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
		 kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
	}
      }

    }else if (lm){
      E_INFO("Creating Unigram Table\n");
      n=0;
      n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
      E_INFO("Size of word table after unigram + words in class: %d\n",n);
      if (n < 1)
	E_FATAL("%d active words\n", n);
      n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
      
      /* Retain or remove unigram probs from lextree, depending on option */
      if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	  wp[i].prob = -1;    	/* Flatten all initial probabilities */
      }
      
      /* Create the desired no. of unigram lextrees */
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
      for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
      }
      E_INFO("Lextrees(%d), %d nodes(ug)\n",
	     kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }



    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }


    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);


    E_INFO("Lextrees(%d), %d nodes(filler)\n",
	     kb->n_lextree, 
	     lextree_n_node(kb->fillertree[0]));
    

    if (cmd_ln_int32("-lextreedump")) {
      if(lmset){
	E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "UGTREE %d\n", i);
	lextree_dump (kb->ugtree[i], dict, stderr);
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "FILLERTREE %d\n", i);
	lextree_dump (kb->fillertree[i], dict, stderr);
      }
      fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);
    
    kb->rec_bstcid=-1;
    kb->skip_count=0;
    
    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);
    
    if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
      E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }
    
    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

	kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) 
      ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));
  

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if(lmset)
      n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
      n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
Пример #3
0
tmat_t *tmat_init (char *file_name, float64 tpfloor)
{
    char tmp;
    int32 n_src, n_dst;
    FILE *fp;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 **tp;
    int32 i, j, k, tp_per_tmat;
    char **argname, **argval;
    tmat_t *t;
    
    E_INFO("Reading HMM transition probability matrices: %s\n", file_name);

    t = (tmat_t *) ckd_calloc (1, sizeof(tmat_t));

    if ((fp = fopen(file_name, "rb")) == NULL)
	E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);
    
    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr (fp, &argname, &argval, &byteswap) < 0)
	E_FATAL("bio_readhdr(%s) failed\n", file_name);
    
    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
	if (strcmp (argname[i], "version") == 0) {
	    if (strcmp(argval[i], TMAT_PARAM_VERSION) != 0)
		E_WARN("Version mismatch(%s): %s, expecting %s\n",
			file_name, argval[i], TMAT_PARAM_VERSION);
	} else if (strcmp (argname[i], "chksum0") == 0) {
	    chksum_present = 1;	/* Ignore the associated value */
	}
    }
    bio_hdrarg_free (argname, argval);
    argname = argval = NULL;
    
    chksum = 0;
    
    /* Read #tmat, #from-states, #to-states, arraysize */
    if ((bio_fread (&(t->n_tmat), sizeof(int32), 1, fp, byteswap, &chksum) != 1) ||
	(bio_fread (&n_src, sizeof(int32), 1, fp, byteswap, &chksum) != 1) ||
	(bio_fread (&n_dst, sizeof(int32), 1, fp, byteswap, &chksum) != 1) ||
	(bio_fread (&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
	E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (t->n_tmat >= MAX_S3TMATID)
	E_FATAL("%s: #tmat (%d) exceeds limit (%d)\n", file_name, t->n_tmat, MAX_S3TMATID);
    if (n_dst != n_src+1)
	E_FATAL("%s: #from-states(%d) != #to-states(%d)-1\n", file_name, n_src, n_dst);
    t->n_state = n_src;
    
    if (i != t->n_tmat * n_src * n_dst) {
	E_FATAL("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
		file_name, i, t->n_tmat, n_src, n_dst);
    }

    /* Allocate memory for tmat data */
    t->tp = (int32 ***) ckd_calloc_3d (t->n_tmat, n_src, n_dst, sizeof(int32));

    /* Temporary structure to read in the float data */
    tp = (float32 **) ckd_calloc_2d (n_src, n_dst, sizeof(float32));

    /* Read transition matrices, normalize and floor them, and convert to logs3 domain */
    tp_per_tmat = n_src * n_dst;
    for (i = 0; i < t->n_tmat; i++) {
	if (bio_fread (tp[0], sizeof(float32), tp_per_tmat, fp,
		       byteswap, &chksum) != tp_per_tmat) {
	    E_FATAL("fread(%s) (arraydata) failed\n", file_name);
	}
	
	/* Normalize and floor */
	for (j = 0; j < n_src; j++) {
	    if (vector_sum_norm (tp[j], n_dst) == 0.0)
		E_ERROR("Normalization failed for tmat %d from state %d\n", i, j);
	    vector_nz_floor (tp[j], n_dst, tpfloor);
	    vector_sum_norm (tp[j], n_dst);

	    /* Convert to logs3.  Take care of special case when tp = 0.0! */
	    for (k = 0; k < n_dst; k++)
		t->tp[i][j][k] = (tp[j][k] == 0.0) ? S3_LOGPROB_ZERO : logs3(tp[j][k]);
	}
    }

    ckd_free_2d ((void **) tp);

    if (chksum_present)
	bio_verify_chksum (fp, byteswap, chksum);

    if (fread (&tmp, 1, 1, fp) == 1)
	E_ERROR("Non-empty file beyond end of data\n");

    fclose(fp);

    E_INFO("Read %d transition matrices of size %dx%d\n",
	   t->n_tmat, t->n_state, t->n_state+1);
    
    if (tmat_chk_uppertri (t) < 0)
	E_FATAL("Tmat not upper triangular\n");
    
    return t;
}
Пример #4
0
word_fsg_t *
word_fsg_load(s2_fsg_t * fsg,
              int use_altpron, int use_filler,
	      kbcore_t *kbc)
{
    float32 silprob = kbc->fillpen->silprob;
    float32 fillprob = kbc->fillpen->fillerprob;
    float32 lw = kbc->fillpen->lw;
    word_fsg_t *word_fsg;
    s2_fsg_trans_t *trans;
    int32 n_trans, n_null_trans, n_alt_trans, n_filler_trans, n_unk;
    int32 wid;
    int32 logp;
    glist_t nulls;
    int32 i, j;

    assert(fsg);

    /* Some error checking */
    if (lw <= 0.0)
        E_WARN("Unusual language-weight value: %.3e\n", lw);
    if (use_filler && ((silprob < 0.0) || (fillprob < 0.0))) {
        E_ERROR("silprob/fillprob must be >= 0\n");
        return NULL;
    }
    if ((fsg->n_state <= 0)
        || ((fsg->start_state < 0) || (fsg->start_state >= fsg->n_state))
        || ((fsg->final_state < 0) || (fsg->final_state >= fsg->n_state))) {
        E_ERROR("Bad #states/start_state/final_state values: %d/%d/%d\n",
                fsg->n_state, fsg->start_state, fsg->final_state);
        return NULL;
    }
    for (trans = fsg->trans_list; trans; trans = trans->next) {
        if ((trans->from_state < 0) || (trans->from_state >= fsg->n_state)
            || (trans->to_state < 0) || (trans->to_state >= fsg->n_state)
            || (trans->prob <= 0) || (trans->prob > 1.0)) {
            E_ERROR("Bad transition: P(%d -> %d) = %e\n",
                    trans->from_state, trans->to_state, trans->prob);
            return NULL;
        }
    }


    word_fsg = (word_fsg_t *) ckd_calloc(1, sizeof(word_fsg_t));
    word_fsg->name = ckd_salloc(fsg->name ? fsg->name : "");
    word_fsg->n_state = fsg->n_state;
    word_fsg->start_state = fsg->start_state;
    word_fsg->final_state = fsg->final_state;
    word_fsg->use_altpron = use_altpron;
    word_fsg->use_filler = use_filler;
    word_fsg->lw = lw;
    word_fsg->lc = NULL;
    word_fsg->rc = NULL;
    word_fsg->dict = kbc->dict;
    word_fsg->mdef = kbc->mdef;
    word_fsg->tmat = kbc->tmat;
    word_fsg->n_ciphone = mdef_n_ciphone(kbc->mdef);


    /* Allocate non-epsilon transition matrix array */
    word_fsg->trans = (glist_t **) ckd_calloc_2d(word_fsg->n_state,
                                                 word_fsg->n_state,
                                                 sizeof(glist_t));
    /* Allocate epsilon transition matrix array */
    word_fsg->null_trans = (word_fsglink_t ***)
        ckd_calloc_2d(word_fsg->n_state, word_fsg->n_state,
                      sizeof(word_fsglink_t *));

    /* Process transitions */
    n_null_trans = 0;
    n_alt_trans = 0;
    n_filler_trans = 0;
    n_unk = 0;
    nulls = NULL;

    for (trans = fsg->trans_list, n_trans = 0;
         trans; trans = trans->next, n_trans++) {
        /* Convert prob to logs2prob and apply language weight */
        logp = (int32) (logs3(kbcore_logmath(kbc), trans->prob) * lw);

        /* Check if word is in dictionary */
        if (trans->word) {
            wid = dict_wordid(kbc->dict, trans->word);
            if (wid < 0) {
                E_ERROR("Unknown word '%s'; ignored\n", trans->word);
                n_unk++;
            }
            else if (use_altpron) {
                wid = dict_basewid(kbc->dict, wid);
                assert(wid >= 0);
            }
        }
        else
            wid = -1;           /* Null transition */

        /* Add transition to word_fsg structure */
        i = trans->from_state;
        j = trans->to_state;
        if (wid < 0) {
            if (word_fsg_null_trans_add(word_fsg, i, j, logp) == 1) {
                n_null_trans++;
                nulls =
                    glist_add_ptr(nulls,
                                  (void *) word_fsg->null_trans[i][j]);
            }
        }
        else {
            word_fsg_trans_add(word_fsg, i, j, logp, wid);

            /* Add transitions for alternative pronunciations, if any */
            if (use_altpron) {
                for (wid = dict_nextalt(kbc->dict, wid);
                     wid >= 0; wid = dict_nextalt(kbc->dict, wid)) {
                    word_fsg_trans_add(word_fsg, i, j, logp, wid);
                    n_alt_trans++;
                    n_trans++;
                }
            }
        }
    }

    /* Add silence and noise filler word transitions if specified */
    if (use_filler) {
        n_filler_trans = word_fsg_add_filler(word_fsg, silprob, fillprob, kbcore_logmath(kbc));
        n_trans += n_filler_trans;
    }

    E_INFO
        ("FSG: %d states, %d transitions (%d null, %d alt, %d filler,  %d unknown)\n",
         word_fsg->n_state, n_trans, n_null_trans, n_alt_trans,
         n_filler_trans, n_unk);

#if __FSG_DBG__
    E_INFO("FSG before NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Null transitions closure */
    nulls = word_fsg_null_trans_closure(word_fsg, nulls);
    glist_free(nulls);

#if __FSG_DBG__
    E_INFO("FSG after NULL closure:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    /* Compute left and right context CIphone lists for each state */
    word_fsg_lc_rc(word_fsg);

#if __FSG_DBG__
    E_INFO("FSG after lc/rc:\n");
    word_fsg_write(word_fsg, stdout);
#endif

    return word_fsg;
}
Пример #5
0
fillpen_t *fillpen_init (dict_t *dict, char *file, float64 silprob, float64 fillprob,
			 float64 lw, float64 wip)
{
    s3wid_t w, bw;
    float64 prob;
    FILE *fp;
    char line[1024], wd[1024];
    int32 k;
    fillpen_t *_fillpen;
    
    _fillpen = (fillpen_t *) ckd_calloc (1, sizeof(fillpen_t));
    
    _fillpen->dict = dict;
    _fillpen->lw = lw;
    _fillpen->wip = wip;
    if (dict->filler_end >= dict->filler_start)
	_fillpen->prob = (int32 *) ckd_calloc (dict->filler_end - dict->filler_start + 1,
					       sizeof(int32));
    else
	_fillpen->prob = NULL;
    
    /* Initialize all words with filler penalty (HACK!! backward compatibility) */
    prob = fillprob;
    for (w = dict->filler_start; w <= dict->filler_end; w++)
	_fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);

    /* Overwrite silence penalty (HACK!! backward compatibility) */
    w = dict_wordid (dict, S3_SILENCE_WORD);
    if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
	E_FATAL("%s not a filler word in the given dictionary\n", S3_SILENCE_WORD);
    prob = silprob;
    _fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);
    
    /* Overwrite with filler prob input file, if specified */
    if (! file)
	return _fillpen;
    
    E_INFO("Reading filler penalty file: %s\n", file);
    if ((fp = fopen (file, "r")) == NULL)
	E_FATAL("fopen(%s,r) failed\n", file);
    while (fgets (line, sizeof(line), fp) != NULL) {
	if (line[0] == '#')	/* Skip comment lines */
	    continue;
	
	k = sscanf (line, "%s %lf", wd, &prob);
	if ((k != 0) && (k != 2))
	    E_FATAL("Bad input line: %s\n", line);
	w = dict_wordid(dict, wd);
	if (NOT_S3WID(w) || (w < dict->filler_start) || (w > dict->filler_end))
	    E_FATAL("%s not a filler word in the given dictionary\n", S3_SILENCE_WORD);
	
	_fillpen->prob[w - dict->filler_start] = (int32) ((logs3(prob) + logs3(wip)) * lw);
    }
    fclose (fp);
    
    /* Replicate fillpen values for alternative pronunciations */
    for (w = dict->filler_start; w <= dict->filler_end; w++) {
	bw = dict_basewid (dict, w);
	if (bw != w)
	    _fillpen->prob[w-dict->filler_start] = _fillpen->prob[bw-dict->filler_start];
    }
    
    return _fillpen;
}
Пример #6
0
acoustic_t *acoustic_init (feat_t *f, gauden_t *g, senone_t *s,
                           float64 beam,
                           int32 maxfr)
{
    acoustic_t *am;
    int32 i;

    if (senone_n_mgau(s) != gauden_n_mgau(g)) {
        E_ERROR("#Parent mixture Gaussians mismatch: senone(%d), gauden(%d)\n",
                senone_n_mgau(s), gauden_n_mgau(g));
    }

    if (feat_n_stream(f) != senone_n_stream(s)) {
        E_ERROR("#Feature-streams mismatch: feat(%d), senone(%d)\n",
                feat_n_stream(f), senone_n_stream(s));
    }

    if (feat_n_stream(f) != gauden_n_stream(g)) {
        E_ERROR("#Feature-streams mismatch: feat(%d), gauden(%d)\n",
                feat_n_stream(f), gauden_n_stream(g));
        return NULL;
    }

    for (i = 0; i < feat_n_stream(f); i++) {
        if (feat_stream_len(f, i) != gauden_stream_len(g, i)) {
            E_ERROR("Feature stream(%d) length mismatch: feat(%d), gauden(%d)\n",
                    feat_stream_len(f, i), gauden_stream_len(g, i));
            return NULL;
        }
    }

    if (beam > 1.0) {
        E_ERROR("mgaubeam > 1.0 (%e)\n", beam);
        return NULL;
    }

    am = (acoustic_t *) ckd_calloc (1, sizeof(acoustic_t));

    am->fcb = f;
    am->gau = g;
    am->sen = s;

    am->mgaubeam = (beam == 0.0) ? LOGPROB_ZERO : logs3(beam);
    if (am->mgaubeam > 0)
        am->mgaubeam = 0;
    am->tot_mgau_eval = 0;
    am->tot_dist_valid = 0.0;

    am->dist_valid = (am->mgaubeam <= LOGPROB_ZERO) ? NULL :
                     (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32));

    if (f->compute_feat) {
        /* Input is MFC cepstra; feature vectors computed from that */
        am->mfc = (float32 **) ckd_calloc_2d (maxfr, feat_cepsize(am->fcb),
                                              sizeof(float32));
        am->feat = feat_array_alloc (f, 1);
    } else {
        /* Input is directly feature vectors */
        am->mfc = NULL;
        am->feat = feat_array_alloc (f, maxfr);
    }

    am->dist = (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32));
    am->gauden_active = bitvec_alloc (g->n_mgau);

    am->senscr = (int32 *) ckd_calloc (s->n_sen, sizeof(int32));
    am->senscale = (int32 *) ckd_calloc (maxfr, sizeof(int32));
    am->sen_active = bitvec_alloc (s->n_sen);

    return am;
}
Пример #7
0
main (int32 argc, char *argv[])
{
    kb_t kb;
    kbcore_t *kbcore;
    bitvec_t active;
    int32 w;
    
    cmd_ln_parse (arglist, argc, argv);
    unlimit();
    
    kbcore = kbcore_init (cmd_ln_float32("-logbase"),
			  cmd_ln_str("-feat"),
			  cmd_ln_str("-mdef"),
			  cmd_ln_str("-dict"),
			  cmd_ln_str("-fdict"),
			  cmd_ln_str("-compsep"),
			  cmd_ln_str("-lm"),
			  cmd_ln_str("-fillpen"),
			  cmd_ln_float32("-silprob"),
			  cmd_ln_float32("-fillprob"),
			  cmd_ln_float32("-lw"),
			  cmd_ln_float32("-wip"),
			  cmd_ln_str("-mean"),
			  cmd_ln_str("-var"),
			  cmd_ln_float32("-varfloor"),
			  cmd_ln_str("-senmgau"),
			  cmd_ln_str("-mixw"),
			  cmd_ln_float32("-mixwfloor"),
			  cmd_ln_str("-tmat"),
			  cmd_ln_float32("-tmatfloor"));
    
    /* Here's the perfect candidate for inheritance */
    kb.mdef = kbcore->mdef;
    kb.dict = kbcore->dict;
    kb.lm = kbcore->lm;
    kb.fillpen = kbcore->fillpen;
    kb.tmat = kbcore->tmat;
    kb.dict2lmwid = kbcore->dict2lmwid;
    
    if ((kb.am = acoustic_init (kbcore->fcb, kbcore->gau, kbcore->sen, cmd_ln_float32("-mgaubeam"),
				S3_MAX_FRAMES)) == NULL) {
	E_FATAL("Acoustic models initialization failed\n");
    }
    kb.beam = logs3 (cmd_ln_float64("-beam"));
    kb.wordbeam = logs3 (cmd_ln_float64("-wordbeam"));
    kb.wordmax = cmd_ln_int32("-wordmax");
    
    /* Mark the active words and build lextree */
    active = bitvec_alloc (dict_size (kb.dict));
    bitvec_clear_all (active, dict_size(kb.dict));
    for (w = 0; w < dict_size(kb.dict); w++) {
	if (IS_LMWID(kb.dict2lmwid[w]) || dict_filler_word (kb.dict, w))
	    bitvec_set (active, w);
    }
    kb.lextree_root = lextree_build (kb.dict, kb.mdef, active, cmd_ln_int32("-flatdepth"));
    
    kb.vithist = (glist_t *) ckd_calloc (S3_MAX_FRAMES+2, sizeof(glist_t));
    kb.vithist++;	/* Allow for dummy frame -1 for start word */
    kb.lextree_active = NULL;
    kb.wd_last_sf = (int32 *) ckd_calloc (dict_size(kb.dict), sizeof(int32));
    
    kb.tm = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));
    kb.tm_search = (ptmr_t *) ckd_calloc (1, sizeof(ptmr_t));
    
    ctl_process (cmd_ln_str("-ctl"), cmd_ln_int32("-ctloffset"), cmd_ln_int32("-ctlcount"),
		 decode_utt, &kb);
    
    exit(0);
}
/* RAH 4.16.01 This code has several leaks that must be fixed */
dict2pid_t *dict2pid_build (mdef_t *mdef, dict_t *dict)
{
    dict2pid_t *dict2pid;
    s3ssid_t *internal, **ldiph, **rdiph, *single;
    int32 pronlen;
    hash_table_t *hs, *hp;
    glist_t g;
    gnode_t *gn;
    s3senid_t *sen;
    hash_entry_t *he;
    int32 *cslen;
    int32 i, j, b, l, r, w, n, p;
    
    E_INFO("Building PID tables for dictionary\n");

    dict2pid = (dict2pid_t *) ckd_calloc (1, sizeof(dict2pid_t));
    dict2pid->internal = (s3ssid_t **) ckd_calloc (dict_size(dict), sizeof(s3ssid_t *));
    dict2pid->ldiph_lc = (s3ssid_t ***) ckd_calloc_3d (mdef->n_ciphone,
						       mdef->n_ciphone,
						       mdef->n_ciphone,
						       sizeof(s3ssid_t));
    dict2pid->single_lc = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone,
						       mdef->n_ciphone,
						       sizeof(s3ssid_t));
    dict2pid->n_comstate = 0;
    dict2pid->n_comsseq = 0;
    
    hs = hash_new (mdef->n_ciphone * mdef->n_ciphone * mdef->n_emit_state, HASH_CASE_YES);
    hp = hash_new (mdef->n_ciphone * mdef->n_ciphone, HASH_CASE_YES);
    
    for (w = 0, n = 0; w < dict_size(dict); w++) {
	pronlen = dict_pronlen(dict, w);
	if (pronlen < 0)
	    E_FATAL("Pronunciation-length(%s)= %d\n", dict_wordstr(dict, w), pronlen);
	n += pronlen;
    }

    internal = (s3ssid_t *) ckd_calloc (n, sizeof(s3ssid_t));
    
    /* Temporary */
    ldiph = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t));
    rdiph = (s3ssid_t **) ckd_calloc_2d (mdef->n_ciphone, mdef->n_ciphone, sizeof(s3ssid_t));
    single = (s3ssid_t *) ckd_calloc (mdef->n_ciphone, sizeof(s3ssid_t));
    for (b = 0; b < mdef->n_ciphone; b++) {
	for (l = 0; l < mdef->n_ciphone; l++) {
	    for (r = 0; r < mdef->n_ciphone; r++)
		dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID;
	    
	    dict2pid->single_lc[b][l] = BAD_S3SSID;
	    
	    ldiph[b][l] = BAD_S3SSID;
	    rdiph[b][l] = BAD_S3SSID;
	}
	single[b] = BAD_S3SSID;
    }
    
    for (w = 0; w < dict_size(dict); w++) {
	dict2pid->internal[w] = internal;
	pronlen = dict_pronlen(dict,w);
	
	if (pronlen >= 2) {
	    b = dict_pron(dict, w, 0);
	    r = dict_pron(dict, w, 1);
	    if (NOT_S3SSID(ldiph[b][r])) {
		g = ldiph_comsseq(mdef, b, r);
		ldiph[b][r] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
		
		for (l = 0; l < mdef_n_ciphone(mdef); l++) {
		    p = mdef_phone_id_nearest (mdef, (s3cipid_t)b, (s3cipid_t)l, (s3cipid_t)r, WORD_POSN_BEGIN);
		    dict2pid->ldiph_lc[b][r][l] = mdef_pid2ssid(mdef, p);
		}
	    }
	    internal[0] = ldiph[b][r];
	    
	    for (i = 1; i < pronlen-1; i++) {
		l = b;
		b = r;
		r = dict_pron(dict, w, i+1);
		
		p = mdef_phone_id_nearest(mdef, (s3cipid_t)b, (s3cipid_t)l, (s3cipid_t)r, WORD_POSN_INTERNAL);
		internal[i] = mdef_pid2ssid(mdef, p);
	    }
	    
	    l = b;
	    b = r;
	    if (NOT_S3SSID(rdiph[b][l])) {
		g = rdiph_comsseq(mdef, b, l);
		rdiph[b][l] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
	    }
	    internal[pronlen-1] = rdiph[b][l];
	} else if (pronlen == 1) {
	    b = dict_pron(dict, w, 0);
	    if (NOT_S3SSID(single[b])) {
		g = single_comsseq(mdef, b);
		single[b] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		glist_free (g);
		
		for (l = 0; l < mdef_n_ciphone(mdef); l++) {
		    g = single_lc_comsseq(mdef, b, l);
		    dict2pid->single_lc[b][l] = ssidlist2comsseq (g, mdef, dict2pid, hs, hp);
		    glist_free (g);
		}
	    }
	    internal[0] = single[b];
	}
	
	internal += pronlen;
    }
    
    ckd_free_2d ((void **) ldiph);
    ckd_free_2d ((void **) rdiph);
    ckd_free ((void *) single);
    
    /* Allocate space for composite state table */
    cslen = (int32 *) ckd_calloc (dict2pid->n_comstate, sizeof(int32));
    g = hash_tolist(hs, &n);
    assert (n == dict2pid->n_comstate);
    n = 0;
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	sen = (s3senid_t *) hash_entry_key(he);
	for (i = 0; IS_S3SENID(sen[i]); i++);
	
	cslen[hash_entry_val(he)] = i+1;	/* +1 for terminating sentinel */
	
	n += (i+1);
    }
    dict2pid->comstate = (s3senid_t **) ckd_calloc (dict2pid->n_comstate, sizeof(s3senid_t *));
    sen = (s3senid_t *) ckd_calloc (n, sizeof(s3senid_t));
    for (i = 0; i < dict2pid->n_comstate; i++) {
	dict2pid->comstate[i] = sen;
	sen += cslen[i];
    }
    
    /* Build composite state table from hash table hs */
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	sen = (s3senid_t *) hash_entry_key(he);
	i = hash_entry_val(he);
	
	for (j = 0; j < cslen[i]; j++)
	    dict2pid->comstate[i][j] = sen[j];
	assert (sen[j-1] == BAD_S3SENID);

	ckd_free ((void *)sen);
    }
    ckd_free (cslen);
    glist_free (g);
    hash_free (hs);
    
    /* Allocate space for composite sseq table */
    dict2pid->comsseq = (s3senid_t **) ckd_calloc (dict2pid->n_comsseq, sizeof(s3senid_t *));
    g = hash_tolist (hp, &n);
    assert (n == dict2pid->n_comsseq);
    
    /* Build composite sseq table */
    for (gn = g; gn; gn = gnode_next(gn)) {
	he = (hash_entry_t *) gnode_ptr (gn);
	i = hash_entry_val(he);
	dict2pid->comsseq[i] = (s3senid_t *) hash_entry_key(he);
    }
    glist_free (g);
    hash_free (hp);
    
    /* Weight for each composite state */
    dict2pid->comwt = (int32 *) ckd_calloc (dict2pid->n_comstate, sizeof(int32));
    for (i = 0; i < dict2pid->n_comstate; i++) {
	sen = dict2pid->comstate[i];
	
	for (j = 0; IS_S3SENID(sen[j]); j++);
#if 0
	/* if comstate i has N states, its weight= (1/N^2) (Major Hack!!) */
	dict2pid->comwt[i] = - (logs3 ((float64)j) << 1);
#else
	/* if comstate i has N states, its weight= 1/N */
	dict2pid->comwt[i] = - logs3 ((float64)j);
#endif
    }
    
    E_INFO("%d composite states; %d composite sseq\n",
	   dict2pid->n_comstate, dict2pid->n_comsseq);
    
    return dict2pid;
}