示例#1
0
static int
acmod_process_full_cep(acmod_t *acmod,
                       mfcc_t ***inout_cep,
                       int *inout_n_frames)
{
    int32 nfr;

    /* Write to log file. */
    if (acmod->mfcfh)
        acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);

    /* Resize feat_buf to fit. */
    if (acmod->n_feat_alloc < *inout_n_frames) {

        if (*inout_n_frames > MAX_N_FRAMES)
            E_FATAL("Batch processing can not process more than %d frames "
                    "at once, requested %d\n", MAX_N_FRAMES, *inout_n_frames);

        feat_array_free(acmod->feat_buf);
        acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
        acmod->n_feat_alloc = *inout_n_frames;
        acmod->n_feat_frame = 0;
        acmod->feat_outidx = 0;
    }
    /* Make dynamic features. */
    nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
                               TRUE, TRUE, acmod->feat_buf);
    acmod->n_feat_frame = nfr;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
    *inout_cep += *inout_n_frames;
    *inout_n_frames = 0;

    return nfr;
}
示例#2
0
acmod_t *
acmod_copy(acmod_t *other)
{
    acmod_t *acmod;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->refcount = 1;
    acmod->config = cmd_ln_retain(other->config);
    acmod->lmath = logmath_retain(other->lmath);
    acmod->mdef = bin_mdef_retain(other->mdef);
    acmod->tmat = tmat_retain(other->tmat);
    acmod->mgau = ps_mgau_copy(other->mgau);
    acmod->fb = featbuf_retain(other->fb);
    acmod->fcb = other->fcb; /* Implicitly retained with fb, I think */

    /* Senone computation stuff. */
    acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen");

    acmod->feat_buf = feat_array_alloc(acmod->fcb, 1);

    return acmod;
}
示例#3
0
static int
acmod_process_full_cep(acmod_t *acmod,
                       mfcc_t ***inout_cep,
                       int *inout_n_frames)
{
    int32 nfr;

    /* Write to log file. */
    if (acmod->mfcfh)
        acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);

    /* Resize feat_buf to fit. */
    if (acmod->n_feat_alloc < *inout_n_frames) {
        feat_array_free(acmod->feat_buf);
        acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
        acmod->n_feat_alloc = *inout_n_frames;
        acmod->n_feat_frame = 0;
        acmod->feat_outidx = 0;
    }
    /* Make dynamic features. */
    nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
                               TRUE, TRUE, acmod->feat_buf);
    acmod->n_feat_frame = nfr;
    assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
    *inout_cep += *inout_n_frames;
    *inout_n_frames = 0;
    return nfr;
}
示例#4
0
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, featbuf_t *fb)
{
    acmod_t *acmod;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->refcount = 1;
    acmod->config = cmd_ln_retain(config);
    acmod->lmath = logmath_retain(lmath);
    acmod->fb = featbuf_retain(fb);
    acmod->fcb = featbuf_get_fcb(acmod->fb);

    /* Load acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;

    /* Senone computation stuff. */
    acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                      sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    acmod->feat_buf = feat_array_alloc(acmod->fcb, 1);
    return acmod;

error_out:
    acmod_free(acmod);
    return NULL;
}
示例#5
0
文件: live2.c 项目: wdebeaum/cabot
int
ld_init(live_decoder_t *decoder, int argc, char **argv)
{
  param_t fe_param;

  if (argc == 2) {
    /*
     * lgalescu: check if args need to be processed from file
     */
    parse_args_file(argv[1]);
  } else 
    cmd_ln_parse(arg_def, argc, argv);

  unlimit();
	
  /* some decoder parameter capturing
   * !!! NOTE - HARDCODED FOR NOW.  REPLACE WITH PARSE_ARG() ASAP !!!!
   */
  memset(decoder, 0, sizeof(live_decoder_t));
  kb_init(&decoder->kb);
  decoder->max_wpf = cmd_ln_int32 ("-maxwpf");;
  decoder->max_histpf = cmd_ln_int32 ("-maxhistpf");
  decoder->max_hmmpf = cmd_ln_int32 ("-maxhmmpf");
  decoder->phones_skip = cmd_ln_int32 ("-ptranskip");
  decoder->hmm_log = cmd_ln_int32("-hmmdump") ? stderr : NULL;
	
  decoder->kbcore = decoder->kb.kbcore;
  decoder->kb.uttid = decoder->uttid;
  decoder->hypsegs = 0;
  decoder->num_hypsegs = 0;
  decoder->hypstr_len = 0;
  decoder->hypstr[0] = '\0';
  decoder->features =
    feat_array_alloc(kbcore_fcb(decoder->kbcore), LIVEBUFBLOCKSIZE);
  decoder->ld_state = LD_STATE_IDLE;
	
  /* some front-end parameter capturing
   * !!! NOTE - HARDCODED FOR NOW.  REPLACE WITH PARSE_ARG() ASAP !!!!
   */
  memset(&fe_param, 0, sizeof(param_t));
  fe_param.SAMPLING_RATE = (float32)cmd_ln_int32 ("-samprate");
  fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
  fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
  fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
  fe_param.FRAME_RATE = cmd_ln_int32("-frate");
  fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
  fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
  fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");
	
  decoder->fe = fe_init(&fe_param);
  if (!decoder->fe) {
    E_WARN("Front end initialization fe_init() failed\n");
    return -1;
  }
	
  return 0;
}
示例#6
0
void
acmod_grow_feat_buf(acmod_t *acmod, int nfr)
{
    mfcc_t ***new_feat_buf;

    new_feat_buf = feat_array_alloc(acmod->fcb, nfr);
    if (acmod->n_feat_frame || acmod->grow_feat) {
        memcpy(new_feat_buf[0][0], acmod->feat_buf[0][0],
               (acmod->n_feat_alloc
                * feat_dimension(acmod->fcb)
                * sizeof(***acmod->feat_buf)));
    }
    feat_array_free(acmod->feat_buf);
    acmod->framepos = ckd_realloc(acmod->framepos,
                                  nfr * sizeof(*acmod->framepos));
    acmod->feat_buf = new_feat_buf;
    acmod->n_feat_alloc = nfr;
}
示例#7
0
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *acmod;
    char const *featparams;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->config = cmd_ln_retain(config);
    acmod->lmath = lmath;
    acmod->state = ACMOD_IDLE;

    /* Look for feat.params in acoustic model dir. */
    if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
        if (NULL !=
            cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE))
            E_INFO("Parsed model-specific feature parameters from %s\n",
                    featparams);
    }

    /* Initialize feature computation. */
    if (fe) {
        if (acmod_fe_mismatch(acmod, fe))
            goto error_out;
        fe_retain(fe);
        acmod->fe = fe;
    }
    else {
        /* Initialize a new front end. */
        acmod->fe = fe_init_auto_r(config);
        if (acmod->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(acmod, acmod->fe))
            goto error_out;
    }
    if (fcb) {
        if (acmod_feat_mismatch(acmod, fcb))
            goto error_out;
        feat_retain(fcb);
        acmod->fcb = fcb;
    }
    else {
        /* Initialize a new fcb. */
        if (acmod_init_feat(acmod) < 0)
            goto error_out;
    }

    /* Load acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;


    /* The MFCC buffer needs to be at least as large as the dynamic
     * feature window.  */
    acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
    acmod->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
                      sizeof(**acmod->mfc_buf));

    /* Feature buffer has to be at least as large as MFCC buffer. */
    acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
    acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));

    acmod->utt_start_frame = 0;

    /* Senone computation stuff. */
    acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                                     sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                                     sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
    return acmod;

error_out:
    acmod_free(acmod);
    return NULL;
}
示例#8
0
int
agg_phn_seg(lexicon_t *lex,
	    acmod_set_t *acmod_set,
	    feat_t *fcb,
	    segdmp_type_t type)
{
    uint16 *seg;
    vector_t *mfcc;
    vector_t **feat;
    int32 n_frame;
    uint32 tick_cnt;

    acmod_id_t *phone;
    uint32 *start;
    uint32 *len;
    uint32 n_phone;
    uint32 s;
    char *btw_mark;

    char *trans;
    char **word;
    uint32 n_word;
    int32 mfc_veclen = cmd_ln_int32("-ceplen");

    uint32 n_stream;
    uint32 *veclen;

    tick_cnt = 0;

    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);

    while (corpus_next_utt()) {
	if ((++tick_cnt % 500) == 0) {
	    E_INFOCONT("[%u] ", tick_cnt);
	}

	if (corpus_get_sent(&trans) != S3_SUCCESS) {
	    E_FATAL("Unable to read word transcript for %s\n", corpus_utt_brief_name());
	}

	if (corpus_get_seg(&seg, &n_frame) != S3_SUCCESS) {
	    E_FATAL("Unable to read Viterbi state segmentation for %s\n", corpus_utt_brief_name());
	}
	    
	n_word = str2words(trans, NULL, 0);
	word = ckd_calloc(n_word, sizeof(char*));
	str2words(trans, word, n_word);

	phone = mk_phone_list(&btw_mark, &n_phone, word, n_word, lex);
	start = ckd_calloc(n_phone, sizeof(uint32));
	len = ckd_calloc(n_phone, sizeof(uint32));

	/* check to see whether the word transcript and dictionary entries
	   agree with the state segmentation */
	if (ck_seg(acmod_set, phone, n_phone, seg, n_frame, corpus_utt()) != S3_SUCCESS) {
	    free(trans);	/* alloc'ed using strdup, not ckd_*() */
	    free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	    ckd_free(word);
	    ckd_free(phone);
	    
	    E_ERROR("ck_seg failed");

	    continue;
	}

	if (cvt2triphone(acmod_set, phone, btw_mark, n_phone) != S3_SUCCESS) {
	    free(trans);	/* alloc'ed using strdup, not ckd_*() */
	    free(seg);		/* alloc'ed using malloc in areadshort(), not ckd_*() */
	    ckd_free(word);
	    ckd_free(phone);

	    E_ERROR("cvt2triphone failed");
	    
	    continue;
	}

	ckd_free(btw_mark);

	if (mk_seg(acmod_set,
		   seg,
		   n_frame,
		   phone,
		   start,
		   len,
		   n_phone) != S3_SUCCESS) {
	    free(trans);
	    free(seg);
	    ckd_free(word);
	    ckd_free(phone);

	    E_ERROR("mk_seg failed");
	    continue;
	}
	
	if (corpus_provides_mfcc()) {
    	        if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
		      E_FATAL("Can't read input features from %s\n", corpus_utt());
		}
		
		if (n_frame < 9) {
		  E_WARN("utt %s too short\n", corpus_utt());
		  if (mfcc) {
		    ckd_free(mfcc[0]);
		    ckd_free(mfcc);
		    mfcc = NULL;
		  }
		  continue;
		}

		feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	        feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

		for (s = 0; s < n_phone; s++) {
		    segdmp_add_feat(phone[s],
				    &feat[start[s]],
				    len[s]);
		}

		feat_array_free(feat);
		free(&mfcc[0][0]);
		ckd_free(mfcc);
	}
	else {
	    E_FATAL("No data type specified\n");
	}

	free(trans);	/* alloc'ed using strdup, not ckd_*() */
	free(seg);	/* alloc'ed using malloc in areadshort(), not ckd_*() */
	ckd_free(word);
	ckd_free(phone);
	ckd_free(start);
	ckd_free(len);
    }

    return 0;
}
示例#9
0
文件: kb.c 项目: 4auka/cmusphinx
void
kb_init(kb_t * kb, cmd_ln_t *config)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    int32 cisencnt;

    /* STRUCTURE: Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = kbcore_init(config);
    if (kb->kbcore == NULL)
        E_FATAL("Initialization of kb failed\n");

    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    err_set_debug_level(cmd_ln_int32_r(config, "-debug"));

    /* STRUCTURE INITIALIZATION: Initialize the beam data structure */
    if (cmd_ln_exists_r(config, "-ptranskip")) {
        kb->beam = beam_init(cmd_ln_float64_r(config, "-beam"),
                             cmd_ln_float64_r(config, "-pbeam"),
                             cmd_ln_float64_r(config, "-wbeam"),
                             cmd_ln_float64_r(config, "-wend_beam"),
                             cmd_ln_int32_r(config, "-ptranskip"), mdef_n_ciphone(mdef),
                             kbcore->logmath
            );

        /* REPORT : Report the parameters in the beam data structure */
        if (REPORT_KB)
                beam_report(kb->beam);
    }


    /* STRUCTURE INITIALIZATION: Initialize the fast GMM computation data structure */
    if (cmd_ln_exists_r(config, "-ci_pbeam")) {
        kb->fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
                                    cmd_ln_int32_r(config, "-cond_ds"),
                                    cmd_ln_int32_r(config, "-dist_ds"),
                                    cmd_ln_int32_r(config, "-gs4gs"),
                                    cmd_ln_int32_r(config, "-svq4svq"),
                                    cmd_ln_float64_r(config, "-subvqbeam"),
                                    cmd_ln_float64_r(config, "-ci_pbeam"),
                                    cmd_ln_float64_r(config, "-tighten_factor"),
                                    cmd_ln_int32_r(config, "-maxcdsenpf"),
                                    mdef->n_ci_sen,
                                    kbcore->logmath);

        /* REPORT : Report the parameters in the fast_gmm_t data struture */
        if (REPORT_KB)
            fast_gmm_report(kb->fastgmm);
    }

    /* STRUCTURE INITIALIZATION: Initialize the phoneme lookahead data structure */
    if (cmd_ln_exists_r(config, "-pl_beam")) {
        kb->pl = pl_init(cmd_ln_int32_r(config, "-pheurtype"),
                         cmd_ln_float64_r(config, "-pl_beam"), mdef_n_ciphone(mdef),
                         kbcore->logmath
            );

        /* REPORT : Report the parameters in the pl_t data struture */
        if (REPORT_KB)
            pl_report(kb->pl);
    }

    /* STRUCTURE INITIALIZATION: Initialize the acoustic score data structure */
    {
        int32 pl_window = 1;

        if (cmd_ln_exists_r(config, "-pl_window"))
            pl_window = cmd_ln_int32_r(config, "-pl_window");

        for (cisencnt = 0; cisencnt == mdef->cd2cisen[cisencnt]; cisencnt++) ;
        kb->ascr = ascr_init(kbcore_n_mgau(kbcore),
                             kb->kbcore->dict2pid->n_comstate,
                             mdef_n_sseq(mdef),
                             dict2pid_n_comsseq(d2p),
                             pl_window, cisencnt);

        if (REPORT_KB)
            ascr_report(kb->ascr);
    }

    /* Initialize the front end if -adcin is specified */
    if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
        if ((kb->fe = fe_init_auto_r(config)) == NULL) {
            E_FATAL("fe_init_auto_r() failed\n");
        }
    }
    /* STRUCTURE INITIALIZATION : The feature vector */
    if ((kb->feat =
         feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    /* STRUCTURE INITIALIZATION : The statistics for the search */
    kb->stat = stat_init();

    /* STRUCTURE INITIALIZATION : The adaptation routines of the search */
    kb->adapt_am = adapt_am_init();

    if (cmd_ln_str_r(config, "-mllr")) {
        kb_setmllr(cmd_ln_str_r(config, "-mllr"), cmd_ln_str_r(config, "-cb2mllr"), kb);
    }

    /* CHECK: make sure when (-cond_ds) is specified, a Gaussian map is also specified */
    if (cmd_ln_int32_r(config, "-cond_ds") > 0 && kb->kbcore->gs == NULL)
        E_FATAL
            ("Conditional Down Sampling require the use of Gaussian Selection map\n");

    /* MEMORY ALLOCATION : Word best score and exit */
    /* Open hypseg file if specified */
    kb->matchsegfp = kb->matchfp = NULL;
    kb->matchsegfp = file_open(cmd_ln_str_r(config, "-hypseg"));
    kb->matchfp = file_open(cmd_ln_str_r(config, "-hyp"));

    if (cmd_ln_exists_r(config, "-hmmdump"))
        kb->hmmdumpfp = cmd_ln_int32_r(config, "-hmmdump") ? stderr : NULL;

    /* STRUCTURE INITIALIZATION : The search data structure, done only
       after kb is initialized kb is acted as a clipboard. */
    if (cmd_ln_exists_r(config, "-op_mode")) {
        /* -op_mode, if set (i.e. not -1), takes precedence over -mode. */
        if (cmd_ln_int32_r(config, "-op_mode") != -1)
            kb->op_mode = cmd_ln_int32_r(config, "-op_mode");
        else
            kb->op_mode = srch_mode_str_to_index(cmd_ln_str_r(config, "-mode"));
        E_INFO("SEARCH MODE INDEX %d\n", kb->op_mode);
        if ((kb->srch = (srch_t *) srch_init(kb, kb->op_mode)) == NULL) {
            E_FATAL("Search initialization failed. Forced exit\n");
        }
        if (REPORT_KB) {
            srch_report(kb->srch);
        }
    }
}
示例#10
0
文件: kb.c 项目: yqzhang/MemProfile
/*ARCHAN, to allow backward compatibility -lm, -lmctlfn coexists. This makes the current implmentation more complicated than necessary. */
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    lmset_t *lmset;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    int32 cisencnt;
    int32 j;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = NULL;

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      cmd_ln_str("-feat"),
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-lmctlfn"),
			      cmd_ln_str("-lmdumpdir"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_str("-senmgau"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-gs"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    if(kb->kbcore==NULL){
      E_FATAL("Initialization of kb failed\n");
    }

    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    lmset=kbcore_lmset(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);

    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	if (NOT_S3LMWID(lm_startwid(lmset[i].lm)) || NOT_S3LMWID(lm_finishwid(lmset[i].lm)))
	E_FATAL("%s or %s not in LM %s\n", S3_START_WORD, S3_FINISH_WORD,lmset[i].name);
      }
    }else if(lm){
      if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    }

    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    if(lmset){
      for(i=0;i<kbcore_nlm(kbcore);i++){
	lm_lmwid2dictwid(lmset[i].lm, lm_startwid(lmset[i].lm)) = BAD_S3WID;
	lm_lmwid2dictwid(lmset[i].lm, lm_finishwid(lmset[i].lm)) = BAD_S3WID;

	for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;
	for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	  lmset[i].lm->dict2lmwid[w] = BAD_S3LMWID;

      }
    }else if(lm){ /* No LM is set at this point*/
      lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
      lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
      for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
      for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;

    }
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->rec_sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), sizeof(int32));
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;

    E_INFO("Building lextrees\n");
    /* Get the number of lexical tree*/
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }

    /* ARCHAN: This code was rearranged in s3.4 implementation of dynamic LM */
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));


    if(lmset){
      kb->ugtreeMulti = (lextree_t **) ckd_calloc (kbcore_nlm(kbcore)*kb->n_lextree, sizeof(lextree_t *));
      /* Just allocate pointers*/
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));

      for(i=0;i<kbcore_nlm(kbcore);i++){
	E_INFO("Creating Unigram Table for lm %d name %s\n",i,lmset[i].name);
	n=0;
	for(j=0;j<dict_size(dict);j++){ /*try to be very careful again */
	  wp[j].wid=-1;
	  wp[j].prob=-1;
	}
	n = lm_ug_wordprob (lmset[i].lm, dict,MAX_NEG_INT32, wp);
	E_INFO("Size of word table after unigram + words in class: %d.\n",n);
	if (n < 1)
	  E_FATAL("%d active words in %s\n", n,lmset[i].name);
	n = wid_wordprob2alt(dict,wp,n);
	E_INFO("Size of word table after adding alternative prons: %d.\n",n);
	if (cmd_ln_int32("-treeugprob") == 0) {
	  for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
	}

	for (j = 0; j < kb->n_lextree; j++) {
	  kb->ugtreeMulti[i*kb->n_lextree+j] = lextree_build (kbcore, wp, n, lc);
	  lextree_type (kb->ugtreeMulti[i*kb->n_lextree+j]) = 0;
	  E_INFO("Lextrees (%d) for lm %d name %s, %d nodes(ug)\n",
		 kb->n_lextree, i, lmset[i].name,lextree_n_node(kb->ugtreeMulti[i*kb->n_lextree+j]));
	}
      }

    }else if (lm){
      E_INFO("Creating Unigram Table\n");
      n=0;
      n = lm_ug_wordprob (lm, dict,MAX_NEG_INT32, wp);
      E_INFO("Size of word table after unigram + words in class: %d\n",n);
      if (n < 1)
	E_FATAL("%d active words\n", n);
      n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
      
      /* Retain or remove unigram probs from lextree, depending on option */
      if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	  wp[i].prob = -1;    	/* Flatten all initial probabilities */
      }
      
      /* Create the desired no. of unigram lextrees */
      kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
      for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
      }
      E_INFO("Lextrees(%d), %d nodes(ug)\n",
	     kb->n_lextree, lextree_n_node(kb->ugtree[0]));
    }



    /* Create filler lextrees */
    /* ARCHAN : only one filler tree is supposed to be build even for dynamic LMs */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }


    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    ckd_free ((void *) lc);
    bitvec_free (lc_active);


    E_INFO("Lextrees(%d), %d nodes(filler)\n",
	     kb->n_lextree, 
	     lextree_n_node(kb->fillertree[0]));
    

    if (cmd_ln_int32("-lextreedump")) {
      if(lmset){
	E_FATAL("Currently, doesn't support -lextreedump for multiple-LMs\n");
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "UGTREE %d\n", i);
	lextree_dump (kb->ugtree[i], dict, stderr);
      }
      for (i = 0; i < kb->n_lextree; i++) {
	fprintf (stderr, "FILLERTREE %d\n", i);
	lextree_dump (kb->fillertree[i], dict, stderr);
      }
      fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    /*Sections of optimization related parameters*/
    kb->ds_ratio=cmd_ln_int32("-ds");
    E_INFO("Down Sampling Ratio = %d\n",kb->ds_ratio);
    
    kb->rec_bstcid=-1;
    kb->skip_count=0;
    
    kb->cond_ds=cmd_ln_int32("-cond_ds");
    E_INFO("Conditional Down Sampling Parameter = %d\n",kb->cond_ds);
    
    if(kb->cond_ds>0&&kb->kbcore->gs==NULL) E_FATAL("Conditional Down Sampling require the use of Gaussian Selection map\n");

    kb->gs4gs=cmd_ln_int32("-gs4gs");
    E_INFO("GS map would be used for Gaussian Selection? = %d\n",kb->gs4gs);

    kb->svq4svq=cmd_ln_int32("-svq4svq");
    E_INFO("SVQ would be used as Gaussian Score ?= %d\n",kb->svq4svq);

    kb->ci_pbeam=-1*logs3(cmd_ln_float32("-ci_pbeam"));
    E_INFO("CI phone beam to prune the number of parent CI phones in CI-base GMM Selection = %d\n",kb->ci_pbeam);
    if(kb->ci_pbeam>10000000){
      E_INFO("Virtually no CI phone beam is applied now. (ci_pbeam>1000000)\n");
    }
    
    kb->wend_beam=-1*logs3(cmd_ln_float32("-wend_beam"));
    E_INFO("Word-end pruning beam: %d\n",kb->wend_beam);

    kb->pl_window=cmd_ln_int32("-pl_window");
    E_INFO("Phoneme look-ahead window size = %d\n",kb->pl_window);

	kb->pl_window_start=0;

    kb->pl_beam=logs3(cmd_ln_float32("-pl_beam"));
    E_INFO("Phoneme look-ahead beam = %d\n",kb->pl_beam);

    for(cisencnt=0;cisencnt==mdef->cd2cisen[cisencnt];cisencnt++) 
      ;

    kb->cache_ci_senscr=(int32**)ckd_calloc_2d(kb->pl_window,cisencnt,sizeof(int32));
    kb->cache_best_list=(int32*)ckd_calloc(kb->pl_window,sizeof(int32));
    kb->phn_heur_list=(int32*)ckd_calloc(mdef_n_ciphone (mdef),sizeof(int32));
  

    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    ptmr_init (&(kb->tm_ovrhd));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");

    if(lmset)
      n = ((kb->ugtreeMulti[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    else
      n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;

    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }

    str = cmd_ln_str("-hyp");
    kb->matchfp = NULL;
    if (str) {
#ifdef SPEC_CPU_WINDOWS
	if ((kb->matchfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
示例#11
0
文件: kb.c 项目: 10v/cmusphinx
void kb_init (kb_t *kb)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    lm_t *lm;
    s3cipid_t sil, ci;
    s3wid_t w;
    int32 i, n, n_lc;
    wordprob_t *wp;
    s3cipid_t *lc;
    bitvec_t lc_active;
    char *str;
    
    /* Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));

    kb->kbcore = kbcore_init (cmd_ln_float32 ("-logbase"),
			      "1s_c_d_dd",    /* Hack!! Hardwired constant 
						for -feat argument */
			      cmd_ln_str("-cmn"),
			      cmd_ln_str("-varnorm"),
			      cmd_ln_str("-agc"),
			      cmd_ln_str("-mdef"),
			      cmd_ln_str("-dict"),
			      cmd_ln_str("-fdict"),
			      "",	/* Hack!! Hardwired constant 
						for -compsep argument */
			      cmd_ln_str("-lm"),
			      cmd_ln_str("-fillpen"),
			      cmd_ln_float32("-silprob"),
			      cmd_ln_float32("-fillprob"),
			      cmd_ln_float32("-lw"),
			      cmd_ln_float32("-wip"),
			      cmd_ln_float32("-uw"),
			      cmd_ln_str("-mean"),
			      cmd_ln_str("-var"),
			      cmd_ln_float32("-varfloor"),
			      cmd_ln_str("-mixw"),
			      cmd_ln_float32("-mixwfloor"),
			      cmd_ln_str("-subvq"),
			      cmd_ln_str("-tmat"),
			      cmd_ln_float32("-tmatfloor"));
    
    kbcore = kb->kbcore;
    
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    lm = kbcore_lm(kbcore);
    d2p = kbcore_dict2pid(kbcore);
    
    if (NOT_S3WID(dict_startwid(dict)) || NOT_S3WID(dict_finishwid(dict)))
	E_FATAL("%s or %s not in dictionary\n", S3_START_WORD, S3_FINISH_WORD);
    if (NOT_S3LMWID(lm_startwid(lm)) || NOT_S3LMWID(lm_finishwid(lm)))
	E_FATAL("%s or %s not in LM\n", S3_START_WORD, S3_FINISH_WORD);
    
    /* Check that HMM topology restrictions are not violated */
    if (tmat_chk_1skip (kbcore->tmat) < 0)
	E_FATAL("Tmat contains arcs skipping more than 1 state\n");
    
    /*
     * Unlink <s> and </s> between dictionary and LM, to prevent their 
     * recognition.  They are merely dummy words (anchors) at the beginning 
     * and end of each utterance.
     */
    lm_lmwid2dictwid(lm, lm_startwid(lm)) = BAD_S3WID;
    lm_lmwid2dictwid(lm, lm_finishwid(lm)) = BAD_S3WID;
    for (w = dict_startwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
    for (w = dict_finishwid(dict); IS_S3WID(w); w = dict_nextalt(dict, w))
	kbcore->dict2lmwid[w] = BAD_S3LMWID;
    
    sil = mdef_silphone (kbcore_mdef (kbcore));
    if (NOT_S3CIPID(sil))
	E_FATAL("Silence phone '%s' not in mdef\n", S3_SILENCE_CIPHONE);
    
    E_INFO("Building lextrees\n");
    
    kb->sen_active = (int32 *) ckd_calloc (mdef_n_sen(mdef), sizeof(int32));
    kb->ssid_active = (int32 *) ckd_calloc (mdef_n_sseq(mdef), sizeof(int32));
    kb->comssid_active = (int32 *) ckd_calloc (dict2pid_n_comsseq(d2p), 
							sizeof(int32));
    /* Build active word list */
    wp = (wordprob_t *) ckd_calloc (dict_size(dict), sizeof(wordprob_t));
    n = lm_ug_wordprob (lm, MAX_NEG_INT32, wp);
    if (n < 1)
	E_FATAL("%d active words\n", n);
    n = wid_wordprob2alt (dict, wp, n);	   /* Add alternative pronunciations */
    
    /* Retain or remove unigram probs from lextree, depending on option */
    if (cmd_ln_int32("-treeugprob") == 0) {
	for (i = 0; i < n; i++)
	    wp[i].prob = -1;    	/* Flatten all initial probabilities */
    }
    
    /* Build set of all possible left contexts */
    lc = (s3cipid_t *) ckd_calloc (mdef_n_ciphone(mdef) + 1, sizeof(s3cipid_t));
    lc_active = bitvec_alloc (mdef_n_ciphone (mdef));
    for (w = 0; w < dict_size (dict); w++) {
	ci = dict_pron (dict, w, dict_pronlen(dict, w) - 1);
	if (! mdef_is_fillerphone (mdef, (int)ci))
	    bitvec_set (lc_active, ci);
    }
    ci = mdef_silphone(mdef);
    bitvec_set (lc_active, ci);
    for (ci = 0, n_lc = 0; ci < mdef_n_ciphone(mdef); ci++) {
	if (bitvec_is_set (lc_active, ci))
	    lc[n_lc++] = ci;
    }
    lc[n_lc] = BAD_S3CIPID;
    
    /* Create the desired no. of unigram lextrees */
    kb->n_lextree = cmd_ln_int32 ("-Nlextree");
    if (kb->n_lextree < 1) {
	E_ERROR("No. of ugtrees specified: %d; will instantiate 1 ugtree\n", 
								kb->n_lextree);
	kb->n_lextree = 1;
    }
    kb->ugtree = (lextree_t **) ckd_calloc (kb->n_lextree, sizeof(lextree_t *));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->ugtree[i] = lextree_build (kbcore, wp, n, lc);
	lextree_type (kb->ugtree[i]) = 0;
    }
    bitvec_free (lc_active);
    ckd_free ((void *) lc);
    
    /* Create filler lextrees */
    n = 0;
    for (i = dict_filler_start(dict); i <= dict_filler_end(dict); i++) {
	if (dict_filler_word(dict, i)) {
	    wp[n].wid = i;
	    wp[n].prob = fillpen (kbcore->fillpen, i);
	    n++;
	}
    }
    kb->fillertree = (lextree_t **)ckd_calloc(kb->n_lextree,sizeof(lextree_t*));
    for (i = 0; i < kb->n_lextree; i++) {
	kb->fillertree[i] = lextree_build (kbcore, wp, n, NULL);
	lextree_type (kb->fillertree[i]) = -1;
    }
    ckd_free ((void *) wp);
    
    E_INFO("Lextrees(%d), %d nodes(ug), %d nodes(filler)\n",
	   kb->n_lextree, lextree_n_node(kb->ugtree[0]), 
			lextree_n_node(kb->fillertree[0]));
    
    if (cmd_ln_int32("-lextreedump")) {
	for (i = 0; i < kb->n_lextree; i++) {
	    fprintf (stderr, "UGTREE %d\n", i);
	    lextree_dump (kb->ugtree[i], dict, stderr);
	}
	for (i = 0; i < kb->n_lextree; i++) {
	    fprintf (stderr, "FILLERTREE %d\n", i);
	    lextree_dump (kb->fillertree[i], dict, stderr);
	}
	fflush (stderr);
    }
    
    kb->ascr = ascr_init (mgau_n_mgau(kbcore_mgau(kbcore)), 
				kbcore->dict2pid->n_comstate);
    kb->beam = beam_init (cmd_ln_float64("-subvqbeam"),
			  cmd_ln_float64("-beam"),
			  cmd_ln_float64("-pbeam"),
			  cmd_ln_float64("-wbeam"));
    E_INFO("Beam= %d, PBeam= %d, WBeam= %d, SVQBeam= %d\n",
	   kb->beam->hmm, kb->beam->ptrans, kb->beam->word, kb->beam->subvq);
    
    if ((kb->feat = feat_array_alloc(kbcore_fcb(kbcore),S3_MAX_FRAMES)) == NULL)
	E_FATAL("feat_array_alloc() failed\n");
    
    kb->vithist = vithist_init(kbcore, kb->beam->word, cmd_ln_int32("-bghist"));
    
    ptmr_init (&(kb->tm_sen));
    ptmr_init (&(kb->tm_srch));
    kb->tot_fr = 0;
    kb->tot_sen_eval = 0.0;
    kb->tot_gau_eval = 0.0;
    kb->tot_hmm_eval = 0.0;
    kb->tot_wd_exit = 0.0;
    
    kb->hmm_hist_binsize = cmd_ln_int32("-hmmhistbinsize");
    n = ((kb->ugtree[0]->n_node) + (kb->fillertree[0]->n_node)) * kb->n_lextree;
    n /= kb->hmm_hist_binsize;
    kb->hmm_hist_bins = n+1;
    kb->hmm_hist = (int32 *) ckd_calloc (n+1, sizeof(int32));	/* Really no need for +1 */
    
    /* Open hypseg file if specified */
    str = cmd_ln_str("-hypseg");
    kb->matchsegfp = NULL;
    if (str) {
#ifdef WIN32
	if ((kb->matchsegfp = fopen(str, "wt")) == NULL)
#else
	if ((kb->matchsegfp = fopen(str, "w")) == NULL)
#endif
	    E_ERROR("fopen(%s,w) failed; use FWDXCT: from std logfile\n", str);
    }
}
示例#12
0
int
main(int32 argc, char *argv[])
{
    char sent[16384];
    cmd_ln_t *config;

    print_appl_info(argv[0]);
    cmd_ln_appl_enter(argc, argv, "default.arg", defn);

    unlimit();
    config = cmd_ln_get();

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    sentfile = cmd_ln_str_r(config, "-insent");

    if ((sentfp = fopen(sentfile, "r")) == NULL)
        E_FATAL_SYSTEM("Failed to open file %s for reading", sentfile);

    /* Note various output directories */
    if (cmd_ln_str_r(config, "-s2stsegdir") != NULL)
        s2stsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-s2stsegdir"));
    if (cmd_ln_str_r(config, "-stsegdir") != NULL)
        stsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-stsegdir"));
    if (cmd_ln_str_r(config, "-phsegdir") != NULL)
        phsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-phsegdir"));
    if (cmd_ln_str_r(config, "-phlabdir") != NULL)
        phlabdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-phlabdir"));
    if (cmd_ln_str_r(config, "-wdsegdir") != NULL)
        wdsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-wdsegdir"));

    /* HACK! Pre-read insent without checking whether ctl could also 
       be read.  In general, this is caused by the fact that we used
       multiple files to specify resource in sphinx III.  This is easy
       to solve but currently I just to remove process_ctl because it
       duplicates badly with ctl_process.  

       The call back function will take care of matching the uttfile
       names. We don't need to worry too much about inconsistency. 
     */

    while (ctloffset > 0) {
        if (fgets(sent, sizeof(sent), sentfp) == NULL) {
            E_ERROR("EOF(%s)\n", sentfile);
            break;
        }
        --ctloffset;
    }

    if ((outsentfile = cmd_ln_str_r(config, "-outsent")) != NULL) {
        if ((outsentfp = fopen(outsentfile, "w")) == NULL)
            E_FATAL_SYSTEM("Failed to open file %s for writing", outsentfile);
    }

    if ((outctlfile = cmd_ln_str_r(config, "-outctl")) != NULL) {
        if ((outctlfp = fopen(outctlfile, "w")) == NULL)
            E_FATAL_SYSTEM("Failed top open file %s for writing", outctlfile);
    }

    if ((cmd_ln_str_r(config, "-s2stsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-stsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-phlabdir") == NULL) &&
        (cmd_ln_str_r(config, "-phsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-wdsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-outsent") == NULL))
        E_FATAL("Missing output file/directory argument(s)\n");

    /* Read in input databases */
    models_init(config);

    if (!feat)
        feat = feat_array_alloc(kbcore_fcb(kbc), S3_MAX_FRAMES);

    timers[tmr_utt].name = "U";
    timers[tmr_gauden].name = "G";
    timers[tmr_senone].name = "S";
    timers[tmr_align].name = "A";

    /* Initialize align module */
    align_init(kbc->mdef, kbc->tmat, dict, config, kbc->logmath);
    printf("\n");

    if (cmd_ln_str_r(config, "-mllr") != NULL) {
        if (kbc->mgau)
            adapt_set_mllr(adapt_am, kbc->mgau, cmd_ln_str_r(config, "-mllr"), NULL,
                           kbc->mdef, config);
        else if (kbc->ms_mgau)
            model_set_mllr(kbc->ms_mgau, cmd_ln_str_r(config, "-mllr"), NULL, kbcore_fcb(kbc),
                           kbc->mdef, config);
        else
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
    }

    tot_nfr = 0;

    /*  process_ctlfile (); */

    if (cmd_ln_str_r(config, "-ctl")) {
        /* When -ctlfile is speicified, corpus.c will look at -ctl_mllr to get
           the corresponding  MLLR for the utterance */
        ctl_process(cmd_ln_str_r(config, "-ctl"),
                    NULL,
                    cmd_ln_str_r(config, "-ctl_mllr"),
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"),
                    utt_align, config);
    }
    else {
        E_FATAL(" -ctl are not specified.\n");
    }

    if (tot_nfr > 0) {
        printf("\n");
        printf("TOTAL FRAMES:       %8d\n", tot_nfr);
        printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
               tm_utt.t_tot_cpu, tm_utt.t_tot_cpu / (tot_nfr * 0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt.t_tot_elapsed,
               tm_utt.t_tot_elapsed / (tot_nfr * 0.01));
    }

    if (outsentfp)
        fclose(outsentfp);
    if (outctlfp)
        fclose(outctlfp);
    if (sentfp)
        fclose(sentfp);

    ckd_free(s2stsegdir);
    ckd_free(stsegdir);
    ckd_free(phsegdir);
    ckd_free(wdsegdir);

    feat_array_free(feat);
    align_free();
    models_free();

#if (! WIN32)
    system("ps aguxwww | grep s3align");
#endif

    cmd_ln_free_r(config);
    return 0;
}
示例#13
0
文件: live.c 项目: wdebeaum/cabot
int32 live_utt_decode_block (int16 *samples, int32 nsamples, 
			     int32 live_endutt, partialhyp_t **ohyp)
{
    static int32 live_begin_new_utt = 1;
    static int32 frmno;
    static float32 ***live_feat = NULL;
    
    int32   live_nfr, live_nfeatvec;
    int32   nwds =0;
    float32 **mfcbuf;
    /*    int i,j;*/
    /* 2004/08/27 L Galescu <*****@*****.**> -- added raw audio file saving */
    static char uttfn[1024];
    static FILE *rawfp = NULL;
    int16 block_peak_amplitude;

    if(live_feat==NULL)
        live_feat = feat_array_alloc (kbcore_fcb(kbcore), LIVEBUFBLOCKSIZE);
    
    if (live_begin_new_utt){
        fe_start_utt(fe);
        utt_begin (kb);
        frmno = 0;
        kb->nfr = 0;
        kb->utt_hmm_eval = 0;
        kb->utt_sen_eval = 0;
        kb->utt_gau_eval = 0;
        live_begin_new_utt = 0;
        sprintf(uttfn, "%s/%s.raw", cmd_ln_str("-outrawdir"), kb->uttid);
        rawfp = fopen(uttfn, "wb");
    }
    /* 10.jan.01 RAH, fe_process_utt now requires ***mfcbuf and it allocates the memory internally) */
    mfcbuf = NULL;

    /* LG 20080613 */
    block_peak_amplitude = get_peak_amplitude(samples, nsamples);
    if (block_peak_amplitude > peak_amplitude)
      peak_amplitude = block_peak_amplitude;
    E_INFO("segment peak %d\n",peak_amplitude);

    live_nfr = fe_process_utt(fe, samples, nsamples, &mfcbuf); /**/
    if (rawfp != NULL) {
      fwrite(samples, sizeof(int16), nsamples, rawfp);
      if (live_endutt)
	  fclose(rawfp);
    }

    if (live_endutt) {
        /* RAH, It seems that we shouldn't throw out this data */
        fe_end_utt(fe,dummyframe); /* Flush out the fe */
    }
#if 0
    E_INFO("Number frame after fe_process_utt %d\n",live_nfr);
    for(i=0;i<live_nfr;i++){
      printf("%d ",i);
      for(j=0;j<13;j++){
        printf("%f ",mfcbuf[i][j]);
        fflush(stdout);
      }
      printf("\n");
      fflush(stdout);
    }
#endif
    /* lgalescu 2004/08/22 -- i am under the impression that 
     * feat_s2mfc2feat_block() needs to be called at the end of utt 
     * even if no frames need processing
     */
    /* lgalescu 2004/10/13 -- rescinded the above */
    if(live_nfr>0){
      /* Compute feature vectors */
      live_nfeatvec = feat_s2mfc2feat_block(kbcore_fcb(kbcore), mfcbuf,
					    live_nfr, live_begin_new_utt,
					    live_endutt, live_feat);

#if 0   
    E_INFO ("live_nfeatvec: %ld\n",live_nfeatvec);
#endif
#if 0
      E_INFO("Current frame number %d, Number of frames %d, Number frame after feat_s2mfcfeat_block %d\n",frmno,live_nfr,live_nfeatvec);
      
      for(i=0;i<live_nfeatvec;i++){
        printf("%d\n",i);
        printf("Cep: ");
        fflush(stdout);
        for(j=0;j<13;j++){
	  printf("%f ",live_feat[i][0][j]);
	  fflush(stdout);
        }
        printf("\n");
        fflush(stdout);
        printf("Del: ");
        fflush(stdout);
        for(j=13;j<26;j++){
	  printf("%f ",live_feat[i][0][j]);
	  fflush(stdout); 
        }
        printf("\n");
        fflush(stdout);
        printf("Acc: ");
        fflush(stdout);
        for(j=26;j<39;j++){
	  printf("%f ",live_feat[i][0][j]);
	  fflush(stdout);
        }
        printf("\n");
        fflush(stdout);
        
      }
#endif
    
      /* decode the block */
      utt_decode_block (live_feat, live_nfeatvec, &frmno, kb, 
			maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp);

      /* lgalescu 2004/08/21
       * moved the following block out of the previous if(){} because we need 
       * the output even when no feature computation has to be done.
       */
      /* lgalescu 2004/10/13 -- rescinded */

      /* Pull out partial hypothesis */
      nwds =  live_get_partialhyp(live_endutt);
      *ohyp = parthyp;
      parthyplen = nwds;
    }

    /* Clean up */
    if (live_endutt) {
      live_begin_new_utt = 1;
      kb->tot_fr += kb->nfr;
      utt_end(kb);
    }
    else {
      live_begin_new_utt = 0;
    }
    
    /* I'm starting to think that fe_process_utt should not be allocating its 
     * memory, that or it should allocate some max and just keep on going, 
     * this idea of constantly allocating freeing memory seems dangerous to me.
     */
    /* 20040318 ARCHAN : It sounds extremely dangerous to me and I will 
     * eliminate it sometime. 
     */
    /* lgalescu: i second that! the memory issue needs to be investigated: after a run on linux, i noticed some 1.6M of memory having "disappeared"! */

    if(live_nfr>0){
      ckd_free_2d((void **) mfcbuf); /* RAH, this must be freed since fe_process_utt allocates it */
    }

    return(parthyplen);
}
示例#14
0
文件: am.c 项目: revic1993/cmusphinx
acoustic_t *acoustic_init (feat_t *f, gauden_t *g, senone_t *s,
                           float64 beam,
                           int32 maxfr)
{
    acoustic_t *am;
    int32 i;

    if (senone_n_mgau(s) != gauden_n_mgau(g)) {
        E_ERROR("#Parent mixture Gaussians mismatch: senone(%d), gauden(%d)\n",
                senone_n_mgau(s), gauden_n_mgau(g));
    }

    if (feat_n_stream(f) != senone_n_stream(s)) {
        E_ERROR("#Feature-streams mismatch: feat(%d), senone(%d)\n",
                feat_n_stream(f), senone_n_stream(s));
    }

    if (feat_n_stream(f) != gauden_n_stream(g)) {
        E_ERROR("#Feature-streams mismatch: feat(%d), gauden(%d)\n",
                feat_n_stream(f), gauden_n_stream(g));
        return NULL;
    }

    for (i = 0; i < feat_n_stream(f); i++) {
        if (feat_stream_len(f, i) != gauden_stream_len(g, i)) {
            E_ERROR("Feature stream(%d) length mismatch: feat(%d), gauden(%d)\n",
                    feat_stream_len(f, i), gauden_stream_len(g, i));
            return NULL;
        }
    }

    if (beam > 1.0) {
        E_ERROR("mgaubeam > 1.0 (%e)\n", beam);
        return NULL;
    }

    am = (acoustic_t *) ckd_calloc (1, sizeof(acoustic_t));

    am->fcb = f;
    am->gau = g;
    am->sen = s;

    am->mgaubeam = (beam == 0.0) ? LOGPROB_ZERO : logs3(beam);
    if (am->mgaubeam > 0)
        am->mgaubeam = 0;
    am->tot_mgau_eval = 0;
    am->tot_dist_valid = 0.0;

    am->dist_valid = (am->mgaubeam <= LOGPROB_ZERO) ? NULL :
                     (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32));

    if (f->compute_feat) {
        /* Input is MFC cepstra; feature vectors computed from that */
        am->mfc = (float32 **) ckd_calloc_2d (maxfr, feat_cepsize(am->fcb),
                                              sizeof(float32));
        am->feat = feat_array_alloc (f, 1);
    } else {
        /* Input is directly feature vectors */
        am->mfc = NULL;
        am->feat = feat_array_alloc (f, maxfr);
    }

    am->dist = (int32 *) ckd_calloc (g->max_n_mean, sizeof(int32));
    am->gauden_active = bitvec_alloc (g->n_mgau);

    am->senscr = (int32 *) ckd_calloc (s->n_sen, sizeof(int32));
    am->senscale = (int32 *) ckd_calloc (maxfr, sizeof(int32));
    am->sen_active = bitvec_alloc (s->n_sen);

    return am;
}
static int
ld_init_impl(live_decoder_t * _decoder, int32 _internal_cmdln)
{
    param_t fe_param;
    int rv = LD_SUCCESS;

    assert(_decoder != NULL);

    unlimit();

    /* ARCHAN 20050708: This part should be factored with fe_parse_option */
    /* allocate and initialize front-end */
    fe_init_params(&fe_param);
    fe_param.SAMPLING_RATE = cmd_ln_float32("-samprate");
    fe_param.FRAME_RATE = cmd_ln_int32("-frate");
    fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");
    fe_param.FB_TYPE = strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0 ?
        MEL_SCALE : LOG_LINEAR;
    fe_param.NUM_CEPSTRA = cmd_ln_int32("-ncep");
    fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
    fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
    fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    fe_param.dither = strcmp("no", cmd_ln_str("-dither"));
    fe_param.warp_type = cmd_ln_str("-warp_type");
    fe_param.warp_params = cmd_ln_str("-warp_params");

    if ((_decoder->fe = fe_init(&fe_param)) == NULL) {
        E_WARN("Failed to initialize front-end.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    /* capture decoder parameters */
    kb_init(&_decoder->kb);

    /* initialize decoder variables */
    _decoder->kbcore = _decoder->kb.kbcore;
    _decoder->hyp_frame_num = -1;
    _decoder->uttid = NULL;
    _decoder->ld_state = LD_STATE_IDLE;
    _decoder->hyp_str = NULL;
    _decoder->hyp_segs = NULL;

    /*
       _decoder->swap= (cmd_ln_int32("-machine_endian") != cmd_ln_int32("-input_endian"));
     */

    _decoder->swap =
        (strcmp(cmd_ln_str("-machine_endian"), cmd_ln_str("-input_endian"))
         != 0);

    _decoder->phypdump = (cmd_ln_int32("-phypdump"));
    _decoder->rawext = (cmd_ln_str("-rawext"));

    if (_decoder->phypdump)
        E_INFO("Partial hypothesis WILL be dumped\n");
    else
        E_INFO("Partial hypothesis will NOT be dumped\n");


    if (_decoder->swap)
        E_INFO("Input data WILL be byte swapped\n");
    else
        E_INFO("Input data will NOT be byte swapped\n");


    _decoder->internal_cmdln = _internal_cmdln;
    _decoder->features =
        feat_array_alloc(kbcore_fcb(_decoder->kbcore), LIVEBUFBLOCKSIZE);
    if (_decoder->features == NULL) {
        E_WARN("Failed to allocate internal feature buffer.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    return LD_SUCCESS;

  ld_init_impl_cleanup:
    if (_decoder->fe != NULL) {
        fe_close(_decoder->fe);
    }
    if (_decoder->features != NULL) {
        /* consult the implementation of feat_array_alloc() for how to free our
         * internal feature vector buffer */
        ckd_free((void *) **_decoder->features);
        ckd_free_2d((void **) _decoder->features);
    }
    if (_internal_cmdln == TRUE) {
        cmd_ln_free();
    }
    _decoder->ld_state = LD_STATE_FINISHED;

    return rv;
}
int
agg_all_seg(feat_t *fcb,
	    segdmp_type_t type,
	    const char *fn,
	    uint32 stride)
{
    uint32 seq_no;
    vector_t *mfcc = NULL;
    uint32 mfc_veclen = cmd_ln_int32("-ceplen");
    uint32 n_frame;
    uint32 n_out_frame;
    uint32 blksz=0;
    vector_t **feat = NULL;
    uint32 i, j;
    uint32 t;
    uint32 n_stream;
    const uint32 *veclen;
    FILE *fp;
    uint32 ignore = 0;
    long start;
    int32 no_retries=0;
    
    n_stream = feat_dimension1(fcb);
    veclen = feat_stream_lengths(fcb);
    for (i = 0, blksz = 0; i < n_stream; i++)
        blksz += veclen[i];

    fp = open_dmp(fn);

    start = ftell(fp);

    if (s3write(&i, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");

	return S3_ERROR;
    }

    for (seq_no = corpus_get_begin(), j = 0, n_out_frame = 0;
	 corpus_next_utt(); seq_no++) {
	    if (mfcc) {
		free(mfcc[0]);
		ckd_free(mfcc);

		mfcc = NULL;
	    }

	    /* get the MFCC data for the utterance */
	    if (corpus_get_generic_featurevec(&mfcc, &n_frame, mfc_veclen) < 0) {
	      E_FATAL("Can't read input features from %s\n", corpus_utt());
	    }

	if ((seq_no % 1000) == 0) {
	    E_INFO("[%u]\n", seq_no);
	}

	    if (feat) {
		feat_array_free(feat);
		feat = NULL;
	    }
	    
	    if (n_frame < 9) {
	      E_WARN("utt %s too short\n", corpus_utt());
	      if (mfcc) {
		ckd_free(mfcc[0]);
		ckd_free(mfcc);
		mfcc = NULL;
	      }
	      continue;
	    }

	    feat = feat_array_alloc(fcb, n_frame + feat_window_size(fcb));
	    feat_s2mfc2feat_live(fcb, mfcc, &n_frame, TRUE, TRUE, feat);

	    for (t = 0; t < n_frame; t++, j++) {
		if ((j % stride) == 0) {
		    while (s3write(&feat[t][0][0],
				   sizeof(float32),
				   blksz,
				   fp, &ignore) != blksz) {
			static int rpt = 0;

			if (!rpt) {
			    E_ERROR_SYSTEM("Unable to write to dmp file");
			    E_INFO("sleeping...\n");
			    no_retries++;
			}
			sleep(3);

			if(no_retries > 10){
			  E_FATAL("Failed to write to a dmp file after 10 retries of getting MFCC(about 30 seconds)\n ");
			}
		    }
		    ++n_out_frame;
		}
	    }
    }

    if (fseek(fp, start, SEEK_SET) < 0) {
	E_ERROR_SYSTEM("Unable to seek to begin of dmp");

	return S3_ERROR;
    }

    E_INFO("Wrote %u frames to %s\n", n_out_frame, fn);

    if (s3write((void *)&n_out_frame, sizeof(uint32), 1, fp, &ignore) != 1) {
	E_ERROR_SYSTEM("Unable to write to dmp file");
	
	return S3_ERROR;
    }

    return S3_SUCCESS;
}