Exemple #1
0
static int
fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel)
{
    mel->sampling_rate = fe->sampling_rate;
    mel->fft_size = fe->fft_size;
    mel->num_cepstra = fe->num_cepstra;
    mel->num_filters = cmd_ln_int32_r(config, "-nfilt");

    if (fe->log_spec)
        fe->feature_dimension = mel->num_filters;
    else
        fe->feature_dimension = fe->num_cepstra;

    mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf");
    mel->lower_filt_freq = cmd_ln_float32_r(config, "-lowerf");

    mel->doublewide = cmd_ln_boolean_r(config, "-doublebw");

    mel->warp_type = cmd_ln_str_r(config, "-warp_type");
    mel->warp_params = cmd_ln_str_r(config, "-warp_params");
    mel->lifter_val = cmd_ln_int32_r(config, "-lifter");

    mel->unit_area = cmd_ln_boolean_r(config, "-unit_area");
    mel->round_filters = cmd_ln_boolean_r(config, "-round_filters");

    if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) {
        E_ERROR("Failed to initialize the warping function.\n");
        return -1;
    }
    fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate);
    return 0;
}
Exemple #2
0
int
fe_parse_general_params(cmd_ln_t *config, fe_t * fe)
{
    int j;

    fe->config = config;
    fe->sampling_rate = cmd_ln_float32_r(config, "-samprate");
    fe->frame_rate = (int16)cmd_ln_int32_r(config, "-frate");
    if (cmd_ln_boolean_r(config, "-dither")) {
        fe->dither = 1;
        fe->seed = cmd_ln_int32_r(config, "-seed");
    }
#ifdef WORDS_BIGENDIAN
    fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#else        
    fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")) == 0 ? 0 : 1;
#endif
    fe->window_length = cmd_ln_float32_r(config, "-wlen");
    fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha");

    fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep");
    fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft");

    /* Check FFT size, compute FFT order (log_2(n)) */
    for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) {
        if (((j % 2) != 0) || (fe->fft_size <= 0)) {
            E_ERROR("fft: number of points must be a power of 2 (is %d)\n",
                    fe->fft_size);
            return -1;
        }
    }
    /* Verify that FFT size is greater or equal to window length. */
    if (fe->fft_size < (int)(fe->window_length * fe->sampling_rate)) {
        E_ERROR("FFT: Number of points must be greater or equal to frame size (%d samples)\n",
                (int)(fe->window_length * fe->sampling_rate));
        return -1;
    }

    fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc");

    if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct"))
        fe->transform = DCT_II;
    else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy"))
        fe->transform = LEGACY_DCT;
    else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk"))
        fe->transform = DCT_HTK;
    else {
        E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
        return -1;
    }

    if (cmd_ln_boolean_r(config, "-logspec"))
        fe->log_spec = RAW_LOG_SPEC;
    if (cmd_ln_boolean_r(config, "-smoothspec"))
        fe->log_spec = SMOOTH_LOG_SPEC;

    return 0;
}
Exemple #3
0
glist_t
srch_FLAT_FWD_bestpath_impl(void *srch,           /**< A void pointer to a search structure */
                            dag_t * dag)
{
    srch_t *s;
    srch_FLAT_FWD_graph_t *fwg;

    float32 bestpathlw;
    float64 lwf;
    srch_hyp_t *tmph, *bph;
    glist_t ghyp, rhyp;

    s = (srch_t *) srch;
    fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;

    assert(fwg->lathist);

    bestpathlw = cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-bestpathlw");
    lwf = bestpathlw ? (bestpathlw / cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-lw")) : 1.0;

    flat_fwd_dag_add_fudge_edges(fwg,
				 dag,
				 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-dagfudge"),
				 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-min_endfr"),
				 (void *) fwg->lathist, s->kbc->dict);


    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */
        if (dict_filler_word(s->kbc->dict, dag->end->wid))
            dag->end->wid = s->kbc->dict->finishwid;

        if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) < 0)
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
        else
            dag->filler_removed = 1;
    }

    bph =
        dag_search(dag, s->uttid, lwf, dag->end,
                   s->kbc->dict, s->kbc->lmset->cur_lm, s->kbc->fillpen);

    if (bph != NULL) {
        ghyp = NULL;
        for (tmph = bph; tmph; tmph = tmph->next)
            ghyp = glist_add_ptr(ghyp, (void *) tmph);

        rhyp = glist_reverse(ghyp);
        return rhyp;
    }
    else {
        return NULL;
    }

}
Exemple #4
0
ngram_model_t *
ngram_model_read(cmd_ln_t * config,
                 const char *file_name,
                 ngram_file_type_t file_type, logmath_t * lmath)
{
    ngram_model_t *model = NULL;
    switch (file_type) {
    case NGRAM_AUTO:{
            if ((model =
                 ngram_model_trie_read_bin(config, file_name,
                                           lmath)) != NULL)
                break;
            if ((model =
                 ngram_model_trie_read_arpa(config, file_name,
                                            lmath)) != NULL)
                break;
            if ((model =
                 ngram_model_trie_read_dmp(config, file_name,
                                           lmath)) != NULL)
                break;
            return NULL;
        }
    case NGRAM_ARPA:
        model = ngram_model_trie_read_arpa(config, file_name, lmath);
        break;
    case NGRAM_BIN:
        if ((model =
             ngram_model_trie_read_bin(config, file_name, lmath)) != NULL)
            break;
        if ((model =
             ngram_model_trie_read_dmp(config, file_name, lmath)) != NULL)
            break;
        return NULL;
    default:
        E_ERROR("language model file type not supported\n");
        return NULL;
    }

    /* Now set weights based on config if present. */
    if (config) {
        float32 lw = 1.0;
        float32 wip = 1.0;

        if (cmd_ln_exists_r(config, "-lw"))
            lw = cmd_ln_float32_r(config, "-lw");
        if (cmd_ln_exists_r(config, "-wip"))
            wip = cmd_ln_float32_r(config, "-wip");

        ngram_model_apply_weights(model, lw, wip);
    }

    return model;
}
Exemple #5
0
glist_t
srch_FLAT_FWD_nbest_impl(void *srch,           /**< A void pointer to a search structure */
                         dag_t * dag)
{
    srch_t *s;
    srch_FLAT_FWD_graph_t *fwg;
    float32 bestpathlw;
    float64 lwf;
    char str[2000];

    s = (srch_t *) srch;
    fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;
    assert(fwg->lathist);

    if (!(cmd_ln_exists_r(kbcore_config(fwg->kbcore), "-nbestdir")
          && cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestdir")))
        return NULL;
    ctl_outfile(str, cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestdir"),
                cmd_ln_str_r(kbcore_config(fwg->kbcore), "-nbestext"),
                (s->uttfile ? s->uttfile : s->uttid), s->uttid,
                cmd_ln_boolean_r(kbcore_config(fwg->kbcore), "-build_outdirs"));

    bestpathlw = cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-bestpathlw");
    lwf = bestpathlw ? (bestpathlw / cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-lw")) : 1.0;

    flat_fwd_dag_add_fudge_edges(fwg,
				 dag,
				 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-dagfudge"),
				 cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-min_endfr"),
				 (void *) fwg->lathist, s->kbc->dict);

    /* Bypass filler nodes */
    if (!dag->filler_removed) {
        /* If Viterbi search terminated in filler word coerce final DAG node to FINISH_WORD */
        if (dict_filler_word(s->kbc->dict, dag->end->wid))
            dag->end->wid = s->kbc->dict->finishwid;
        dag_remove_unreachable(dag);
        if (dag_bypass_filler_nodes(dag, lwf, s->kbc->dict, s->kbc->fillpen) < 0)
            E_ERROR("maxedge limit (%d) exceeded\n", dag->maxedge);
    }

    dag_compute_hscr(dag, kbcore_dict(s->kbc), kbcore_lm(s->kbc), lwf);
    dag_remove_bypass_links(dag);
    dag->filler_removed = 0;

    nbest_search(dag, str, s->uttid, lwf,
                 kbcore_dict(s->kbc),
                 kbcore_lm(s->kbc), kbcore_fillpen(s->kbc)
        );
    return NULL;
}
Exemple #6
0
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	logmath_t *lmath;
	acmod_t *acmod[5];
	sbthread_t *thr[5];
	featbuf_t *fb;
	FILE *raw;
	int16 buf[2048];
	int nsamp;
	int i;

	config = cmd_ln_init(NULL, ps_args(), TRUE,
			     "-hmm", TESTDATADIR "/hub4wsj_sc_8k",
			     "-lm", TESTDATADIR "/bn10000.3g.arpa",
			     "-dict", TESTDATADIR "/bn10000.dic",
			     "-compallsen", "yes",
			     NULL);
	ps_init_defaults(config);
	fb = featbuf_init(config);
	TEST_ASSERT(fb);

	lmath = logmath_init(cmd_ln_float32_r(config, "-logbase"),
			     0, FALSE);
	acmod[0] = acmod_init(config, lmath, fb);
	TEST_ASSERT(acmod[0]);
	/* Create a couple threads to pull features out of it. */
	for (i = 0; i < 5; ++i) {
		if (i != 0)
			acmod[i] = acmod_copy(acmod[0]);
		thr[i] = sbthread_start(NULL, consumer, acmod[i]);
	}

	/* Feed them some data. */
	raw = fopen(TESTDATADIR "/chan3.raw", "rb");
	featbuf_producer_start_utt(fb, "chan3");
	while ((nsamp = fread(buf, 2, 2048, raw)) > 0) {
		int rv;
		rv = featbuf_producer_process_raw(fb, buf, nsamp, FALSE);
		printf("Producer processed %d samples\n", nsamp);
		TEST_ASSERT(rv > 0);
	}
	fclose(raw);
	printf("Waiting for consumers\n");
	featbuf_producer_end_utt(fb);
	printf("Finished waiting\n");

	/* Reap those threads. */
	for (i = 0; i < 5; ++i) {
		sbthread_wait(thr[i]);
		sbthread_free(thr[i]);
		acmod_free(acmod[i]);
		printf("Reaped consumer %p\n", acmod[i]);
	}
	featbuf_free(fb);
	logmath_free(lmath);
	cmd_ln_free_r(config);
	return 0;
}
Exemple #7
0
int 
ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string)
{
  fsg_model_t *fsg;
  jsgf_rule_t *rule;
  char const *toprule;
  jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL);
  float lw;
  int result;

  if (!jsgf)
      return -1;

  rule = NULL;
  /* Take the -toprule if specified. */
  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
      rule = jsgf_get_rule(jsgf, toprule);
      if (rule == NULL) {
          E_ERROR("Start rule %s not found\n", toprule);
          return -1;
      }
  } else {
      rule = jsgf_get_public_rule(jsgf);
      if (rule == NULL) {
          E_ERROR("No public rules found in input string\n");
          return -1;
      }
  }

  lw = cmd_ln_float32_r(ps->config, "-lw");
  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
  result = ps_set_fsg(ps, name, fsg);
  fsg_model_free(fsg);
  return result;
}
Exemple #8
0
static int
acmod_init_feat(acmod_t *acmod)
{
    acmod->fcb =
        feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                  cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
                  cmd_ln_boolean_r(acmod->config, "-varnorm"),
                  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    if (cmd_ln_str_r(acmod->config, "-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str_r(acmod->config, "-lda"));
        if (feat_read_lda(acmod->fcb,
                          cmd_ln_str_r(acmod->config, "-lda"),
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n",
               cmd_ln_str_r(acmod->config, "-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
            return -1;
    }

    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < acmod->fcb->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
            c = cc + 1;
            ++nvals;
        }
        if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
        }
        ckd_free(vallist);
    }
    return 0;
}
bool FSpeechRecognitionWorker::Init() {

	std::string modelPath = contentPath_str + "model/" + langStr + "/" + langStr;
	std::string languageModel = contentPath_str + "model/" + langStr + "/" + langStr + ".lm.bin";
	std::string dictionaryPath = contentPath_str + "model/" + langStr + "/" + langStr + ".dict";

	// load dictionary
	dictionaryMap.clear();

	std::ifstream file(dictionaryPath);
	std::vector<std::string> words;
	std::string currentLine;

	while (file.good())
	{
		std::getline(file, currentLine);
		std::string word = currentLine.substr(0, currentLine.find(" "));
		std::string phrase = currentLine.substr(currentLine.find(" ") + 1, currentLine.size());
		dictionaryMap.insert(make_pair(word, phrase));
	}

	// Start Sphinx
	config = cmd_ln_init(NULL, ps_args(), 1,
		"-hmm", modelPath.c_str(),
		"-lm", languageModel.c_str(),
		NULL);
	
	ps = ps_init(config);

	if (!Manager | !ps) {
		ClientMessage(FString(TEXT("Speech Recognition Thread failed to start")));
		initSuccess = false;
		return false;
	}

	// only include the words/phrases that have been added
	for (auto It = dictionaryList.CreateConstIterator(); It; ++It)
	{
		FString word = *It;
		std::string wordStr = std::string(TCHAR_TO_UTF8(*word));
		if (dictionaryMap.find(wordStr) != dictionaryMap.end())
		{
			std::string phraseStr = dictionaryMap.at(wordStr);
			ps_add_word(ps, wordStr.c_str(), phraseStr.c_str(), TRUE);
		}
	}

	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
		"-samprate"))) == NULL) {
			ClientMessage(FString(TEXT("Failed to open audio device")));
			initSuccess = false;
			return initSuccess;
	}

	utt_started = 0;
	return true;
}
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	ngram_model_t *lm = NULL;
	logmath_t *lmath;
	const char *lmfn, *probdefn, *lsnfn, *text;

	if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
		return 1;

        verbose = cmd_ln_boolean_r(config, "-verbose");

	/* Create log math object. */
	if ((lmath = logmath_init
	     (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) {
		E_FATAL("Failed to initialize log math\n");
	}

	/* Load the language model. */
	lmfn = cmd_ln_str_r(config, "-lm");
	if (lmfn == NULL
	    || (lm = ngram_model_read(config, lmfn,
				      NGRAM_AUTO, lmath)) == NULL) {
		E_FATAL("Failed to load language model from %s\n",
			cmd_ln_str_r(config, "-lm"));
	}
        if ((probdefn = cmd_ln_str_r(config, "-probdef")) != NULL)
            ngram_model_read_classdef(lm, probdefn);
        ngram_model_apply_weights(lm,
                                  cmd_ln_float32_r(config, "-lw"),
                                  cmd_ln_float32_r(config, "-wip"),
                                  cmd_ln_float32_r(config, "-uw"));

	/* Now evaluate some text. */
	lsnfn = cmd_ln_str_r(config, "-lsn");
	text = cmd_ln_str_r(config, "-text");
	if (lsnfn) {
		evaluate_file(lm, lmath, lsnfn);
	}
	else if (text) {
		evaluate_string(lm, lmath, text);
	}

	return 0;
}
Exemple #11
0
static void
models_init(void)
{

    mdef = mdef_init(cmd_ln_str_r(config, "-mdef"), 1);

    dict = dict_init(mdef,
                     cmd_ln_str_r(config, "-dict"),
                     cmd_ln_str_r(config, "-fdict"),
		     cmd_ln_boolean_r(config, "-lts_mismatch"),
		     cmd_ln_boolean_r(config, "-mdef_fillers"),
		     FALSE, TRUE);

    lmset = lmset_init(cmd_ln_str_r(config, "-lm"),
                       cmd_ln_str_r(config, "-lmctlfn"),
                       cmd_ln_str_r(config, "-ctl_lm"),
                       cmd_ln_str_r(config, "-lmname"),
                       cmd_ln_str_r(config, "-lmdumpdir"),
                       cmd_ln_float32_r(config, "-lw"),
                       cmd_ln_float32_r(config, "-wip"),
                       cmd_ln_float32_r(config, "-uw"), dict,
                       logmath);

    /* Filler penalties */
    fpen = fillpen_init(dict, cmd_ln_str_r(config, "-fillpen"),
                        cmd_ln_float32_r(config, "-silprob"),
                        cmd_ln_float32_r(config, "-fillprob"),
                        cmd_ln_float32_r(config, "-lw"),
                        cmd_ln_float32_r(config, "-wip"),
                        logmath);

}
Exemple #12
0
int
srch_FLAT_FWD_srch_one_frame_lv2(void *srch)
{
    int32 bestscr;              /* Best state score for any whmm evaluated in this frame */
    int32 whmm_thresh;          /* Threshold for any whmm to stay alive in search */
    int32 word_thresh;          /* Threshold for a word-final whmm to succeed */
    int32 phone_penalty;
    srch_FLAT_FWD_graph_t *fwg;
    srch_t *s;

    s = (srch_t *) srch;
    fwg = (srch_FLAT_FWD_graph_t *) s->grh->graph_struct;

    ptmr_start(&(fwg->tm_hmmeval));
    bestscr = whmm_eval(fwg, s->ascr->senscr);
    /*  E_INFO("bestscr %d RENORM_THRESH %d\n",bestscr, RENORM_THRESH); */
    ptmr_stop(&(fwg->tm_hmmeval));

    whmm_thresh = bestscr + s->beam->hmm;
    word_thresh = bestscr + s->beam->word;
    phone_penalty = logs3(kbcore_logmath(s->kbc), cmd_ln_float32_r(kbcore_config(fwg->kbcore), "-phonepen"));

    assert(s->ascr->senscr);
    /*  E_INFO("fwg->n_frm %d\n",fwg->n_frm); */
    dump_fwd_dbg_info(fwg, fwg->fwdDBG, s->ascr, bestscr, whmm_thresh,
                      word_thresh, s->ascr->senscr);

    {
        ptmr_start(&(fwg->tm_hmmtrans));
        fwg->lathist->frm_latstart[fwg->n_frm] = fwg->lathist->n_lat_entry;
        whmm_exit(fwg, fwg->whmm, fwg->lathist,
		  whmm_thresh, word_thresh,
                  phone_penalty);
        ptmr_stop(&(fwg->tm_hmmtrans));

        /* Please read, the In whmm_exit, if word ends are reach,
           n_lat_entry will increase, see whmm_exit(). Then word_trans
           will be triggered.
         */

        ptmr_start(&(fwg->tm_wdtrans));
        if (fwg->lathist->frm_latstart[fwg->n_frm] < fwg->lathist->n_lat_entry)
            word_trans(fwg, fwg->whmm, fwg->lathist, whmm_thresh, phone_penalty);
        ptmr_stop(&(fwg->tm_wdtrans));
    }

    if (bestscr < RENORM_THRESH) {
        E_INFO("Frame %d: bestscore= %d; renormalizing\n", fwg->n_frm,
               bestscr);
        whmm_renorm(fwg, fwg->whmm, bestscr);
    }

    fwg->lathist->n_frm++;
    fwg->n_frm++;
    return SRCH_SUCCESS;
}
int32
gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
{
    int32 i, m, f, d, *flen;
    float32 ****fgau;

    /* Reload means and variances (un-precomputed). */
    fgau = NULL;
    gauden_param_read(&fgau, &g->n_mgau, &g->n_feat, &g->n_density,
                      &g->featlen, cmd_ln_str_r(config, "-mean"));
    g->mean = (mfcc_t ****)fgau;
    fgau = NULL;
    gauden_param_read(&fgau, &m, &f, &d, &flen, cmd_ln_str_r(config, "-var"));
    g->var = (mfcc_t ****)fgau;

    /* Verify mean and variance parameter dimensions */
    if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density))
        E_FATAL
            ("Mixture-gaussians dimensions for means and variances differ\n");
    for (i = 0; i < g->n_feat; i++)
        if (g->featlen[i] != flen[i])
            E_FATAL("Feature lengths for means and variances differ\n");
    ckd_free(flen);

    /* Transform codebook for each stream s */
    for (i = 0; i < g->n_mgau; ++i) {
        for (f = 0; f < g->n_feat; ++f) {
            float64 *temp;
            temp = (float64 *) ckd_calloc(g->featlen[f], sizeof(float64));
            /* Transform each density d in selected codebook */
            for (d = 0; d < g->n_density; d++) {
                int l;
                for (l = 0; l < g->featlen[f]; l++) {
                    temp[l] = 0.0;
                    for (m = 0; m < g->featlen[f]; m++) {
                        /* FIXME: For now, only one class, hence the zeros below. */
                        temp[l] += mllr->A[f][0][l][m] * g->mean[i][f][d][m];
                    }
                    temp[l] += mllr->b[f][0][l];
                }

                for (l = 0; l < g->featlen[f]; l++) {
                    g->mean[i][f][d][l] = (float32) temp[l];
                    g->var[i][f][d][l] *= mllr->h[f][0][l];
                }
            }
            ckd_free(temp);
        }
    }

    /* Re-precompute (if we aren't adapting variances this isn't
     * actually necessary...) */
    gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
    return 0;
}
int
main(int argc, char *argv[])
{
    ngram_trie_t *t;
    dict_t *dict;
    bin_mdef_t *mdef;
    logmath_t *lmath;
    cmd_ln_t *config;
    FILE *arpafh;

    config = cmd_ln_init(NULL, ps_args(), TRUE,
                         "-hmm", TESTDATADIR "/hub4wsj_sc_8k",
                         "-dict", TESTDATADIR "/bn10000.homos.dic",
                         NULL);
    ps_init_defaults(config);

    lmath = logmath_init(cmd_ln_float32_r(config, "-logbase"),
                         0, FALSE);
    mdef = bin_mdef_read(config, cmd_ln_str_r(config, "-mdef"));
    dict = dict_init(config, mdef);

    t = ngram_trie_init(dict, lmath);
    arpafh = fopen(TESTDATADIR "/bn10000.3g.arpa", "r");
    ngram_trie_read_arpa(t, arpafh);
    fclose(arpafh);

    /* Test 1, 2, 3-gram probs without backoff. */
    test_lookups(t, lmath);

    arpafh = fopen("tmp.bn10000.3g.arpa", "w");
    ngram_trie_write_arpa(t, arpafh);
    fclose(arpafh);
    ngram_trie_free(t);

    t = ngram_trie_init(dict, lmath);
    arpafh = fopen("tmp.bn10000.3g.arpa", "r");
    ngram_trie_read_arpa(t, arpafh);
    fclose(arpafh);

    /* Test 1, 2, 3-gram probs without backoff. */
    test_lookups(t, lmath);

    /* Test adding nodes. */
    test_add_nodes(t, lmath);

    ngram_trie_free(t);

    dict_free(dict);
    logmath_free(lmath);
    bin_mdef_free(mdef);
    cmd_ln_free_r(config);

    return 0;
}
Exemple #15
0
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        E_ERROR("Must specify -mdef or -hmm\n");
        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
            || (cmd_ln_str_r(acmod->config, "-var") == NULL)
            || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        E_INFO("Attempting to use SCHMM computation module\n");
        if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
            E_INFO("Attempting to use PTHMM computation module\n");
            if ((acmod->mgau = ptm_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    return 0;
}
Exemple #16
0
int
main(int argc, char *argv[])
{
    print_appl_info(argv[0]);
    cmd_ln_appl_enter(argc, argv, "default.arg", defn);

    unlimit();

    config = cmd_ln_get();

    logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1,
                         cmd_ln_int32_r(config, "-log3table"));

    E_INFO("Value of base %f \n", cmd_ln_float32_r(config, "-logbase"));
    models_init();
    ptmr_init(&tm_utt);

    if ((inmatchsegfp = fopen(cmd_ln_str_r(config, "-inhypseg"), "r")) == NULL)
        E_ERROR("fopen(%s,r) failed\n", cmd_ln_str_r(config, "-inhypseg"));


    if ((outconfmatchsegfp = fopen(cmd_ln_str_r(config, "-output"), "w")) == NULL)
        E_ERROR("fopen(%s,w) failed\n", cmd_ln_str_r(config, "-output"));

    if (cmd_ln_str_r(config, "-ctl")) {
        ctl_process(cmd_ln_str_r(config, "-ctl"),
                    cmd_ln_str_r(config, "-ctl_lm"),
                    NULL,
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"), utt_confidence, NULL);
    }
    else {
        E_FATAL("-ctl is not specified\n");
    }

#if (! WIN32)
    system("ps auxwww | grep s3dag");
#endif

    fclose(outconfmatchsegfp);
    fclose(inmatchsegfp);

    models_free();

    logmath_free(logmath);

    cmd_ln_free_r(config);

    return 0;

}
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int i;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /* Initialize HMM context. */
    if (pls->hmmctx)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL, acmod->mdef->sseq);
    if (pls->hmmctx == NULL)
        return -1;

    /* Initialize penalty storage */
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->window = cmd_ln_int32_r(config, "-pl_window");
    if (pls->penalties)
        ckd_free(pls->penalties);
    pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties));
    if (pls->pen_buf)
        ckd_free_2d(pls->pen_buf);
    pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf));

    /* Initialize phone HMMs. */
    if (pls->hmms) {
        for (i = 0; i < pls->n_phones; ++i)
            hmm_deinit((hmm_t *)&pls->hmms[i]);
        ckd_free(pls->hmms);
    }
    pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms));
    for (i = 0; i < pls->n_phones; ++i) {
        hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i],
                 FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, i),
                 bin_mdef_pid2tmatid(acmod->mdef, i));
    }
    pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight");
    pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT;
    pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT;
    pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT;
    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}
Exemple #18
0
void
s3_decode_read_lm(s3_decode_t * _decode,
           const char *lmpath, const char *lmname)
{
    srch_t *s;
    lm_t *lm;
    int32 ndict;
    s = (srch_t *) _decode->kb.srch;

    ndict = dict_size(_decode->kb.kbcore->dict);


    lm = lm_read_advance(lmpath, lmname,
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-lw"),
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-wip"),
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-uw"),
                         ndict, NULL, 1,   /* Weight apply */
                         kbcore_logmath(s->kbc),
                         cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-ugonly"),
                         cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-bgonly")
        );

    s->funcs->add_lm(s, lm, lmname);
}
Exemple #19
0
/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
{
  fsg_model_t *fsg;
  jsgf_rule_t *rule;
  char const *toprule;
  jsgf_t *jsgf = jsgf_parse_file(path, NULL);
  float lw;
  int result;

  if (!jsgf)
      return -1;

  rule = NULL;
  /* Take the -toprule if specified. */
  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
      char *ruletok;
      ruletok = string_join("<", toprule, ">", NULL);
      rule = jsgf_get_rule(jsgf, ruletok);
      ckd_free(ruletok);
      if (rule == NULL) {
          E_ERROR("Start rule %s not found\n", toprule);
          return -1;
      }
  } else {
      /* Otherwise, take the first public rule. */
      jsgf_rule_iter_t *itor;

      for (itor = jsgf_rule_iter(jsgf); itor;
           itor = jsgf_rule_iter_next(itor)) {
          rule = jsgf_rule_iter_rule(itor);
          if (jsgf_rule_public(rule)) {
              jsgf_rule_iter_free(itor);
              break;
          }
      }
      if (rule == NULL) {
          E_ERROR("No public rules found in %s\n", path);
          return -1;
      }
  }

  lw = cmd_ln_float32_r(ps->config, "-lw");
  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
  result = ps_set_fsg(ps, name, fsg);
  fsg_model_free(fsg);
  return result;
}
Exemple #21
0
int
main(int argc, char *argv[])
{
    cmd_ln_t *config;
    char const *cfg;

    /* Make sure we exit cleanly (needed for profiling among other things) */
    /* Signals seem to be broken in arm-wince-pe. */
#if !defined(GNUWINCE) && !defined(_WIN32_WCE)
    signal(SIGINT, &sighandler);
#endif

    if (argc == 2) {
        config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE);
    }
    else {
        config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE);
    }
    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }
    if (config == NULL)
        return 1;
    ps = ps_init(config);
    if (ps == NULL)
        return 1;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("ad_open_dev failed\n");

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if (setjmp(jbuf) == 0) {
        utterance_loop();
    }

    ps_free(ps);
    ad_close(ad);

    return 0;
}
uint32 FSpeechRecognitionWorker::Run() {

	char const *hyp;
	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
		"-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}
	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		return 2;
	}
	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0)
			ClientMessage(FString(TEXT("Failed to read audio")));
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);
		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance  */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return 0;
}
Exemple #23
0
static int
process_fsgctl_line(ps_decoder_t *ps, cmd_ln_t *config, char const *fname)
{
    fsg_model_t *fsg;
    int err;
    char *path = NULL;
    const char *fsgdir = cmd_ln_str_r(config, "-fsgdir");
    const char *fsgext = cmd_ln_str_r(config, "-fsgext");

    if (fname == NULL)
        return 0;

    if (fsgdir)
        path = string_join(fsgdir, "/", fname, fsgext ? fsgext : "", NULL);
    else if (fsgext)
        path = string_join(fname, fsgext, NULL);
    else
        path = ckd_salloc(fname);

    fsg = fsg_model_readfile(path, ps_get_logmath(ps),
                                          cmd_ln_float32_r(config, "-lw"));
    err = 0;
    if (!fsg) {
        err = -1;
        goto error_out;
    }
    if (ps_set_fsg(ps, fname, fsg)) {
        err = -1;
        goto error_out;
    }

    E_INFO("Using FSG: %s\n", fname);
    if (ps_set_search(ps, fname))
        err = -1;

error_out:
    fsg_model_free(fsg);
    ckd_free(path);

    return err;
}
Exemple #24
0
/**
 * Output HTK format header.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;
    /* Same file size thing as in Sphinx files (I think) */
    if (swap) SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample period in 100ns units. */
    samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (swap) SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap) SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;
    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK; /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O; /* MFCC + CEP0 (note reordering...) */
    if (swap) SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
Exemple #25
0
/*
 * Continuous recognition from a file
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }
    
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
	fread(waveheader, 1, 44, rawfd);
	if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
    	    E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
	E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
        	printf("%s\n", hyp);
            if (print_times)
        	print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
    	    printf("%s\n", hyp);
    	    if (print_times) {
    		print_word_times();
	    }
	}
    }
    
    fclose(rawfd);
}
int
main(int argc, char *argv[])
{
    char const *cfg;
    int i;
    int16 buf[2048];

    if (argc == 2) {
        config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE);
    }
    else {
        config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE);
    }
    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }
    if (config == NULL)
        return 1;

    singlefile = cmd_ln_boolean_r(config, "-singlefile");
    if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) {
        if ((infile = fopen(infile_path, "rb")) == NULL) {
            E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path);
            return 1;
        }
        read_audio = &read_audio_file;
        /* skip wav header */
        read_audio(buf, 44);
    }
    else {
        if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                              (int) cmd_ln_float32_r(config,
                                                     "-samprate"))) ==
            NULL) {
            E_FATAL("Failed to open audio device\n");
            return 1;
        }
        read_audio = &read_audio_adev;
        printf("Start recording ...\n");
        fflush(stdout);
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");

        /* TODO remove this thing */
        for (i = 0; i < 5; i++) {
            sleep_msec(200);
            read_audio(buf, 2048);
        }
        printf("You may speak now\n");
        fflush(stdout);
    }

    fe = fe_init_auto_r(config);
    if (fe == NULL)
        return 1;

    segment_audio();

    if (ad)
        ad_close(ad);
    if (infile)
        fclose(infile);

    fe_free(fe);
    cmd_ln_free_r(config);
    return 0;
}
void
segment_audio()
{
    FILE *file;
    int16 pcm_buf[BLOCKSIZE];
    mfcc_t **cep_buf;
    int16 voiced_buf = NULL;
    int32 voiced_nsamps, out_frameidx, uttstart = 0;
    char file_name[1024];
    uint8 cur_vad_state, vad_state, writing;
    int uttno, uttlen, sample_rate;
    int32 nframes, nframes_tmp;
    int16 frame_size, frame_shift, frame_rate;
    size_t k;

    sample_rate = (int) cmd_ln_float32_r(config, "-samprate");
    frame_rate = cmd_ln_int32_r(config, "-frate");
    frame_size =
        (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5);
    frame_shift =
        (int32) (sample_rate / cmd_ln_int32_r(config, "-frate") + 0.5);
    nframes = (BLOCKSIZE - frame_size) / frame_shift;
    cep_buf =
        (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe),
                                  sizeof(mfcc_t));

    uttno = 0;
    uttlen = 0;
    cur_vad_state = 0;
    voiced_nsamps = 0;
    writing = 0;
    file = NULL;
    fe_start_stream(fe);
    fe_start_utt(fe);
    while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) {
        int16 const *pcm_buf_tmp;
        pcm_buf_tmp = &pcm_buf[0];
        while (k) {
            nframes_tmp = nframes;
            fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf,
                                  &nframes_tmp, voiced_buf,
                                  &voiced_nsamps, &out_frameidx);
            if (out_frameidx > 0) {
        	uttstart = out_frameidx;
            }
            vad_state = fe_get_vad_state(fe);
            if (!cur_vad_state && vad_state) {
                /* silence->speech transition, time to start new file */
                uttno++;
                if (!singlefile) {
                    sprintf(file_name, "%s%04d.raw", infile_path, uttno);
                    if ((file = fopen(file_name, "wb")) == NULL)
                          E_FATAL_SYSTEM("Failed to open '%s' for writing",
                                         file_name);
                } else {
                    sprintf(file_name, "%s.raw", infile_path);
                    if ((file = fopen(file_name, "ab")) == NULL)
                          E_FATAL_SYSTEM("Failed to open '%s' for writing",
                                         file_name);
		}
		writing = 1;
            }

            if (writing && file && voiced_nsamps > 0) {
                fwrite(voiced_buf, sizeof(int16), voiced_nsamps, file);
                uttlen += voiced_nsamps;
            }

            if (cur_vad_state && !vad_state) {
                /* speech -> silence transition, time to finish file */
                fclose(file);
	        printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
    		       uttno,
    		       file_name,
    	    	       ((double) uttstart) / frame_rate,
            	        uttlen,
            	       ((double) uttlen) / sample_rate);
                fflush(stdout);
                fe_end_utt(fe, cep_buf[0], &nframes_tmp);
                writing = 0;
                uttlen = 0;
                voiced_nsamps = 0;
                fe_start_utt(fe);
            }
            cur_vad_state = vad_state;
        }
    }

    if (writing) {
        fclose(file);
	printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
    	        uttno,
    		file_name,
    	    	((double) uttstart) / frame_rate,
            	uttlen,
                ((double) uttlen) / sample_rate);
        fflush(stdout);
    }
    fe_end_utt(fe, cep_buf[0], &nframes);
    ckd_free_2d(cep_buf);
}
Exemple #28
0
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
            E_ERROR("Acoustic model definition is not specified either "
                    "with -mdef option or with -hmm\n");
        else
            E_ERROR("Folder '%s' does not contain acoustic model "
                    "definition 'mdef'\n", hmmdir);

        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        E_INFO("Attempting to use PTM computation module\n");
        if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
            E_INFO("Attempting to use semi-continuous computation module\n");
            if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    /* If there is an MLLR transform, apply it. */
    if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
        ps_mllr_t *mllr = ps_mllr_read(mllrfn);
        if (mllr == NULL)
            return -1;
        acmod_update_mllr(acmod, mllr);
    }

    return 0;
}
Exemple #29
0
/*
 * Main utterance processing loop:
 *     for (;;) {
 * 	   wait for start of next utterance;
 * 	   decode utterance until silence of at least 1 sec observed;
 * 	   print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    cont_ad_t *cont;
    char word[256];
	char c1[256], c2[256];

	int tracking = 0;
	int halted = 0;
	int LEFT = 0;
	int RIGHT = 1;
	int MOVE_CENT = 100; //1 meter
	int numwords;

	setlinebuf(stdout);

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");


	printf("LEDON BLUE\n");
    for (;;) {
        /* Indicate listening for next utterance */
        fprintf(stderr, "READY....\n");
        fflush(stderr);

        /* Wait data for next utterance */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
            sleep_msec(100);

        if (k < 0)
            E_FATAL("Failed to read audio\n");

        /*
         * Non-zero amount of data received; start recognition of new utterance.
         * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
         */
        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("Failed to start utterance\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        fprintf(stderr, "Listening...\n");

        /* Note timestamp for this first block of data */
        ts = cont->read_ts;

        /* Decode utterance until end (marked by a "long" silence, >1sec) */
        for (;;) {
            /* Read non-silence audio data, if any, from continuous listening module */
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                E_FATAL("Failed to read audio\n");
            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }

            /*
             * Decode whatever data was read above.
             */
            rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

            /* If no work to be done, sleep a bit */
            if ((rem == 0) && (k == 0))
                sleep_msec(20);
        }

        /*
         * Utterance ended; flush any accumulated, unprocessed A/D data and stop
         * listening until current utterance completely decoded
         */
        ad_stop_rec(ad);
        while (ad_read(ad, adbuf, 4096) >= 0);
        cont_ad_reset(cont);

        fprintf(stderr, "Stopped listening, please wait...\n");
        fflush(stdout);
        /* Finish decoding, obtain and print result */
        ps_end_utt(ps);
        hyp = ps_get_hyp(ps, NULL, &uttid);
        fprintf(stderr, "%s: %s\n", uttid, hyp);

        /* Exit if the first word spoken was GOODBYE */
        if (hyp) {
			numwords = sscanf(hyp, "%s %s %s", word, c1, c2);
			if(strcmp(word, "GUGGUG") == 0) {
				if(strcmp(c1, "HALT") == 0) {
					printf("LEDOFF BLUE\n");
					halted = 1;
				} else if(strcmp(c1, "RESUME") == 0) {
					printf("LEDON BLUE\n");
					halted = 0;
				}
				if(strcmp(c1, "BEGIN") == 0 || strcmp(c1, "START") == 0) {
					if(strcmp(c2, "TRACKING") == 0 && !tracking) {
						printf("START TRACKING\n");
						tracking = 1;
						halted = 0;
					}
				} else if(strcmp(c1, "STOP") == 0) {
					if(strcmp(c2, "TRACKING") == 0 && tracking) {
						printf("STOP TRACKING\n");
						tracking = 0;
					}
				}
				if(!tracking && !halted && numwords == 3) {
					if(strcmp(c1, "TURN") == 0) {
						if(strcmp(c2, "AROUND") == 0) {
							printf("TURN %d 180\n", LEFT);
						} else if(strcmp(c2, "LEFT") == 0) {
							printf("TURN %d 90\n", LEFT);
						} else if(strcmp(c2, "RIGHT") == 0) {
							printf("TURN %d 90\n", RIGHT);
						}
					} else if(strcmp(c1, "MOVE") == 0) {
						if(strcmp(c2, "FORWARD") == 0) {
							printf("MOVE 0 %d\n", MOVE_CENT);
						} else if(strcmp(c2, "BACKWARD") == 0) {
							printf("MOVE 1 %d\n", MOVE_CENT);
						}
					}
				}
			}
        }

        /* Resume A/D recording for next utterance */
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    }

    cont_ad_close(cont);
    ad_close(ad);
}
Exemple #30
0
/*
 * Continuous recognition from a file
 */
static void
recognize_from_file() {
    cont_ad_t *cont;
    ad_rec_t file_ad = {0};
    int16 adbuf[4096];
    const char* hyp;
    const char* uttid;
    int32 k, ts, start;

    char waveheader[44];
    if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) {
	E_FATAL_SYSTEM("Failed to open file '%s' for reading",
			cmd_ln_str_r(config, "-infile"));
    }

    fread(waveheader, 1, 44, rawfd);

    file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate");
    file_ad.bps = sizeof(int16);

    if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) {
        E_FATAL("Failed to initialize voice activity detection");
    }
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");
    rewind (rawfd);

    for (;;) {

	while ((k = cont_ad_read(cont, adbuf, 4096)) == 0);

        if (k < 0) {
    	    break;
    	}

        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("ps_start_utt() failed\n");

        ps_process_raw(ps, adbuf, k, FALSE, FALSE);

        ts = cont->read_ts;
        start = ((ts - k) * 100.0) / file_ad.sps;

        for (;;) {
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
            	break;

            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }


            ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        }

        ps_end_utt(ps);

        if (cmd_ln_boolean_r(config, "-time")) {
	    print_word_times(start);
	} else {
	    hyp = ps_get_hyp(ps, NULL, &uttid);
            fprintf(stderr, "%s: %s\n", uttid, hyp);
        }
        fflush(stdout);
    }

    cont_ad_close(cont);
    fclose(rawfd);
}