Example #1
0
int
ps_load_dict(ps_decoder_t *ps, char const *dictfile,
             char const *fdictfile, char const *format)
{
    dict2pid_t *d2p;
    dict_t *dict;
    hash_iter_t *search_it;
    cmd_ln_t *newconfig;

    /* Create a new scratch config to load this dict (so existing one
     * won't be affected if it fails) */
    newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL);
    cmd_ln_set_boolean_r(newconfig, "-dictcase",
                         cmd_ln_boolean_r(ps->config, "-dictcase"));
    cmd_ln_set_str_r(newconfig, "-dict", dictfile);
    if (fdictfile)
        cmd_ln_set_str_extra_r(newconfig, "_fdict", fdictfile);
    else
        cmd_ln_set_str_extra_r(newconfig, "_fdict",
                               cmd_ln_str_r(ps->config, "_fdict"));

    /* Try to load it. */
    if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Reinit the dict2pid. */
    if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) {
        cmd_ln_free_r(newconfig);
        return -1;
    }

    /* Success!  Update the existing config to reflect new dicts and
     * drop everything into place. */
    cmd_ln_free_r(newconfig);
    dict_free(ps->dict);
    ps->dict = dict;
    dict2pid_free(ps->d2p);
    ps->d2p = d2p;

    /* And tell all searches to reconfigure themselves. */
    for (search_it = hash_table_iter(ps->searches); search_it;
       search_it = hash_table_iter_next(search_it)) {
        if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) {
            hash_table_iter_free(search_it);
            return -1;
        }
    }

    return 0;
}
Example #2
0
int
acmod_write_senfh_header(acmod_t *acmod, FILE *logfh)
{
    char nsenstr[64], logbasestr[64];

    sprintf(nsenstr, "%d", bin_mdef_n_sen(acmod->mdef));
    sprintf(logbasestr, "%f", logmath_get_base(acmod->lmath));
    return bio_writehdr(logfh,
                        "version", "0.1",
                        "mdef_file", cmd_ln_str_r(acmod->config, "-mdef"),
                        "n_sen", nsenstr,
                        "logbase", logbasestr, NULL);
}
Example #3
0
static void
ps_expand_model_config(ps_decoder_t *ps)
{
    char const *hmmdir, *featparams;

    /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
#ifdef __ADSPBLACKFIN__
    E_INFO("Will not use mmap() on uClinux/Blackfin.");
    cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
#endif

    /* Get acoustic model filenames and add them to the command-line */
    if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
        ps_add_file(ps, "-mdef", hmmdir, "mdef");
        ps_add_file(ps, "-mean", hmmdir, "means");
        ps_add_file(ps, "-var", hmmdir, "variances");
        ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
        ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
        ps_add_file(ps, "-sendump", hmmdir, "sendump");
        ps_add_file(ps, "-fdict", hmmdir, "noisedict");
        ps_add_file(ps, "-lda", hmmdir, "feature_transform");
        ps_add_file(ps, "-featparams", hmmdir, "feat.params");
        ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
    }

    /* Look for feat.params in acoustic model dir. */
    if ((featparams = cmd_ln_str_r(ps->config, "-featparams"))) {
        if (NULL !=
            cmd_ln_parse_file_r(ps->config, feat_defn, featparams, FALSE))
            E_INFO("Parsed model-specific feature parameters from %s\n",
                    featparams);
    }

    /* Print here because acmod_init might load feat.params file */
    if (err_get_logfp() != NULL) {
        cmd_ln_print_values_r(ps->config, err_get_logfp(), ps_args());
    }
}
Example #4
0
int
main(int argc, char *argv[])
{
    char const *cfg;

    config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }

    if (config == NULL || (cmd_ln_str_r(config, "-infile") == NULL && cmd_ln_boolean_r(config, "-inmic") == FALSE)) {
	E_INFO("Specify '-infile <file.wav>' to recognize from file or '-inmic yes' to recognize from microphone.\n");
        cmd_ln_free_r(config);
	return 1;
    }

    ps_default_search_args(config);
    ps = ps_init(config);
    if (ps == NULL) {
        cmd_ln_free_r(config);
        return 1;
    }

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if (cmd_ln_str_r(config, "-infile") != NULL) {
        recognize_from_file();
    } else if (cmd_ln_boolean_r(config, "-inmic")) {
        recognize_from_microphone();
    }

    ps_free(ps);
    cmd_ln_free_r(config);

    return 0;
}
Example #5
0
int
main(int32 argc, char *argv[])
{
    ps_decoder_t *ps;
    cmd_ln_t *config;
    char const *ctl;
    FILE *ctlfh;

    config = cmd_ln_parse_r(NULL, ps_args_def, argc, argv, TRUE);

    /* Handle argument file as -argfile. */
    if (config && (ctl = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, ps_args_def, ctl, FALSE);
    }
    
    if (config == NULL) {
        /* This probably just means that we got no arguments. */
        return 1;
    }

    if ((ctl = cmd_ln_str_r(config, "-ctl")) == NULL) {
        E_FATAL("-ctl argument not present, nothing to do in batch mode!\n");
    }
    if ((ctlfh = fopen(ctl, "r")) == NULL) {
        E_FATAL_SYSTEM("Failed to open control file '%s'", ctl);
    }

    ps_default_search_args(config);
    if (!(ps = ps_init(config)))
        E_FATAL("PocketSphinx decoder init failed\n");

    process_ctl(ps, config, ctlfh);

    fclose(ctlfh);
    ps_free(ps);
    return 0;
}
Example #6
0
void
ps_default_search_args(cmd_ln_t *config)
{
#ifdef MODELDIR
    /* Set default acoustic and language models. */
    const char *hmmdir = cmd_ln_str_r(config, "-hmm");
    if (hmmdir == NULL && hmmdir_exists(MODELDIR "/hmm/en_US/hub4wsj_sc_8k")) {
        hmmdir = MODELDIR "/hmm/en_US/hub4wsj_sc_8k";
        cmd_ln_set_str_r(config, "-hmm", hmmdir);
    }

    const char *lmfile = cmd_ln_str_r(config, "-lm");

    if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg")
        && !cmd_ln_str_r(config, "-jsgf")
        && !cmd_ln_str_r(config, "-kws")
        && !cmd_ln_str_r(config, "-keyphrase")
        && file_exists(MODELDIR "/lm/en_US/hub4.5000.DMP")) {
        lmfile = MODELDIR "/lm/en_US/hub4.5000.DMP";
        cmd_ln_set_str_r(config, "-lm", lmfile);
    }

    const char *dictfile = cmd_ln_str_r(config, "-dict");
    if (dictfile == NULL && file_exists(MODELDIR "/lm/en_US/cmu07a.dic")) {
        dictfile = MODELDIR "/lm/en_US/cmu07a.dic";
        cmd_ln_set_str_r(config, "-dict", dictfile);
    }

    /* Expand acoustic and language model filenames relative to installation
     * path. */
    if (hmmdir && !path_is_absolute(hmmdir) && !hmmdir_exists(hmmdir)) {
        char *tmphmm = string_join(MODELDIR "/hmm/", hmmdir, NULL);
        if (hmmdir_exists(tmphmm)) {
            cmd_ln_set_str_r(config, "-hmm", tmphmm);
        } else {
            E_ERROR("Failed to find mdef file inside the model folder "
                    "specified with -hmm `%s'\n", hmmdir);
        }
        ckd_free(tmphmm);
    }
    if (lmfile && !path_is_absolute(lmfile) && !file_exists(lmfile)) {
        char *tmplm = string_join(MODELDIR "/lm/", lmfile, NULL);
        cmd_ln_set_str_r(config, "-lm", tmplm);
        ckd_free(tmplm);
    }
    if (dictfile && !path_is_absolute(dictfile) && !file_exists(dictfile)) {
        char *tmpdict = string_join(MODELDIR "/lm/", dictfile, NULL);
        cmd_ln_set_str_r(config, "-dict", tmpdict);
        ckd_free(tmpdict);
    }
#endif
}
Example #7
0
int
s3_decode_init(s3_decode_t * _decode, cmd_ln_t *_config)
{
    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    /* capture decoder parameters */
    kb_init(&_decode->kb, _config);

    /* initialize decoder variables */
    _decode->kbcore = _decode->kb.kbcore;
    _decode->hyp_frame_num = -1;
    _decode->uttid = NULL;
    _decode->state = S3_DECODE_STATE_IDLE;
    _decode->hyp_str = NULL;
    _decode->hyp_segs = NULL;

    _decode->swap =
            strcmp(cmd_ln_str_r(_config,"-machine_endian"),
                   cmd_ln_str_r(_config,"-input_endian"));

    if (_decode->swap)
        E_INFO("Input data WILL be byte swapped\n");
    else
        E_INFO("Input data will NOT be byte swapped\n");

    _decode->phypdump = (cmd_ln_int32_r(_config, "-phypdump"));

    if (_decode->phypdump)
        E_INFO("Partial hypothesis WILL be dumped\n");
    else
        E_INFO("Partial hypothesis will NOT be dumped\n");

    _decode->rawext = (cmd_ln_str_r(_config, "-rawext"));

    return S3_DECODE_SUCCESS;
}
Example #8
0
/*
 * Continuous recognition from mic
 */
int
recognize_from_mic()
{
	ad_rec_t *ad;
    int16 adbuf[2048];
    const char *fname;
	const char* seg;
    int32 k;
	char str[1000]="";
    uint8 utt_started, in_speech;
	
    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),16000)) == NULL)
		perror("Failed to open audio device\n");
	if (ad_start_rec(ad) < 0)
		perror("Failed to start recording\n");
    
    ps_start_utt(ps);
    utt_started = FALSE;
	ps_seg_t *psegt;
    while (!finished) {
		if ((k = ad_read(ad, adbuf, 2048)) < 0)
			perror("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
			psegt = ps_seg_iter(ps, NULL);
			while (psegt!=NULL){
				seg = ps_seg_word(psegt);
				strncpy_s( str, seg, strlen(seg));
				listenCallback(str);
				printf("%s\n", seg);
				int prob = ps_seg_prob(psegt,NULL,NULL,NULL);
				printf("%d\n", prob);
				psegt = ps_seg_next(psegt);
			}
            ps_start_utt(ps);
            utt_started = FALSE;
        }
		Sleep(100);
    }
	
    ps_end_utt(ps);
    fclose(rawfd);
	return 0;
}
Example #9
0
int
main(int argc, char *argv[])
{
    print_appl_info(argv[0]);
    cmd_ln_appl_enter(argc, argv, "default.arg", defn);

    unlimit();

    config = cmd_ln_get();

    logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1,
                         cmd_ln_int32_r(config, "-log3table"));

    E_INFO("Value of base %f \n", cmd_ln_float32_r(config, "-logbase"));
    models_init();
    ptmr_init(&tm_utt);

    if ((inmatchsegfp = fopen(cmd_ln_str_r(config, "-inhypseg"), "r")) == NULL)
        E_ERROR("fopen(%s,r) failed\n", cmd_ln_str_r(config, "-inhypseg"));


    if ((outconfmatchsegfp = fopen(cmd_ln_str_r(config, "-output"), "w")) == NULL)
        E_ERROR("fopen(%s,w) failed\n", cmd_ln_str_r(config, "-output"));

    if (cmd_ln_str_r(config, "-ctl")) {
        ctl_process(cmd_ln_str_r(config, "-ctl"),
                    cmd_ln_str_r(config, "-ctl_lm"),
                    NULL,
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"), utt_confidence, NULL);
    }
    else {
        E_FATAL("-ctl is not specified\n");
    }

#if (! WIN32)
    system("ps auxwww | grep s3dag");
#endif

    fclose(outconfmatchsegfp);
    fclose(inmatchsegfp);

    models_free();

    logmath_free(logmath);

    cmd_ln_free_r(config);

    return 0;

}
Example #10
0
/*
 * Main utterance processing loop:
 *     for (;;) {
 *        start utterance and wait for speech to process
 *        decoding till end-of-utterance silence will be detected
 *        print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[2048];
    uint8 utt_started, in_speech;
    int32 k;
    char const *hyp;

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int) cmd_ln_float32_r(config,
                                                 "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");

    if (ps_start_utt(ps) < 0)
        E_FATAL("Failed to start utterance\n");
    utt_started = FALSE;
    E_INFO("Ready....\n");

    for (;;) {
        if ((k = ad_read(ad, adbuf, 2048)) < 0)
            E_FATAL("Failed to read audio\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
            E_INFO("Listening...\n");
        }
        if (!in_speech && utt_started) {
            /* speech -> silence transition, time to start new utterance  */
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL );
            if (hyp != NULL) {
                printf("%s\n", hyp);
                fflush(stdout);
            }

            if (ps_start_utt(ps) < 0)
                E_FATAL("Failed to start utterance\n");
            utt_started = FALSE;
            E_INFO("Ready....\n");
        }
        sleep_msec(100);
    }
    ad_close(ad);
}
int
main(int argc, char *argv[])
{
    cmd_ln_t *config;

    config = cmd_ln_parse_r(NULL, defs, argc, argv, TRUE);
    if (config == NULL)
        return 1;
    printf("%d %s %d %f\n",
           cmd_ln_int32_r(config, "-a"),
           cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)",
           cmd_ln_boolean_r(config, "-c"),
           cmd_ln_float64_r(config, "-d"));
    cmd_ln_free_r(config);

    config = cmd_ln_init(NULL, NULL, FALSE,
                         "-b", "foobie", NULL);
    if (config == NULL)
        return 1;
    cmd_ln_free_r(config);

    config = cmd_ln_init(NULL, defs, TRUE,
                         "-b", "foobie", NULL);
    if (config == NULL)
        return 1;
    printf("%d %s %d %f\n",
           cmd_ln_int32_r(config, "-a"),
           cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)",
           cmd_ln_boolean_r(config, "-c"),
           cmd_ln_float64_r(config, "-d"));
    cmd_ln_free_r(config);

    config = cmd_ln_init(NULL, NULL, FALSE,
                         "-b", "foobie", NULL);
    if (config == NULL)
        return 1;
    printf("%s\n",
           cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)");
    cmd_ln_set_str_r(config, "-b", "blatz");
    printf("%s\n",
           cmd_ln_str_r(config, "-b") ? cmd_ln_str_r(config, "-b") : "(null)");
    cmd_ln_free_r(config);
           
    return 0;
}
Example #12
0
int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
{
  fsg_model_t *fsg;
  jsgf_rule_t *rule;
  char const *toprule;
  jsgf_t *jsgf = jsgf_parse_file(path, NULL);
  float lw;
  int result;

  if (!jsgf)
      return -1;

  rule = NULL;
  /* Take the -toprule if specified. */
  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
      char *ruletok;
      ruletok = string_join("<", toprule, ">", NULL);
      rule = jsgf_get_rule(jsgf, ruletok);
      ckd_free(ruletok);
      if (rule == NULL) {
          E_ERROR("Start rule %s not found\n", toprule);
          return -1;
      }
  } else {
      /* Otherwise, take the first public rule. */
      jsgf_rule_iter_t *itor;

      for (itor = jsgf_rule_iter(jsgf); itor;
           itor = jsgf_rule_iter_next(itor)) {
          rule = jsgf_rule_iter_rule(itor);
          if (jsgf_rule_public(rule)) {
              jsgf_rule_iter_free(itor);
              break;
          }
      }
      if (rule == NULL) {
          E_ERROR("No public rules found in %s\n", path);
          return -1;
      }
  }

  lw = cmd_ln_float32_r(ps->config, "-lw");
  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
  result = ps_set_fsg(ps, name, fsg);
  fsg_model_free(fsg);
  return result;
}
Example #13
0
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        E_ERROR("Must specify -mdef or -hmm\n");
        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
            || (cmd_ln_str_r(acmod->config, "-var") == NULL)
            || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        E_INFO("Attempting to use SCHMM computation module\n");
        if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
            E_INFO("Attempting to use PTHMM computation module\n");
            if ((acmod->mgau = ptm_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    return 0;
}
Example #14
0
/**
 * Process Sphinx MFCCs/logspectra from a filehandle.  Assume that
 * wtf->infh is positioned just after the file header.
 */
static int
decode_sphinx_mfc(sphinx_wave2feat_t *wtf)
{
    int nfloat = 0, n;
    int featsize = wtf->featsize;

    /* If the input vector length is less than the output length, we
     * need to do this one frame at a time, because there's empty
     * space at the end of each vector in wtf->feat. */
    if (wtf->in_veclen < wtf->veclen)
        featsize = 1;
    while ((n = fread(wtf->feat[0], sizeof(**wtf->feat),
                      featsize * wtf->in_veclen, wtf->infh)) != 0) {
        int i, nfr = n / wtf->in_veclen;
        if (n % wtf->in_veclen) {
            E_ERROR("Size of file %d not a multiple of veclen %d\n",
                    n, wtf->in_veclen);
            return -1;
        }
        /* Byteswap stuff here if necessary. */
        if (wtf->byteswap) {
            for (i = 0; i < n; ++i)
                SWAP_FLOAT32(wtf->feat[0] + i);
        }
        fe_float_to_mfcc(wtf->fe, (float32 **)wtf->feat, wtf->feat, nfr);
        for (i = 0; i < nfr; ++i) {
            if (cmd_ln_boolean_r(wtf->config, "-spec2cep")) {
                if (0 == strcmp(cmd_ln_str_r(wtf->config, "-transform"), "legacy"))
                    fe_logspec_to_mfcc(wtf->fe, wtf->feat[i], wtf->feat[i]);
                else
                    fe_logspec_dct2(wtf->fe, wtf->feat[i], wtf->feat[i]);
            }
            else if (cmd_ln_boolean_r(wtf->config, "-cep2spec")) {
                fe_mfcc_dct3(wtf->fe, wtf->feat[i], wtf->feat[i]);
            }
        }
        if ((n = (*wtf->ot->output_frames)(wtf, wtf->feat, nfr)) < 0)
            return -1;
        nfloat += n;
    }

    if (fclose(wtf->infh) == EOF)
        E_ERROR_SYSTEM("Failed to close input file");
    wtf->infh = NULL;
    return nfloat;
}
Example #15
0
int
main(int argc, char *argv[])
{
    jsgf_t *jsgf;
    fsg_model_t *fsg;
    cmd_ln_t *config;
        
    if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
	return 1;
		
    if (cmd_ln_boolean_r(config, "-help")) {
        usagemsg(argv[0]);
    }

    jsgf = jsgf_parse_file(cmd_ln_str_r(config, "-jsgf"), NULL);
    if (jsgf == NULL) {
        return 1;
    }

    fsg = get_fsg(jsgf, cmd_ln_str_r(config, "-rule") ? cmd_ln_str_r(config, "-rule") : NULL);
    
    if (cmd_ln_boolean_r(config, "-compile")) {
	fsg_model_null_trans_closure(fsg, NULL);
    }

    
    if (cmd_ln_str_r(config, "-fsm")) {
	const char* outfile = cmd_ln_str_r(config, "-fsm");
	const char* symfile = cmd_ln_str_r(config, "-symtab");
        if (outfile)
            fsg_model_writefile_fsm(fsg, outfile);
        else
            fsg_model_write_fsm(fsg, stdout);
        if (symfile)
            fsg_model_writefile_symtab(fsg, symfile);
    }
    else {
        const char *outfile = cmd_ln_str_r(config, "-fsg");
        if (outfile)
            fsg_model_writefile(fsg, outfile);
        else
            fsg_model_write(fsg, stdout);
    }
    fsg_model_free(fsg);
    jsgf_grammar_free(jsgf);

    return 0;
}
static void
ps_expand_file_config(ps_decoder_t *ps, const char *arg, const char *extra_arg,
	              const char *hmmdir, const char *file)
{
    const char *val;
    if ((val = cmd_ln_str_r(ps->config, arg)) != NULL) {
	cmd_ln_set_str_extra_r(ps->config, extra_arg, val);
    } else if (hmmdir == NULL) {
        cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
    } else {
        char *tmp = string_join(hmmdir, "/", file, NULL);
        if (file_exists(tmp))
	    cmd_ln_set_str_extra_r(ps->config, extra_arg, tmp);
	else
	    cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL);
        ckd_free(tmp);
    }
}
uint32 FSpeechRecognitionWorker::Run() {

	char const *hyp;
	// attempt to open the default recording device
	if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
		(int)cmd_ln_float32_r(config,
		"-samprate"))) == NULL) {
		ClientMessage(FString(TEXT("Failed to open audio device")));
		return 1;
	}
	if (ad_start_rec(ad) < 0) {
		ClientMessage(FString(TEXT("Failed to start recording")));
		return 2;
	}
	if (ps_start_utt(ps) < 0) {
		ClientMessage(FString(TEXT("Failed to start utterance")));
		return 3;
	}

	while (StopTaskCounter.GetValue() == 0) {
		if ((k = ad_read(ad, adbuf, 1024)) < 0)
			ClientMessage(FString(TEXT("Failed to read audio")));
		ps_process_raw(ps, adbuf, k, 0, 0);
		in_speech = ps_get_in_speech(ps);
		if (in_speech && !utt_started) {
			utt_started = 1;
		}
		if (!in_speech && utt_started) {
			/* speech -> silence transition, time to start new utterance  */
			ps_end_utt(ps);
			hyp = ps_get_hyp(ps, NULL);
			if (hyp != NULL)
				Manager->WordSpoken_method(FString(hyp));

			if (ps_start_utt(ps) < 0)
				ClientMessage(FString(TEXT("Failed to start")));
			utt_started = 0;
		}
	}

	ad_close(ad);
	return 0;
}
int 
ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path)
{
  fsg_model_t *fsg;
  jsgf_rule_t *rule;
  char const *toprule;
  jsgf_t *jsgf = jsgf_parse_file(path, NULL);
  float lw;
  int result;

  if (!jsgf)
      return -1;

  rule = NULL;
  /* Take the -toprule if specified. */
  if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
      rule = jsgf_get_rule(jsgf, toprule);
      if (rule == NULL) {
          E_ERROR("Start rule %s not found\n", toprule);
          jsgf_grammar_free(jsgf);
          return -1;
      }
  } else {
      rule = jsgf_get_public_rule(jsgf);
      if (rule == NULL) {
          E_ERROR("No public rules found in %s\n", path);
          jsgf_grammar_free(jsgf);
          return -1;
      }
  }

  lw = cmd_ln_float32_r(ps->config, "-lw");
  fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
  result = ps_set_fsg(ps, name, fsg);
  fsg_model_free(fsg);
  jsgf_grammar_free(jsgf);
  return result;
}
Example #19
0
static fwd_dbg_t *
init_fwd_dbg(srch_FLAT_FWD_graph_t * fwg)
{
    const char *tmpstr;
    fwd_dbg_t *fd;

    fd = (fwd_dbg_t *) ckd_calloc(1, sizeof(fwd_dbg_t));

    assert(fd);
    /* Word to be traced in detail */
    if ((tmpstr = cmd_ln_str_r(kbcore_config(fwg->kbcore), "-tracewhmm")) != NULL) {
        fd->trace_wid = dict_wordid(fwg->kbcore->dict, tmpstr);
        if (NOT_S3WID(fd->trace_wid))
            E_ERROR("%s not in dictionary; cannot be traced\n", tmpstr);
    }
    else
        fd->trace_wid = BAD_S3WID;

    /* Active words to be dumped for debugging after and before the given frame nos, if any */
    fd->word_dump_sf = (int32) 0x7ffffff0;
    if (cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-worddumpsf"))
        fd->word_dump_sf = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-worddumpsf");

    fd->word_dump_ef = (int32) 0x7ffffff0;
    if (cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-worddumpef"))
        fd->word_dump_ef = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-worddumpef");

    /* Active HMMs to be dumped for debugging after and before the given frame nos, if any */
    fd->hmm_dump_sf = (int32) 0x7ffffff0;
    if (cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-hmmdumpsf"))
        fd->hmm_dump_sf = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-hmmdumpsf");

    fd->hmm_dump_ef = (int32) 0x7ffffff0;
    if (cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-hmmdumpef"))
        fd->hmm_dump_ef = cmd_ln_int32_r(kbcore_config(fwg->kbcore), "-hmmdumpef");

    return fd;
}
Example #20
0
/**
 * Output HTK format header.
 */
static int
output_header_htk(sphinx_wave2feat_t *wtf, int32 nfloat)
{
    int32 samp_period;
    int16 samp_size;
    int16 param_kind;
    int swap = FALSE;

    /* HTK files are big-endian. */
    if (0 == strcmp("little", cmd_ln_str_r(wtf->config, "-mach_endian")))
        swap = TRUE;
    /* Same file size thing as in Sphinx files (I think) */
    if (swap) SWAP_INT32(&nfloat);
    if (fwrite(&nfloat, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample period in 100ns units. */
    samp_period = (int32)(1e+7 / cmd_ln_float32_r(wtf->config, "-frate"));
    if (swap) SWAP_INT32(&samp_period);
    if (fwrite(&samp_period, 4, 1, wtf->outfh) != 1)
        return -1;
    /* Sample size - veclen * sizeof each sample. */
    samp_size = wtf->veclen * 4;
    if (swap) SWAP_INT16(&samp_size);
    if (fwrite(&samp_size, 2, 1, wtf->outfh) != 1)
        return -1;
    /* Format and flags. */
    if (cmd_ln_boolean_r(wtf->config, "-logspec")
        || cmd_ln_boolean_r(wtf->config, "-cep2spec"))
        param_kind = FBANK; /* log mel-filter bank outputs */
    else
        param_kind = MFCC | _O; /* MFCC + CEP0 (note reordering...) */
    if (swap) SWAP_INT16(&param_kind);
    if (fwrite(&param_kind, 2, 1, wtf->outfh) != 1)
        return -1;

    return 0;
}
Example #21
0
static void
ps_init_defaults(ps_decoder_t *ps)
{
    /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). */
#ifdef __ADSPBLACKFIN__
    E_INFO("Will not use mmap() on uClinux/Blackfin.");
    cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE);
#endif

    char const *hmmdir;
    /* Get acoustic model filenames and add them to the command-line */
    if ((hmmdir = cmd_ln_str_r(ps->config, "-hmm")) != NULL) {
        ps_add_file(ps, "-mdef", hmmdir, "mdef");
        ps_add_file(ps, "-mean", hmmdir, "means");
        ps_add_file(ps, "-var", hmmdir, "variances");
        ps_add_file(ps, "-tmat", hmmdir, "transition_matrices");
        ps_add_file(ps, "-mixw", hmmdir, "mixture_weights");
        ps_add_file(ps, "-sendump", hmmdir, "sendump");
        ps_add_file(ps, "-fdict", hmmdir, "noisedict");
        ps_add_file(ps, "-lda", hmmdir, "feature_transform");
        ps_add_file(ps, "-featparams", hmmdir, "feat.params");
        ps_add_file(ps, "-senmgau", hmmdir, "senmgau");
    }
}
int32
gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config)
{
    int32 i, m, f, d, *flen;
    float32 ****fgau;

    /* Free data if already here */
    if (g->mean)
        gauden_param_free(g->mean);
    if (g->var)
        gauden_param_free(g->var);
    if (g->det)
        ckd_free_3d(g->det);
    if (g->featlen)
        ckd_free(g->featlen);
    g->mean = 0;
    g->var = 0;
    g->det = 0;
    g->featlen = 0;

    /* Reload means and variances (un-precomputed). */
    fgau = 0;
    gauden_param_read(&fgau, &g->n_mgau, &g->n_feat, &g->n_density,
                      &g->featlen, cmd_ln_str_r(config, "-mean"));
    g->mean = (mfcc_t ****)fgau;
    fgau = 0;
    gauden_param_read(&fgau, &m, &f, &d, &flen, cmd_ln_str_r(config, "-var"));
    g->var = (mfcc_t ****)fgau;

    /* Verify mean and variance parameter dimensions */
    if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density))
        E_FATAL
            ("Mixture-gaussians dimensions for means and variances differ\n");
    for (i = 0; i < g->n_feat; i++)
        if (g->featlen[i] != flen[i])
            E_FATAL("Feature lengths for means and variances differ\n");
    ckd_free(flen);

    /* Transform codebook for each stream s */
    for (i = 0; i < g->n_mgau; ++i) {
        for (f = 0; f < g->n_feat; ++f) {
            float64 *temp;
            temp = (float64 *) ckd_calloc(g->featlen[f], sizeof(float64));
            /* Transform each density d in selected codebook */
            for (d = 0; d < g->n_density; d++) {
                int l;
                for (l = 0; l < g->featlen[f]; l++) {
                    temp[l] = 0.0;
                    for (m = 0; m < g->featlen[f]; m++) {
                        /* FIXME: For now, only one class, hence the zeros below. */
                        temp[l] += mllr->A[f][0][l][m] * g->mean[i][f][d][m];
                    }
                    temp[l] += mllr->b[f][0][l];
                }

                for (l = 0; l < g->featlen[f]; l++) {
                    g->mean[i][f][d][l] = (float32) temp[l];
                    g->var[i][f][d][l] *= mllr->h[f][0][l];
                }
            }
            ckd_free(temp);
        }
    }

    /* Re-precompute (if we aren't adapting variances this isn't
     * actually necessary...) */
    gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor"));
    return 0;
}
Example #23
0
/*
 * Main utterance processing loop:
 *     for (;;) {
 * 	   wait for start of next utterance;
 * 	   decode utterance until silence of at least 1 sec observed;
 * 	   print utterance result;
 *     }
 */
static void
recognize_from_microphone()
{
    ad_rec_t *ad;
    int16 adbuf[4096];
    int32 k, ts, rem;
    char const *hyp;
    char const *uttid;
    cont_ad_t *cont;
    char word[256];
	char c1[256], c2[256];

	int tracking = 0;
	int halted = 0;
	int LEFT = 0;
	int RIGHT = 1;
	int MOVE_CENT = 100; //1 meter
	int numwords;

	setlinebuf(stdout);

    if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                          (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
        E_FATAL("Failed to open audio device\n");

    /* Initialize continuous listening module */
    if ((cont = cont_ad_init(ad, ad_read)) == NULL)
        E_FATAL("Failed to initialize voice activity detection\n");
    if (ad_start_rec(ad) < 0)
        E_FATAL("Failed to start recording\n");
    if (cont_ad_calib(cont) < 0)
        E_FATAL("Failed to calibrate voice activity detection\n");


	printf("LEDON BLUE\n");
    for (;;) {
        /* Indicate listening for next utterance */
        fprintf(stderr, "READY....\n");
        fflush(stderr);

        /* Wait data for next utterance */
        while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
            sleep_msec(100);

        if (k < 0)
            E_FATAL("Failed to read audio\n");

        /*
         * Non-zero amount of data received; start recognition of new utterance.
         * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
         */
        if (ps_start_utt(ps, NULL) < 0)
            E_FATAL("Failed to start utterance\n");
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        fprintf(stderr, "Listening...\n");

        /* Note timestamp for this first block of data */
        ts = cont->read_ts;

        /* Decode utterance until end (marked by a "long" silence, >1sec) */
        for (;;) {
            /* Read non-silence audio data, if any, from continuous listening module */
            if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
                E_FATAL("Failed to read audio\n");
            if (k == 0) {
                /*
                 * No speech data available; check current timestamp with most recent
                 * speech to see if more than 1 sec elapsed.  If so, end of utterance.
                 */
                if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
                    break;
            }
            else {
                /* New speech data received; note current timestamp */
                ts = cont->read_ts;
            }

            /*
             * Decode whatever data was read above.
             */
            rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);

            /* If no work to be done, sleep a bit */
            if ((rem == 0) && (k == 0))
                sleep_msec(20);
        }

        /*
         * Utterance ended; flush any accumulated, unprocessed A/D data and stop
         * listening until current utterance completely decoded
         */
        ad_stop_rec(ad);
        while (ad_read(ad, adbuf, 4096) >= 0);
        cont_ad_reset(cont);

        fprintf(stderr, "Stopped listening, please wait...\n");
        fflush(stdout);
        /* Finish decoding, obtain and print result */
        ps_end_utt(ps);
        hyp = ps_get_hyp(ps, NULL, &uttid);
        fprintf(stderr, "%s: %s\n", uttid, hyp);

        /* Exit if the first word spoken was GOODBYE */
        if (hyp) {
			numwords = sscanf(hyp, "%s %s %s", word, c1, c2);
			if(strcmp(word, "GUGGUG") == 0) {
				if(strcmp(c1, "HALT") == 0) {
					printf("LEDOFF BLUE\n");
					halted = 1;
				} else if(strcmp(c1, "RESUME") == 0) {
					printf("LEDON BLUE\n");
					halted = 0;
				}
				if(strcmp(c1, "BEGIN") == 0 || strcmp(c1, "START") == 0) {
					if(strcmp(c2, "TRACKING") == 0 && !tracking) {
						printf("START TRACKING\n");
						tracking = 1;
						halted = 0;
					}
				} else if(strcmp(c1, "STOP") == 0) {
					if(strcmp(c2, "TRACKING") == 0 && tracking) {
						printf("STOP TRACKING\n");
						tracking = 0;
					}
				}
				if(!tracking && !halted && numwords == 3) {
					if(strcmp(c1, "TURN") == 0) {
						if(strcmp(c2, "AROUND") == 0) {
							printf("TURN %d 180\n", LEFT);
						} else if(strcmp(c2, "LEFT") == 0) {
							printf("TURN %d 90\n", LEFT);
						} else if(strcmp(c2, "RIGHT") == 0) {
							printf("TURN %d 90\n", RIGHT);
						}
					} else if(strcmp(c1, "MOVE") == 0) {
						if(strcmp(c2, "FORWARD") == 0) {
							printf("MOVE 0 %d\n", MOVE_CENT);
						} else if(strcmp(c2, "BACKWARD") == 0) {
							printf("MOVE 1 %d\n", MOVE_CENT);
						}
					}
				}
			}
        }

        /* Resume A/D recording for next utterance */
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");
    }

    cont_ad_close(cont);
    ad_close(ad);
}
Example #24
0
static int
acmod_init_feat(acmod_t *acmod)
{
    acmod->fcb =
        feat_init(cmd_ln_str_r(acmod->config, "-feat"),
                  cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
                  cmd_ln_boolean_r(acmod->config, "-varnorm"),
                  agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
                  1, cmd_ln_int32_r(acmod->config, "-ceplen"));
    if (acmod->fcb == NULL)
        return -1;

    if (cmd_ln_str_r(acmod->config, "-lda")) {
        E_INFO("Reading linear feature transformation from %s\n",
               cmd_ln_str_r(acmod->config, "-lda"));
        if (feat_read_lda(acmod->fcb,
                          cmd_ln_str_r(acmod->config, "-lda"),
                          cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
            return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-svspec")) {
        int32 **subvecs;
        E_INFO("Using subvector specification %s\n",
               cmd_ln_str_r(acmod->config, "-svspec"));
        if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
            return -1;
        if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
            return -1;
    }

    if (cmd_ln_exists_r(acmod->config, "-agcthresh")
        && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
        agc_set_threshold(acmod->fcb->agc_struct,
                          cmd_ln_float32_r(acmod->config, "-agcthresh"));
    }

    if (acmod->fcb->cmn_struct
        && cmd_ln_exists_r(acmod->config, "-cmninit")) {
        char *c, *cc, *vallist;
        int32 nvals;

        vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
        c = vallist;
        nvals = 0;
        while (nvals < acmod->fcb->cmn_struct->veclen
               && (cc = strchr(c, ',')) != NULL) {
            *cc = '\0';
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
            c = cc + 1;
            ++nvals;
        }
        if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
            acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c));
        }
        ckd_free(vallist);
    }
    return 0;
}
Example #25
0
/*
 * Continuous recognition from a file
 */
static void
recognize_from_file()
{
    int16 adbuf[2048];
    const char *fname;
    const char *hyp;
    int32 k;
    uint8 utt_started, in_speech;
    int32 print_times = cmd_ln_boolean_r(config, "-time");

    fname = cmd_ln_str_r(config, "-infile");
    if ((rawfd = fopen(fname, "rb")) == NULL) {
        E_FATAL_SYSTEM("Failed to open file '%s' for reading",
                       fname);
    }
    
    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".wav") == 0) {
        char waveheader[44];
	fread(waveheader, 1, 44, rawfd);
	if (!check_wav_header(waveheader, (int)cmd_ln_float32_r(config, "-samprate")))
    	    E_FATAL("Failed to process file '%s' due to format mismatch.\n", fname);
    }

    if (strlen(fname) > 4 && strcmp(fname + strlen(fname) - 4, ".mp3") == 0) {
	E_FATAL("Can not decode mp3 files, convert input file to WAV 16kHz 16-bit mono before decoding.\n");
    }
    
    ps_start_utt(ps);
    utt_started = FALSE;

    while ((k = fread(adbuf, sizeof(int16), 2048, rawfd)) > 0) {
        ps_process_raw(ps, adbuf, k, FALSE, FALSE);
        in_speech = ps_get_in_speech(ps);
        if (in_speech && !utt_started) {
            utt_started = TRUE;
        } 
        if (!in_speech && utt_started) {
            ps_end_utt(ps);
            hyp = ps_get_hyp(ps, NULL);
            if (hyp != NULL)
        	printf("%s\n", hyp);
            if (print_times)
        	print_word_times();
            fflush(stdout);

            ps_start_utt(ps);
            utt_started = FALSE;
        }
    }
    ps_end_utt(ps);
    if (utt_started) {
        hyp = ps_get_hyp(ps, NULL);
        if (hyp != NULL) {
    	    printf("%s\n", hyp);
    	    if (print_times) {
    		print_word_times();
	    }
	}
    }
    
    fclose(rawfd);
}
static void
gst_pocketsphinx_get_property(GObject * object, guint prop_id,
                              GValue * value, GParamSpec * pspec)
{
    GstPocketSphinx *ps = GST_POCKETSPHINX(object);

    switch (prop_id) {
    case PROP_DECODER:
        g_value_set_boxed(value, ps->ps);
        break;
    case PROP_CONFIGURED:
        g_value_set_boolean(value, ps->ps != NULL);
        break;
    case PROP_HMM_DIR:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-hmm"));
        break;
    case PROP_LM_FILE:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-lm"));
        break;
    case PROP_LMCTL_FILE:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmctl"));
        break;
    case PROP_LM_NAME:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmname"));
        break;
    case PROP_DICT_FILE:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-dict"));
        break;
    case PROP_MLLR_FILE:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-mllr"));
        break;
    case PROP_FSG_FILE:
        g_value_set_string(value, cmd_ln_str_r(ps->config, "-fsg"));
        break;
    case PROP_FWDFLAT:
        g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-fwdflat"));
        break;
    case PROP_BESTPATH:
        g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-bestpath"));
        break;
    case PROP_LATDIR:
        g_value_set_string(value, ps->latdir);
        break;
    case PROP_MAXHMMPF:
        g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxhmmpf"));
        break;
    case PROP_MAXWPF:
        g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxwpf"));
        break;
    case PROP_BEAM:
        g_value_set_double(value, cmd_ln_float_r(ps->config, "-beam"));
        break;
    case PROP_PBEAM:
        g_value_set_double(value, cmd_ln_float_r(ps->config, "-pbeam"));
        break;
    case PROP_WBEAM:
        g_value_set_double(value, cmd_ln_float_r(ps->config, "-wbeam"));
        break;
    case PROP_DSRATIO:
        g_value_set_int(value, cmd_ln_int32_r(ps->config, "-ds"));
        break;
    default:
        G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
        break;
    }
}
int
main(int argc, char *argv[])
{
    char const *cfg;
    int i;
    int16 buf[2048];

    if (argc == 2) {
        config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE);
    }
    else {
        config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE);
    }
    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }
    if (config == NULL)
        return 1;

    singlefile = cmd_ln_boolean_r(config, "-singlefile");
    if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) {
        if ((infile = fopen(infile_path, "rb")) == NULL) {
            E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path);
            return 1;
        }
        read_audio = &read_audio_file;
        /* skip wav header */
        read_audio(buf, 44);
    }
    else {
        if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                              (int) cmd_ln_float32_r(config,
                                                     "-samprate"))) ==
            NULL) {
            E_FATAL("Failed to open audio device\n");
            return 1;
        }
        read_audio = &read_audio_adev;
        printf("Start recording ...\n");
        fflush(stdout);
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");

        /* TODO remove this thing */
        for (i = 0; i < 5; i++) {
            sleep_msec(200);
            read_audio(buf, 2048);
        }
        printf("You may speak now\n");
        fflush(stdout);
    }

    fe = fe_init_auto_r(config);
    if (fe == NULL)
        return 1;

    segment_audio();

    if (ad)
        ad_close(ad);
    if (infile)
        fclose(infile);

    fe_free(fe);
    cmd_ln_free_r(config);
    return 0;
}
Example #28
0
int
main(int argc, char *argv[])
{
	cmd_ln_t *config;
	ngram_model_t *lm = NULL;
	logmath_t *lmath;
        int itype, otype;
        char const *kase;

	if ((config = cmd_ln_parse_r(NULL, defn, argc, argv, TRUE)) == NULL)
		return 1;
		
	if (cmd_ln_boolean_r(config, "-help")) {
	    usagemsg(argv[0]);
	}

        err_set_debug_level(cmd_ln_int32_r(config, "-debug"));

	/* Create log math object. */
	if ((lmath = logmath_init
	     (cmd_ln_float64_r(config, "-logbase"), 0, 0)) == NULL) {
		E_FATAL("Failed to initialize log math\n");
	}
	
	if (cmd_ln_str_r(config, "-i") == NULL || cmd_ln_str_r(config, "-i") == NULL) {
            E_ERROR("Please specify both input and output models\n");
            goto error_out;
        }
	    
	
	/* Load the input language model. */
        if (cmd_ln_str_r(config, "-ifmt")) {
            if ((itype = ngram_str_to_type(cmd_ln_str_r(config, "-ifmt")))
                == NGRAM_INVALID) {
                E_ERROR("Invalid input type %s\n", cmd_ln_str_r(config, "-ifmt"));
                goto error_out;
            }
            lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"),
                                  itype, lmath);
        }
        else {
            lm = ngram_model_read(config, cmd_ln_str_r(config, "-i"),
                                  NGRAM_AUTO, lmath);
	}

        /* Guess or set the output language model type. */
        if (cmd_ln_str_r(config, "-ofmt")) {
            if ((otype = ngram_str_to_type(cmd_ln_str_r(config, "-ofmt")))
                == NGRAM_INVALID) {
                E_ERROR("Invalid output type %s\n", cmd_ln_str_r(config, "-ofmt"));
                goto error_out;
            }
        }
        else {
            otype = ngram_file_name_to_type(cmd_ln_str_r(config, "-o"));
        }

        /* Recode the language model if desired. */
        if (cmd_ln_str_r(config, "-ienc")) {
            if (ngram_model_recode(lm, cmd_ln_str_r(config, "-ienc"),
                                   cmd_ln_str_r(config, "-oenc")) != 0) {
                E_ERROR("Failed to recode language model from %s to %s\n",
                        cmd_ln_str_r(config, "-ienc"),
                        cmd_ln_str_r(config, "-oenc"));
                goto error_out;
            }
        }

        /* Case fold if requested. */
        if ((kase = cmd_ln_str_r(config, "-case"))) {
            if (0 == strcmp(kase, "lower")) {
                ngram_model_casefold(lm, NGRAM_LOWER);
            }
            else if (0 == strcmp(kase, "upper")) {
                ngram_model_casefold(lm, NGRAM_UPPER);
            }
            else {
                E_ERROR("Unknown value for -case: %s\n", kase);
                goto error_out;
            }
        }

        /* Write the output language model. */
        if (ngram_model_write(lm, cmd_ln_str_r(config, "-o"), otype) != 0) {
            E_ERROR("Failed to write language model in format %s to %s\n",
                    ngram_type_to_str(otype), cmd_ln_str_r(config, "-o"));
            goto error_out;
        }

        /* That's all folks! */
        ngram_model_free(lm);
	return 0;

error_out:
        ngram_model_free(lm);
	return 1;
}
Example #29
0
static int
acmod_init_am(acmod_t *acmod)
{
    char const *mdeffn, *tmatfn, *mllrfn, *hmmdir;

    /* Read model definition. */
    if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
        if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL)
            E_ERROR("Acoustic model definition is not specified either "
                    "with -mdef option or with -hmm\n");
        else
            E_ERROR("Folder '%s' does not contain acoustic model "
                    "definition 'mdef'\n", hmmdir);

        return -1;
    }

    if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
        E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn);
        return -1;
    }

    /* Read transition matrices. */
    if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
        E_ERROR("No tmat file specified\n");
        return -1;
    }
    acmod->tmat = tmat_init(tmatfn, acmod->lmath,
                            cmd_ln_float32_r(acmod->config, "-tmatfloor"),
                            TRUE);

    /* Read the acoustic models. */
    if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
        || (cmd_ln_str_r(acmod->config, "-var") == NULL)
        || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
        E_ERROR("No mean/var/tmat files specified\n");
        return -1;
    }

    if (cmd_ln_str_r(acmod->config, "-senmgau")) {
        E_INFO("Using general multi-stream GMM computation\n");
        acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
        if (acmod->mgau == NULL)
            return -1;
    }
    else {
        E_INFO("Attempting to use PTM computation module\n");
        if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) {
            E_INFO("Attempting to use semi-continuous computation module\n");
            if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
                E_INFO("Falling back to general multi-stream GMM computation\n");
                acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef);
                if (acmod->mgau == NULL)
                    return -1;
            }
        }
    }

    /* If there is an MLLR transform, apply it. */
    if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
        ps_mllr_t *mllr = ps_mllr_read(mllrfn);
        if (mllr == NULL)
            return -1;
        acmod_update_mllr(acmod, mllr);
    }

    return 0;
}
Example #30
0
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *acmod;
    char const *featparams;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->config = cmd_ln_retain(config);
    acmod->lmath = lmath;
    acmod->state = ACMOD_IDLE;

    /* Look for feat.params in acoustic model dir. */
    if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
        if (NULL !=
            cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE))
            E_INFO("Parsed model-specific feature parameters from %s\n",
                    featparams);
    }

    /* Initialize feature computation. */
    if (fe) {
        if (acmod_fe_mismatch(acmod, fe))
            goto error_out;
        fe_retain(fe);
        acmod->fe = fe;
    }
    else {
        /* Initialize a new front end. */
        acmod->fe = fe_init_auto_r(config);
        if (acmod->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(acmod, acmod->fe))
            goto error_out;
    }
    if (fcb) {
        if (acmod_feat_mismatch(acmod, fcb))
            goto error_out;
        feat_retain(fcb);
        acmod->fcb = fcb;
    }
    else {
        /* Initialize a new fcb. */
        if (acmod_init_feat(acmod) < 0)
            goto error_out;
    }

    /* Load acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;


    /* The MFCC buffer needs to be at least as large as the dynamic
     * feature window.  */
    acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
    acmod->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
                      sizeof(**acmod->mfc_buf));

    /* Feature buffer has to be at least as large as MFCC buffer. */
    acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
    acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));

    acmod->utt_start_frame = 0;

    /* Senone computation stuff. */
    acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                                     sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                                                     sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
    return acmod;

error_out:
    acmod_free(acmod);
    return NULL;
}