Exemple #1
0
/**
 * Create a wave2feat converter from a parsed configuration.
 *
 * Allocates the converter, retains @p config, builds a front end from it
 * and selects the output format named by the "-ofmt" option.
 *
 * @param config Parsed command-line/configuration object.
 * @return The new converter, or NULL when "-ofmt" is missing or does not
 *         name an entry of outtypes[].  A front-end creation failure is
 *         reported through E_FATAL.
 */
sphinx_wave2feat_t *
sphinx_wave2feat_init(cmd_ln_t *config)
{
    sphinx_wave2feat_t *wtf;
    char const *ofmt;
    int i;

    wtf = (sphinx_wave2feat_t *)ckd_calloc(1, sizeof(*wtf));
    wtf->refcount = 1;
    wtf->config = cmd_ln_retain(config);
    wtf->fe = fe_init_auto_r(wtf->config);
    if (!wtf->fe) {
        E_FATAL("Failed to create feature extraction\n");
    }

    /* Fetch the requested format name once instead of once per table
     * entry, and never hand a NULL pointer to strcmp(). */
    ofmt = cmd_ln_str_r(config, "-ofmt");

    wtf->ot = outtypes; /* Default (sphinx) type. */
    i = nouttypes;      /* "not found" unless the search below succeeds. */
    if (ofmt != NULL) {
        for (i = 0; i < nouttypes; ++i) {
            output_type_t const *otype = &outtypes[i];
            if (0 == strcmp(ofmt, otype->name)) {
                wtf->ot = otype;
                break;
            }
        }
    }
    if (i == nouttypes) {
        E_ERROR("Unknown output type: '%s'\n",
                ofmt ? ofmt : "(null)");
        sphinx_wave2feat_free(wtf);
        return NULL;
    }

    return wtf;
}
Exemple #2
0
/**
 * Entry point of the batch "live pretend" decoder.
 *
 * Usage: <prog> <ctrlfile> <rawdir> <cfgfile>
 *
 * Parses the configuration file, initializes the decoder and the front
 * end, runs ctl_process() over the control file with utt_livepretend as
 * the per-utterance callback, reports corpus statistics and shuts down.
 * The variables rawdirfn, decoder, fe and st are declared outside this
 * function.
 *
 * @return 0 on success, -1 when the argument count is wrong.
 */
int
main(int _argc, char **_argv)
{
    char *ctrlfn;
    char *cfgfn;
    cmd_ln_t *config = NULL;

    print_appl_info(_argv[0]);

    if (_argc != 4) {
        printf("\nUSAGE: %s <ctrlfile> <rawdir> <cfgfile>\n", _argv[0]);
        return -1;
    }

    ctrlfn = _argv[1];
    rawdirfn = _argv[2];
    cfgfn = _argv[3];

    if ((config = cmd_ln_parse_file_r(config, S3_DECODE_ARG_DEFS, cfgfn, TRUE)) == NULL)
        E_FATAL("Bad configuration file %s.\n", cfgfn);

    if (s3_decode_init(&decoder, config) != S3_DECODE_SUCCESS)
        E_FATAL("Failed to initialize live-decoder.\n");

    /* The front end is needed for every utterance; refuse to continue
     * without one instead of failing later on a NULL handle. */
    fe = fe_init_auto_r(config);
    if (fe == NULL)
        E_FATAL("fe_init_auto_r() failed\n");

    st = decoder.kb.stat;
    ptmr_init(&(st->tm));


    if (ctrlfn) {
        /* When -ctlfile is specified, corpus.c will look at -ctl_lm and
           -ctl_mllr to get the corresponding LM and MLLR for the utterance */
        st->tm = ctl_process(ctrlfn,
                             cmd_ln_str_r(config, "-ctl_lm"),
                             cmd_ln_str_r(config, "-ctl_mllr"),
                             cmd_ln_int32_r(config, "-ctloffset"),
                             cmd_ln_int32_r(config, "-ctlcount"),
                             utt_livepretend, &(decoder.kb));
    }
    else {
        E_FATAL("control file is not specified.\n");
    }

    stat_report_corpus(decoder.kb.stat);

    s3_decode_close(&decoder);
    fe_free(fe);

    return 0;
}
/**
 * Thread body: compute features for the test recording and log the
 * frame count.
 *
 * Opens a per-thread log file named after the thread argument, stores it
 * under the logfp_index thread-specific key, creates a front end from the
 * thread's configuration, reads TESTDATADIR "/chan3.raw" completely and
 * runs fe_process_utt() over it.
 *
 * @param th Thread handle supplying the argument and configuration.
 * @return 0 on success, -1 if the front end, the input file or the
 *         feature computation failed.  Everything acquired here is
 *         released on every path.
 */
static int
process(sbthread_t *th)
{
    FILE *raw = NULL;
    FILE *logfile;
    int16 *buf = NULL;
    mfcc_t **cepbuf = NULL;
    fe_t *fe = NULL;
    size_t nsamps;
    long fsize;
    int32 nfr = 0;
    int rv = -1;
    /* Large enough for any long value plus the ".log" suffix. */
    char outfile[32];

    snprintf(outfile, sizeof(outfile), "%03ld.log", (long) sbthread_arg(th));
    logfile = fopen(outfile, "w");
    pthread_setspecific(logfp_index, (void *)logfile);

    if ((fe = fe_init_auto_r(sbthread_config(th))) == NULL)
        goto done;
    if ((raw = fopen(TESTDATADIR "/chan3.raw", "rb")) == NULL)
        goto done;

    /* Determine the file size; any failure here would otherwise turn
     * into a bogus allocation size. */
    if (fseek(raw, 0, SEEK_END) != 0)
        goto done;
    if ((fsize = ftell(raw)) < 0)
        goto done;
    if (fseek(raw, 0, SEEK_SET) != 0)
        goto done;

    buf = ckd_malloc(fsize);
    /* Only process the samples that were actually read. */
    nsamps = fread(buf, 1, fsize, raw) / sizeof(*buf);

    if (fe_process_utt(fe, buf, nsamps, &cepbuf, &nfr) < 0)
        goto done;
    E_INFO("nfr = %d\n", nfr);
    rv = 0;

done:
    if (cepbuf)
        fe_free_2d(cepbuf);
    if (buf)
        ckd_free(buf);
    if (raw)
        fclose(raw);
    if (fe)
        fe_free(fe);
    /* The log file may have failed to open; fclose(NULL) is undefined. */
    if (logfile)
        fclose(logfile);

    return rv;
}
/**
 * Entry point of the continuous audio segmenter.
 *
 * Builds the configuration from a single argument file or from the
 * command line (optionally merged with "-argfile"), selects the audio
 * source ("-infile" or the "-adcdev" capture device), creates the front
 * end and runs segment_audio().  The variables config, singlefile,
 * infile_path, infile, read_audio, ad and fe are declared outside this
 * function.
 *
 * @return 0 on success, 1 on any initialization failure.
 */
int
main(int argc, char *argv[])
{
    char const *cfg;
    int i;
    int16 buf[2048];
    int rv = 1;

    if (argc == 2) {
        config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE);
    }
    else {
        config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE);
    }
    /* Handle argument file as -argfile. */
    if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
        config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
    }
    if (config == NULL)
        return 1;

    singlefile = cmd_ln_boolean_r(config, "-singlefile");
    if ((infile_path = cmd_ln_str_r(config, "-infile")) != NULL) {
        if ((infile = fopen(infile_path, "rb")) == NULL) {
            E_FATAL_SYSTEM("Failed to read audio from '%s'", infile_path);
            return 1;
        }
        read_audio = &read_audio_file;
        /* skip wav header */
        read_audio(buf, 44);
    }
    else {
        if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
                              (int) cmd_ln_float32_r(config,
                                                     "-samprate"))) ==
            NULL) {
            E_FATAL("Failed to open audio device\n");
            return 1;
        }
        read_audio = &read_audio_adev;
        printf("Start recording ...\n");
        fflush(stdout);
        if (ad_start_rec(ad) < 0)
            E_FATAL("Failed to start recording\n");

        /* TODO remove this thing */
        for (i = 0; i < 5; i++) {
            sleep_msec(200);
            read_audio(buf, 2048);
        }
        printf("You may speak now\n");
        fflush(stdout);
    }

    fe = fe_init_auto_r(config);
    if (fe != NULL) {
        segment_audio();
        fe_free(fe);
        rv = 0;
    }

    /* Shared teardown: also runs when the front end could not be
     * created, so the audio source and the configuration are released
     * on that path too. */
    if (ad)
        ad_close(ad);
    if (infile)
        fclose(infile);
    cmd_ln_free_r(config);

    return rv;
}
Exemple #5
0
/**
 * Build an acoustic model object.
 *
 * @param config Configuration; retained by the new object.
 * @param lmath  Log-math object stored in the new object.
 * @param fe     Existing front end to share (retained), or NULL to create
 *               one from @p config.
 * @param fcb    Existing feature-computation object to share (retained),
 *               or NULL to create one via acmod_init_feat().
 * @return The initialized object, or NULL after acmod_free() if any
 *         mismatch check or initialization step fails.
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    char const *featparams;
    acmod_t *acmod = ckd_calloc(1, sizeof(*acmod));

    acmod->config = cmd_ln_retain(config);
    acmod->lmath = lmath;
    acmod->state = ACMOD_IDLE;

    /* Merge feat.params from the acoustic model directory, if named. */
    featparams = cmd_ln_str_r(acmod->config, "-featparams");
    if (featparams != NULL
        && cmd_ln_parse_file_r(acmod->config, feat_defn,
                               featparams, FALSE) != NULL) {
        E_INFO("Parsed model-specific feature parameters from %s\n",
                featparams);
    }

    /* Front end: create our own, or share the caller's. */
    if (fe == NULL) {
        acmod->fe = fe_init_auto_r(config);
        if (acmod->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(acmod, acmod->fe))
            goto error_out;
    }
    else {
        if (acmod_fe_mismatch(acmod, fe))
            goto error_out;
        fe_retain(fe);
        acmod->fe = fe;
    }

    /* Dynamic feature computation: create our own, or share the caller's. */
    if (fcb == NULL) {
        if (acmod_init_feat(acmod) < 0)
            goto error_out;
    }
    else {
        if (acmod_feat_mismatch(acmod, fcb))
            goto error_out;
        feat_retain(fcb);
        acmod->fcb = fcb;
    }

    /* Acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;

    /* MFCC buffer: must hold at least one full dynamic feature window. */
    acmod->n_mfc_alloc = 2 * acmod->fcb->window_size + 1;
    acmod->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
                      sizeof(**acmod->mfc_buf));

    /* Feature buffer: at least as large as the MFCC buffer. */
    acmod->n_feat_alloc =
        acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
    acmod->framepos =
        ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));

    acmod->utt_start_frame = 0;

    /* Per-senone score and activity bookkeeping. */
    acmod->senone_scores =
        ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                   sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active =
        ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                   sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    return acmod;

error_out:
    acmod_free(acmod);
    return NULL;
}
Exemple #6
0
/**
 * Initialize the Sphinx decoder and front end from engine arguments.
 *
 * Writes a temporary configuration file that points at the acoustic
 * model, dictionaries and language model (paths are prefixed with the
 * current working directory).  In mode 2 it also writes a JSGF grammar
 * from the candidate sentences and converts it to an FSG file; with an
 * empty sentence list the mode is switched to 4 and no grammar is used.
 * The configuration is then parsed and the decoder and front end are
 * created.
 *
 * @param e Engine arguments; e->sphinx_mode may be rewritten from 2 to 4.
 * @return OFXASR_SUCCESS, or one of the OFXASR_* error codes.  The
 *         configuration file handle is closed on every path.
 */
int ofxSphinxASR::engineInit(ofAsrEngineArgs *e)
{
#if defined TARGET_WIN32
    char cfg_filename[] = "sphinx.cfg";
    char grammarJSGF_filename[] = "grammar.jsgf";
    char grammarFSG_filename[] = "grammar.fsg";
#else
    char cfg_filename[] = "/tmp/sphinx.cfg";
    char grammarJSGF_filename[] = "/tmp/grammar.jsgf";
    char grammarFSG_filename[] = "/tmp/grammar.fsg";
#endif
    FILE *cfg_fp = fopen(cfg_filename, "wt");
    if (cfg_fp==NULL)
        return OFXASR_FAIL_WRITE_CONFIG;

    // All model files must exist before anything is written.
    if ( access(e->sphinxmodel_am.c_str(), 0) != 0
         || access(e->sphinxmodel_lm.c_str(), 0) != 0
         || access(e->sphinxmodel_dict.c_str(), 0) != 0
         || access(e->sphinxmodel_fdict.c_str(), 0) != 0 ) {
        fclose(cfg_fp);
        return OFXASR_FAIL_READ_FILES;
    }

    char cur_path[1024];
    if (getcwd(cur_path, sizeof(cur_path)) == NULL) {
        // Never format an uninitialized buffer; fall back to a relative prefix.
        cur_path[0] = '.';
        cur_path[1] = '\0';
    }

    fprintf(cfg_fp, "-samprate %d\n", e->samplerate);
    fprintf(cfg_fp, "-hmm %s/%s\n", cur_path, e->sphinxmodel_am.c_str());
    fprintf(cfg_fp, "-dict %s/%s\n", cur_path, e->sphinxmodel_dict.c_str());
    fprintf(cfg_fp, "-fdict %s/%s\n", cur_path, e->sphinxmodel_fdict.c_str());
    fprintf(cfg_fp, "-lm %s/%s\n", cur_path, e->sphinxmodel_lm.c_str());

    // Grammar mode without sentences falls back to mode 4 entirely; the
    // grammar file is neither written nor parsed in that case.
    if (e->sphinx_mode == 2 && e->sphinx_candidate_sentences.size() < 1) {
        printf("Warning: The word list is empty! Use mode 4.\n");
        e->sphinx_mode = 4;
    }

    if (e->sphinx_mode == 2) {
        const size_t n_sent = e->sphinx_candidate_sentences.size();

        FILE *gram_fp = fopen(grammarJSGF_filename, "wt");
        if (gram_fp==NULL) {
            fclose(cfg_fp);
            return OFXASR_FAIL_WRITE_CONFIG;
        }
        fprintf(gram_fp,
            "#JSGF V1.0;\n\ngrammar cca_gram;\n\npublic <cca_gram> = (\n");
        // Every alternative but the last is followed by '|'.
        for (size_t i = 0; i + 1 < n_sent; i++) {
            fprintf(gram_fp, "%s |\n",
             e->sphinx_candidate_sentences[i].c_str());
        }
        fprintf(gram_fp, "%s );\n\n",
            e->sphinx_candidate_sentences[n_sent - 1].c_str());
        fclose(gram_fp);

        jsgf_t *jsgf = jsgf_parse_file(grammarJSGF_filename, NULL);
        if (jsgf == NULL) {
            printf("Bad jsgf file %s.\n", grammarJSGF_filename);
            fclose(cfg_fp);
            return OFXASR_INVALID_JSGF_GRAMMAR;
        }
        fsg_model_t *fsg = get_fsg(jsgf, NULL);
        if (fsg == NULL) {
            // No FSG could be built from the grammar; do not write a NULL model.
            jsgf_grammar_free(jsgf);
            fclose(cfg_fp);
            return OFXASR_INVALID_JSGF_GRAMMAR;
        }
        fsg_model_writefile(fsg, grammarFSG_filename);
        fsg_model_free(fsg);
        jsgf_grammar_free(jsgf);
        fprintf(cfg_fp, "-fsg %s\n", grammarFSG_filename);
    }
    fprintf(cfg_fp, "-op_mode %d\n", e->sphinx_mode);
    fclose(cfg_fp);
    err_set_logfp(NULL); // disable logs
    cmd_ln_t *config = NULL;
    config = cmd_ln_parse_file_r(config, S3_DECODE_ARG_DEFS, cfg_filename, TRUE);
    if (config == NULL) {
        return OFXASR_INVALID_CONFIG;
    }
    decoder = new s3_decode_t;
    if (s3_decode_init(decoder, config) != S3_DECODE_SUCCESS) {
        return OFXASR_FAIL_INIT_DECODER;
    }
    fe = fe_init_auto_r(config);
    if (fe == NULL) {
        return OFXASR_FAIL_INIT_FRONTEND;
    }
    bEngineInitialed = true;
    return OFXASR_SUCCESS;
}
Exemple #7
0
/**
 * Initialize a knowledge-base structure from a configuration.
 *
 * Zeroes @p kb, creates the kbcore and then, depending on which options
 * exist in @p config, the beam, fast-GMM, phoneme-lookahead, acoustic
 * score, front-end, feature, statistics, adaptation and search
 * structures.  Also opens the hypothesis output files.  Failures of the
 * mandatory steps are reported through E_FATAL.
 *
 * @param kb     Structure to initialize; previous contents are discarded.
 * @param config Parsed configuration.
 */
void
kb_init(kb_t * kb, cmd_ln_t *config)
{
    kbcore_t *kbcore;
    mdef_t *mdef;
    dict_t *dict;
    dict2pid_t *d2p;
    int32 cisencnt;

    /* STRUCTURE: Initialize the kb structure to zero, just in case */
    memset(kb, 0, sizeof(*kb));
    kb->kbcore = kbcore_init(config);
    if (kb->kbcore == NULL)
        E_FATAL("Initialization of kb failed\n");

    kbcore = kb->kbcore;
    mdef = kbcore_mdef(kbcore);
    dict = kbcore_dict(kbcore);
    d2p = kbcore_dict2pid(kbcore);

    err_set_debug_level(cmd_ln_int32_r(config, "-debug"));

    /* STRUCTURE INITIALIZATION: Initialize the beam data structure */
    if (cmd_ln_exists_r(config, "-ptranskip")) {
        kb->beam = beam_init(cmd_ln_float64_r(config, "-beam"),
                             cmd_ln_float64_r(config, "-pbeam"),
                             cmd_ln_float64_r(config, "-wbeam"),
                             cmd_ln_float64_r(config, "-wend_beam"),
                             cmd_ln_int32_r(config, "-ptranskip"), mdef_n_ciphone(mdef),
                             kbcore->logmath
            );

        /* REPORT : Report the parameters in the beam data structure */
        if (REPORT_KB)
                beam_report(kb->beam);
    }


    /* STRUCTURE INITIALIZATION: Initialize the fast GMM computation data structure */
    if (cmd_ln_exists_r(config, "-ci_pbeam")) {
        kb->fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
                                    cmd_ln_int32_r(config, "-cond_ds"),
                                    cmd_ln_int32_r(config, "-dist_ds"),
                                    cmd_ln_int32_r(config, "-gs4gs"),
                                    cmd_ln_int32_r(config, "-svq4svq"),
                                    cmd_ln_float64_r(config, "-subvqbeam"),
                                    cmd_ln_float64_r(config, "-ci_pbeam"),
                                    cmd_ln_float64_r(config, "-tighten_factor"),
                                    cmd_ln_int32_r(config, "-maxcdsenpf"),
                                    mdef->n_ci_sen,
                                    kbcore->logmath);

        /* REPORT : Report the parameters in the fast_gmm_t data structure */
        if (REPORT_KB)
            fast_gmm_report(kb->fastgmm);
    }

    /* STRUCTURE INITIALIZATION: Initialize the phoneme lookahead data structure */
    if (cmd_ln_exists_r(config, "-pl_beam")) {
        kb->pl = pl_init(cmd_ln_int32_r(config, "-pheurtype"),
                         cmd_ln_float64_r(config, "-pl_beam"), mdef_n_ciphone(mdef),
                         kbcore->logmath
            );

        /* REPORT : Report the parameters in the pl_t data structure */
        if (REPORT_KB)
            pl_report(kb->pl);
    }

    /* STRUCTURE INITIALIZATION: Initialize the acoustic score data structure */
    {
        int32 pl_window = 1;

        if (cmd_ln_exists_r(config, "-pl_window"))
            pl_window = cmd_ln_int32_r(config, "-pl_window");

        /* Count the leading senones that map to themselves.  The scan is
         * bounded by the senone count so it cannot run off the end of
         * cd2cisen when every senone maps to itself.
         * NOTE(review): assumes cd2cisen holds mdef->n_sen entries --
         * confirm against the mdef_t definition. */
        for (cisencnt = 0;
             cisencnt < mdef->n_sen && cisencnt == mdef->cd2cisen[cisencnt];
             cisencnt++) ;
        kb->ascr = ascr_init(kbcore_n_mgau(kbcore),
                             kb->kbcore->dict2pid->n_comstate,
                             mdef_n_sseq(mdef),
                             dict2pid_n_comsseq(d2p),
                             pl_window, cisencnt);

        if (REPORT_KB)
            ascr_report(kb->ascr);
    }

    /* Initialize the front end if -adcin is specified */
    if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
        if ((kb->fe = fe_init_auto_r(config)) == NULL) {
            E_FATAL("fe_init_auto_r() failed\n");
        }
    }
    /* STRUCTURE INITIALIZATION : The feature vector */
    if ((kb->feat =
         feat_array_alloc(kbcore_fcb(kbcore), S3_MAX_FRAMES)) == NULL)
        E_FATAL("feat_array_alloc() failed\n");

    /* STRUCTURE INITIALIZATION : The statistics for the search */
    kb->stat = stat_init();

    /* STRUCTURE INITIALIZATION : The adaptation routines of the search */
    kb->adapt_am = adapt_am_init();

    if (cmd_ln_str_r(config, "-mllr")) {
        kb_setmllr(cmd_ln_str_r(config, "-mllr"), cmd_ln_str_r(config, "-cb2mllr"), kb);
    }

    /* CHECK: make sure when (-cond_ds) is specified, a Gaussian map is also specified */
    if (cmd_ln_int32_r(config, "-cond_ds") > 0 && kb->kbcore->gs == NULL)
        E_FATAL
            ("Conditional Down Sampling require the use of Gaussian Selection map\n");

    /* MEMORY ALLOCATION : Word best score and exit */
    /* Open hypseg file if specified */
    kb->matchsegfp = kb->matchfp = NULL;
    kb->matchsegfp = file_open(cmd_ln_str_r(config, "-hypseg"));
    kb->matchfp = file_open(cmd_ln_str_r(config, "-hyp"));

    if (cmd_ln_exists_r(config, "-hmmdump"))
        kb->hmmdumpfp = cmd_ln_int32_r(config, "-hmmdump") ? stderr : NULL;

    /* STRUCTURE INITIALIZATION : The search data structure, done only
       after kb is initialized; kb acts as a clipboard. */
    if (cmd_ln_exists_r(config, "-op_mode")) {
        /* -op_mode, if set (i.e. not -1), takes precedence over -mode. */
        if (cmd_ln_int32_r(config, "-op_mode") != -1)
            kb->op_mode = cmd_ln_int32_r(config, "-op_mode");
        else
            kb->op_mode = srch_mode_str_to_index(cmd_ln_str_r(config, "-mode"));
        E_INFO("SEARCH MODE INDEX %d\n", kb->op_mode);
        if ((kb->srch = (srch_t *) srch_init(kb, kb->op_mode)) == NULL) {
            E_FATAL("Search initialization failed. Forced exit\n");
        }
        if (REPORT_KB) {
            srch_report(kb->srch);
        }
    }
}
Exemple #8
0
/**
 * Test program for the front-end frame processing API.
 *
 * Reads 1024 samples of TESTDATADIR "/chan3.raw" and checks that
 * computing the cepstra one frame at a time with fe_process_frames(),
 * all at once, in four 256-sample pieces, and through fe_process_utt()
 * all produce the same five frames.
 *
 * @return 0 when the end of the test is reached.
 */
int
main(int argc, char *argv[])
{
	static const arg_t fe_args[] = {
		waveform_to_cepstral_command_line_macro(),
		{ NULL, 0, NULL, NULL }
	};
	FILE *raw;
	cmd_ln_t *config;
	fe_t *fe;
	int16 buf[1024];
	int16 const *inptr;
	int32 frame_shift, frame_size;
	mfcc_t **cepbuf1, **cepbuf2, **cptr;
	int32 nfr, i;
	size_t nsamp;

	TEST_ASSERT(config = cmd_ln_parse_r(NULL, fe_args, argc, argv, FALSE));
	TEST_ASSERT(fe = fe_init_auto_r(config));

	TEST_EQUAL(fe_get_output_size(fe), DEFAULT_NUM_CEPSTRA);

	fe_get_input_size(fe, &frame_shift, &frame_size);
	TEST_EQUAL(frame_shift, DEFAULT_FRAME_SHIFT);
	TEST_EQUAL(frame_size, (int)(DEFAULT_WINDOW_LENGTH*DEFAULT_SAMPLING_RATE));

	TEST_ASSERT(raw = fopen(TESTDATADIR "/chan3.raw", "rb"));

	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_EQUAL(1024, fread(buf, sizeof(int16), 1024, raw));

	/* Query only: with NULL input and output, ask how many frames the
	 * 1024 samples will yield. */
	nsamp = 1024;
	TEST_ASSERT(fe_process_frames(fe, NULL, &nsamp, NULL, &nfr) >= 0);
	TEST_EQUAL(1024, nsamp);
	TEST_EQUAL(4, nfr);

	cepbuf1 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf1));
	inptr = &buf[0];
	nfr = 1;

	/* The pointer difference and the size_t count are cast to int below
	 * so that they match the %d conversions. */
	printf("frame_size %d frame_shift %d\n", frame_size, frame_shift);
	/* Process the first frame. */
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[0], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	/* Note that this next one won't actually consume any frames
	 * of input, because it already got sufficient overflow
	 * samples last time around.  This is implementation-dependent
	 * so we shouldn't actually test for it. */
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[1], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[2], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[3], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	TEST_ASSERT(fe_end_utt(fe, cepbuf1[4], &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 1);

	/* What we *should* test is that the output we get by
	 * processing one frame at a time is exactly the same as what
	 * we get from doing them all at once.  So let's do that */
	cepbuf2 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf2));
	inptr = &buf[0];
	nfr = 5;
	nsamp = 1024;
	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cepbuf2, &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 4);
	nfr = 1;
	TEST_ASSERT(fe_end_utt(fe, cepbuf2[4], &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 1);

	for (i = 0; i < 5; ++i) {
		int j;
		printf("%d: ", i);
		for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) {
			printf("%.2f,%.2f ",
			       MFCC2FLOAT(cepbuf1[i][j]),
			       MFCC2FLOAT(cepbuf2[i][j]));
			TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]);
		}
		printf("\n");
	}

	/* Now, also test to make sure that even if we feed data in
	 * little tiny bits we can still make things work. */
	memset(cepbuf2[0], 0, 5 * DEFAULT_NUM_CEPSTRA * sizeof(**cepbuf2));
	inptr = &buf[0];
	cptr = &cepbuf2[0];
	nfr = 5;
	i = 5;
	nsamp = 256;
	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
	cptr += i;
	nfr -= i;
	i = nfr;
	nsamp = 256;
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
	cptr += i;
	nfr -= i;
	i = nfr;
	nsamp = 256;
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
	cptr += i;
	nfr -= i;
	i = nfr;
	nsamp = 256;
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
	cptr += i;
	nfr -= i;
	TEST_ASSERT(fe_end_utt(fe, *cptr, &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 1);

	for (i = 0; i < 5; ++i) {
		int j;
		printf("%d: ", i);
		for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) {
			printf("%.2f,%.2f ",
			       MFCC2FLOAT(cepbuf1[i][j]),
			       MFCC2FLOAT(cepbuf2[i][j]));
			TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]);
		}
		printf("\n");
	}

	/* And now, finally, test fe_process_utt() */
	inptr = &buf[0];
	i = 0;
	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_end_utt(fe, cepbuf2[i], &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	TEST_EQUAL(nfr, 1);

	for (i = 0; i < 5; ++i) {
		int j;
		printf("%d: ", i);
		for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) {
			printf("%.2f,%.2f ",
			       MFCC2FLOAT(cepbuf1[i][j]),
			       MFCC2FLOAT(cepbuf2[i][j]));
			TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]);
		}
		printf("\n");
	}

	ckd_free_2d(cepbuf1);
	ckd_free_2d(cepbuf2);
	fclose(raw);
	fe_free(fe);
	/* Release the configuration obtained from cmd_ln_parse_r(). */
	cmd_ln_free_r(config);

	return 0;
}
Exemple #9
0
/**
 * Create a front end from the configuration returned by cmd_ln_get().
 *
 * @return Whatever fe_init_auto_r() returns for that configuration.
 */
fe_t *
fe_init_auto(void)
{
    /* "(void)" makes this a real prototype, so calls that pass arguments
     * are rejected at compile time. */
    return fe_init_auto_r(cmd_ln_get());
}
Exemple #10
0
/**
 * Convert one input file to a feature file.
 *
 * Detects the input type, makes sure the FFT size covers the analysis
 * window (re-creating the front end if "-nfft" had to grow), sizes the
 * audio and feature buffers, writes a placeholder header, decodes the
 * input and finally rewrites the header with the real element count.
 *
 * @param wtf     Converter state; its per-file fields (infile, outfile,
 *                audio, feat, outfh) are released and reset to NULL
 *                before returning, on success and on failure alike.
 * @param infile  Path of the input file.
 * @param outfile Path of the output file.
 * @return 0 on success, -1 on failure.
 */
int
sphinx_wave2feat_convert_file(sphinx_wave2feat_t *wtf,
                              char const *infile, char const *outfile)
{
    int nchans, minfft, nfft, nfloat, veclen;
    audio_type_t const *atype;
    int fshift, fsize;
    int rv = -1;

    if (cmd_ln_boolean_r(wtf->config, "-verbose"))
        E_INFO("Converting %s to %s\n", infile, outfile);

    wtf->infile = ckd_salloc(infile);

    /* Detect input file type. */
    if ((atype = detect_audio_type(wtf)) == NULL)
        goto cleanup;

    /* Determine whether to byteswap input. */
    wtf->byteswap = strcmp(cmd_ln_str_r(wtf->config, "-mach_endian"),
                           cmd_ln_str_r(wtf->config, "-input_endian"));

    /* Make sure the FFT size is sufficiently large. */
    minfft = (int)(cmd_ln_float32_r(wtf->config, "-samprate")
                   * cmd_ln_float32_r(wtf->config, "-wlen") + 0.5);
    for (nfft = 1; nfft < minfft; nfft <<= 1)
        ;
    if (nfft > cmd_ln_int32_r(wtf->config, "-nfft")) {
        E_WARN("Value of -nfft = %d is too small, increasing to %d\n",
               cmd_ln_int32_r(wtf->config, "-nfft"), nfft);
        cmd_ln_set_int32_r(wtf->config, "-nfft", nfft);
        fe_free(wtf->fe);
        wtf->fe = fe_init_auto_r(wtf->config);
        /* Everything below uses the front end; stop if it could not be
         * rebuilt. */
        if (wtf->fe == NULL) {
            E_ERROR("Failed to create feature extraction\n");
            goto cleanup;
        }
    }

    /* Get the output frame size (if not already set). */
    if (wtf->veclen == 0)
        wtf->veclen = fe_get_output_size(wtf->fe);

    /* Set up the input and output buffers. */
    fe_get_input_size(wtf->fe, &fshift, &fsize);
    /* Want to get at least a whole frame plus shift in here.  Also we
       will either pick or mix multiple channels so we need to read
       them all at once. */
    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    wtf->blocksize = cmd_ln_int32_r(wtf->config, "-blocksize") * nchans;
    if (wtf->blocksize < (fsize + fshift) * nchans) {
        E_INFO("Block size of %d too small, increasing to %d\n",
               wtf->blocksize,
               (fsize + fshift) * nchans);
        wtf->blocksize = (fsize + fshift) * nchans;
    }
    wtf->audio = (short *)ckd_calloc(wtf->blocksize, sizeof(*wtf->audio));
    wtf->featsize = (wtf->blocksize / nchans - fsize) / fshift;

    /* Use the maximum of the input and output frame sizes to allocate this. */
    veclen = wtf->veclen;
    if (wtf->in_veclen > veclen) veclen = wtf->in_veclen;

    wtf->feat = (mfcc_t**)ckd_calloc_2d(wtf->featsize, veclen, sizeof(**wtf->feat));

    /* Let's go! */
    if ((wtf->outfh = fopen(outfile, "wb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s for writing", outfile);
        goto cleanup;
    }
    /* Write an empty header, which we'll fill in later. */
    if (wtf->ot->output_header &&
        (*wtf->ot->output_header)(wtf, 0) < 0) {
        E_ERROR_SYSTEM("Failed to write empty header to %s\n", outfile);
        goto cleanup;
    }
    wtf->outfile = ckd_salloc(outfile);

    if ((nfloat = (*atype->decode)(wtf)) < 0) {
        E_ERROR("Failed to convert");
        goto cleanup;
    }

    if (wtf->ot->output_header) {
        if (fseek(wtf->outfh, 0, SEEK_SET) < 0) {
            E_ERROR_SYSTEM("Failed to seek to beginning of %s\n", outfile);
            goto cleanup;
        }
        if ((*wtf->ot->output_header)(wtf, nfloat) < 0) {
            E_ERROR_SYSTEM("Failed to write header to %s\n", outfile);
            goto cleanup;
        }
    }

    rv = 0;

cleanup:
    /* Single exit path shared by success and failure, so the per-file
     * state is never left behind.
     * NOTE(review): any resource acquired inside detect_audio_type() or
     * the decode callback is not visible here -- confirm that it is
     * released elsewhere. */
    if (wtf->audio)
        ckd_free(wtf->audio);
    if (wtf->feat)
        ckd_free_2d(wtf->feat);
    if (wtf->infile)
        ckd_free(wtf->infile);
    if (wtf->outfile)
        ckd_free(wtf->outfile);

    wtf->audio = NULL;
    wtf->infile = NULL;
    wtf->feat = NULL;
    wtf->outfile = NULL;

    if (wtf->outfh)
        if (fclose(wtf->outfh) == EOF)
            E_ERROR_SYSTEM("Failed to close output file");
    wtf->outfh = NULL;

    return rv;
}
Exemple #11
0
/**
 * Initialize the models used by this program.
 *
 * Creates the kbcore and fills in its log-math and feature objects,
 * loads the acoustic model via s3_am_init(), optionally creates the
 * front end (when "-adcin" is set), loads the dictionary and sets up the
 * acoustic score, fast-GMM and adaptation structures.  The variables
 * kbc, fe, dict, ascr, fastgmm and adapt_am are declared outside this
 * function.
 *
 * @param config Parsed configuration.
 */
static void
models_init(cmd_ln_t *config)
{
    int32 cisencnt;

    kbc = New_kbcore(config);
    /* Check the pointer before its first dereference, not after. */
    assert(kbc);

    kbc->logmath = logs3_init(cmd_ln_float64_r(config, "-logbase"), 1,
                              cmd_ln_int32_r(config, "-log3table"));

    /* Initialize feature stream type */
    kbc->fcb = feat_init(cmd_ln_str_r(config, "-feat"),
			 cmn_type_from_str(cmd_ln_str_r(config, "-cmn")),
			 cmd_ln_boolean_r(config, "-varnorm"),
			 agc_type_from_str(cmd_ln_str_r(config, "-agc")), 1,
			 cmd_ln_int32_r(config, "-ceplen"));

    s3_am_init(kbc);

    /* Initialize the front end if -adcin is specified */
    if (cmd_ln_exists_r(config, "-adcin") && cmd_ln_boolean_r(config, "-adcin")) {
        if ((fe = fe_init_auto_r(config)) == NULL) {
            E_FATAL("fe_init_auto_r() failed\n");
        }
    }

    assert(kbc->mdef);
    assert(kbc->tmat);

    /* Dictionary */
    dict = dict_init(kbc->mdef, cmd_ln_str_r(config, "-dict"),
                     cmd_ln_str_r(config, "-fdict"), 
                     cmd_ln_int32_r(config, "-lts_mismatch"),
		     cmd_ln_boolean_r(config, "-mdef_fillers"),
		     /* Never do mdef filler phones. */
		     FALSE,
		     TRUE);

    /* Count the leading senones that map to themselves.  The scan is
     * bounded by the senone count so it cannot run off the end of
     * cd2cisen when every senone maps to itself.
     * NOTE(review): assumes cd2cisen holds n_sen entries -- confirm. */
    for (cisencnt = 0;
         cisencnt < kbc->mdef->n_sen
             && cisencnt == kbc->mdef->cd2cisen[cisencnt];
         cisencnt++);

    ascr = ascr_init(kbc->mdef->n_sen, 0,       /* No composite senone */
                     mdef_n_sseq(kbc->mdef), 0, /* No composite senone sequence */
                     1,         /* Phoneme lookahead window =1. Not enabled phoneme lookahead at this moment */
                     cisencnt);

    fastgmm = fast_gmm_init(cmd_ln_int32_r(config, "-ds"),
                            cmd_ln_int32_r(config, "-cond_ds"),
                            cmd_ln_int32_r(config, "-dist_ds"),
                            cmd_ln_int32_r(config, "-gs4gs"),
                            cmd_ln_int32_r(config, "-svq4svq"),
                            cmd_ln_float64_r(config, "-subvqbeam"),
                            cmd_ln_float64_r(config, "-ci_pbeam"),
                            cmd_ln_float64_r(config, "-tighten_factor"),
                            cmd_ln_int32_r(config, "-maxcdsenpf"),
                            kbc->mdef->n_ci_sen,
                            kbc->logmath);
    adapt_am = adapt_am_init();
}