コード例 #1
0
/*
 * Decode one block of live audio and hand back the current partial
 * hypothesis.
 *
 * samples, nsamples: raw audio block handed to fe_process_utt().
 * live_endutt:       non-zero when this block ends the utterance; the front
 *                    end is flushed and the utterance is closed.
 * ohyp:              receives the partial-hypothesis array (parthyp).
 *
 * Returns the value produced by live_get_partialhyp().
 *
 * Per-utterance state is kept in static variables, so the function is not
 * reentrant.
 */
int32 live_utt_decode_block (int16 *samples, int32 nsamples, 
		      int32 live_endutt, partialhyp_t **ohyp)
{
    static int32 live_begin_new_utt = 1;    /* re-armed at each end of utterance */
    static int32 frmno;
    float32 **live_feat;
    float32 **mfcbuf = NULL;    /* allocated inside fe_process_utt(), freed below */
    int32   live_nfr, live_nfeatvec;
    int32   nwds;

    if (live_begin_new_utt) {
        /* First block of an utterance: reset front end and per-utterance counters. */
        fe_start_utt(fe);
        utt_begin (kb);
        frmno = 0;
        kb->nfr = 0;
        kb->utt_hmm_eval = 0;
        kb->utt_sen_eval = 0;
        kb->utt_gau_eval = 0;
        /*
         * NOTE(review): the flag is cleared here, so the "begin utterance"
         * argument passed to feat_s2mfc2feat_block() below is always 0.
         * Confirm whether that callee is expected to see 1 on the first block.
         */
        live_begin_new_utt = 0;
    }

    sample_blk++;

    /* 10.jan.01 RAH, fe_process_utt now requires ***mfcbuf and it allocates the memory internally) */
    live_nfr = fe_process_utt(fe, samples, nsamples, &mfcbuf);
    if (live_endutt) {
        /* RAH, It seems that we shouldn't throw out this data */
        fe_end_utt(fe, dummyframe);     /* Flush out the fe */
    }

    /* Compute feature vectors */
    live_nfeatvec = feat_s2mfc2feat_block(kbcore_fcb(kbcore), mfcbuf,
                                          live_nfr, live_begin_new_utt,
                                          live_endutt, &live_feat);
    /* live_nfeatvec is an int32, so it must be printed with %d, not %ld. */
    E_INFO ("live_nfeatvec: %d\n", live_nfeatvec);

    /* Decode the block; the first START_BLOCK blocks use the single-utterance path. */
    if (sample_blk <= START_BLOCK) {
        single_utt_decode_block (live_feat, live_nfeatvec, &frmno, kb,
                                 maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp);
    }
    else {
        utt_decode_block (live_feat, live_nfeatvec, &frmno, kb,
                          maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp);
    }

    /* Pull out partial hypothesis */
    nwds = live_get_partialhyp(live_endutt);
    *ohyp = parthyp;

    /* Clean up */
    if (live_endutt) {
        live_begin_new_utt = 1;
        kb->tot_fr += kb->nfr;
        utt_end(kb);
    }
    else {
        live_begin_new_utt = 0;
    }

    /* I'm starting to think that fe_process_utt should not be allocating its memory,
       that or it should allocate some max and just keep on going, this idea of constantly allocating freeing
       memory seems dangerous to me.*/
    ckd_free_2d((void **) mfcbuf); /* RAH, this must be freed since fe_process_utt allocates it */

    return(nwds);
}
コード例 #2
0
ファイル: fe_interface.c プロジェクト: AtDinesh/Jaf_pose_est
/*
 * Create a front-end object configured from `config`.
 *
 * Returns the new object (reference count 1), or NULL when the general
 * parameters cannot be parsed or the frame size exceeds the FFT size.
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *front = ckd_calloc(1, sizeof(*front));

    front->refcount = 1;

    /* Copy the general parameters from the configuration into the object. */
    if (fe_parse_general_params(cmd_ln_retain(config), front) < 0) {
        fe_free(front);
        return NULL;
    }

    /*
     * Derived parameters.  Adding 0.5 before the cast rounds to the nearest
     * integer instead of truncating (2.3 -> 2, 3.7 -> 4).
     */
    front->frame_shift =
        (int32) (front->sampling_rate / front->frame_rate + 0.5);
    front->frame_size =
        (int32) (front->window_length * front->sampling_rate + 0.5);
    front->prior = 0;
    front->frame_counter = 0;

    assert (front->frame_shift > 1);

    /* A frame must fit inside the FFT buffer. */
    if (front->fft_size < front->frame_size) {
        E_WARN("Number of FFT points has to be a power of 2 higher than %d\n",
               front->frame_size);
        fe_free(front);
        return NULL;
    }

    if (front->dither) {
        fe_init_dither(front->seed);
    }

    /* Overflow-sample buffer and Hamming window (half the frame is stored). */
    front->overflow_samps = ckd_calloc(front->frame_size, sizeof(int16));
    front->hamming_window = ckd_calloc(front->frame_size/2, sizeof(window_t));
    fe_create_hamming(front->hamming_window, front->frame_size);

    /* Mel filter bank: allocate, configure, then build filters and cosines. */
    front->mel_fb = ckd_calloc(1, sizeof(*front->mel_fb));
    fe_parse_melfb_params(config, front, front->mel_fb);
    fe_build_melfilters(front->mel_fb);
    fe_compute_melcosine(front->mel_fb);

    /* Scratch buffers for speech, FFT frame, spectrum and mel spectrum. */
    front->spch = ckd_calloc(front->frame_size, sizeof(*front->spch));
    front->frame = ckd_calloc(front->fft_size, sizeof(*front->frame));
    front->spec = ckd_calloc(front->fft_size, sizeof(*front->spec));
    front->mfspec =
        ckd_calloc(front->mel_fb->num_filters, sizeof(*front->mfspec));

    /* Twiddle-factor tables for the FFT. */
    front->ccc = ckd_calloc(front->fft_size / 4, sizeof(*front->ccc));
    front->sss = ckd_calloc(front->fft_size / 4, sizeof(*front->sss));
    fe_create_twiddle(front);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(front);
    }

    /* Initialize the overflow buffers. */
    fe_start_utt(front);
    return front;
}
コード例 #3
0
ファイル: fe_interface.c プロジェクト: JonGBowen/GoodVibes
/*
 * Create a front-end object (with VAD / noise-removal support) configured
 * from `config`.
 *
 * Returns the new object (reference count 1), or NULL when the general
 * parameters cannot be parsed, the frame size exceeds the FFT size, or the
 * upper filter frequency is above half the sampling rate.
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *front;
    int prespch_frame_len;

    front = (fe_t*)ckd_calloc(1, sizeof(*front));
    front->refcount = 1;

    /* Copy the general parameters from the configuration into the object. */
    if (fe_parse_general_params(cmd_ln_retain(config), front) < 0) {
        fe_free(front);
        return NULL;
    }

    /*
     * Derived parameters.  Adding 0.5 before the cast rounds to the nearest
     * integer instead of truncating (2.3 -> 2, 3.7 -> 4).
     */
    front->frame_shift =
        (int32) (front->sampling_rate / front->frame_rate + 0.5);
    front->frame_size =
        (int32) (front->window_length * front->sampling_rate + 0.5);
    front->prior = 0;

    fe_start_stream(front);

    assert (front->frame_shift > 1);

    /* A frame must fit inside the FFT buffer. */
    if (front->fft_size < front->frame_size) {
        E_ERROR("Number of FFT points has to be a power of 2 higher than %d, it is %d\n",
                front->frame_size, front->fft_size);
        fe_free(front);
        return NULL;
    }

    if (front->dither) {
        fe_init_dither(front->seed);
    }

    /* Overflow-sample buffer and Hamming window (half the frame is stored). */
    front->overflow_samps = ckd_calloc(front->frame_size, sizeof(int16));
    front->hamming_window = ckd_calloc(front->frame_size/2, sizeof(window_t));
    fe_create_hamming(front->hamming_window, front->frame_size);

    /* Mel filter bank: allocate and configure. */
    front->mel_fb = ckd_calloc(1, sizeof(*front->mel_fb));
    fe_parse_melfb_params(config, front, front->mel_fb);

    /* Reject an upper filter edge above half the sampling rate (1.0 slack). */
    if (front->mel_fb->upper_filt_freq > front->sampling_rate / 2 + 1.0) {
        E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n",
                front->mel_fb->upper_filt_freq, front->sampling_rate / 2);
        fe_free(front);
        return NULL;
    }

    fe_build_melfilters(front->mel_fb);
    fe_compute_melcosine(front->mel_fb);

    /* Noise statistics are only needed when noise or silence removal is on. */
    if (front->remove_noise || front->remove_silence) {
        front->noise_stats = fe_init_noisestats(front->mel_fb->num_filters);
    }

    /* VAD state; buffered frame width depends on the output type. */
    front->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*front->vad_data));
    if (front->log_spec != RAW_LOG_SPEC) {
        prespch_frame_len = front->num_cepstra;
    }
    else {
        prespch_frame_len = front->mel_fb->num_filters;
    }
    front->vad_data->prespch_buf =
        fe_prespch_init(front->prespch_len + 1, prespch_frame_len,
                        front->frame_shift);

    /* Scratch buffers for speech, FFT frame, spectrum and mel spectrum. */
    front->spch = ckd_calloc(front->frame_size, sizeof(*front->spch));
    front->frame = ckd_calloc(front->fft_size, sizeof(*front->frame));
    front->spec = ckd_calloc(front->fft_size, sizeof(*front->spec));
    front->mfspec =
        ckd_calloc(front->mel_fb->num_filters, sizeof(*front->mfspec));

    /* Twiddle-factor tables for the FFT. */
    front->ccc = ckd_calloc(front->fft_size / 4, sizeof(*front->ccc));
    front->sss = ckd_calloc(front->fft_size / 4, sizeof(*front->sss));
    fe_create_twiddle(front);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(front);
    }

    /* Initialize the overflow buffers. */
    fe_start_utt(front);
    return front;
}
コード例 #4
0
ファイル: main_align.c プロジェクト: Ankit77/cmusphinx
/*
 * Align one utterance against its transcript.
 *
 * data:   the cmd_ln_t configuration (passed as void *).
 * ur:     utterance resources (input file name, optional MLLR names).
 * sf, ef: start/end frame, forwarded to feat_s2mfc2feat() for cepstral input.
 * uttid:  utterance id from the control file; compared against the id found
 *         in parentheses at the end of the transcript line.
 *
 * Reads the next transcript line from sentfp, computes features (from
 * waveform when -adcin is set, otherwise from a cepstrum file), applies MLLR
 * if requested, and calls align_utt() when at least one frame was produced.
 */
static void
utt_align(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    int32 nfr;
    int k, i;
    const char *cepdir;
    const char *cepext;
    char sent[16384];
    cmd_ln_t *config = (cmd_ln_t*) data;

    cepdir = cmd_ln_str_r(kbc->config, "-cepdir");
    cepext = cmd_ln_str_r(kbc->config, "-cepext");

    /* UGLY! */
    /* Read utterance transcript and match it with the control file. */
    if (fgets(sent, sizeof(sent), sentfp) == NULL) {
        E_FATAL("EOF(%s) of the transcription\n", sentfile);
    }

    /* Strip utterance id from the end of the transcript. */
    /* Cast before subtracting so an empty line yields -1 rather than an
     * unsigned wrap-around. */
    for (k = (int) strlen(sent) - 1;
         (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t')
                     || (sent[k] == ' ')); --k);
    if ((k > 0) && (sent[k] == ')')) {
        /* Walk back to the matching '(' and cut the transcript there. */
        for (--k; (k >= 0) && (sent[k] != '('); --k);
        if ((k >= 0) && (sent[k] == '(')) {
            sent[k] = '\0';

            /* Check that uttid in transcript and control file match */
            for (i = ++k;
                 sent[i] && (sent[i] != ')') &&
                 (sent[i] != '\n') && (sent[i] != '\t')
                 && (sent[i] != ' '); i++);
            sent[i] = '\0';
            if (id_cmp(sent + k, uttid) != 0) {
                E_ERROR
                    ("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n",
                     uttid, sent + k);
            }
        }
    }

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;
        mfcc_t **mfcc = NULL;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }
        fe_start_utt(fe);
        if (fe_process_utt(fe, adcdata, nsamps, &mfcc, &nfr) < 0) {
            /* The file name was passed without a matching conversion
             * specifier; report it properly. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);
        if (nfr > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n", S3_MAX_FRAMES);
        }
        if ((nfr = feat_s2mfc2feat_live(kbcore_fcb(kbc),
                                        mfcc,
                                        &nfr,
                                        TRUE, TRUE,
                                        feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
        if (mfcc) {
            ckd_free_2d((void **)mfcc);
        }
    }
    else {
        nfr =
            feat_s2mfc2feat(kbcore_fcb(kbc), ur->uttfile, cepdir, cepext, sf, ef, feat,
                            S3_MAX_FRAMES);
    }

    /* Apply per-utterance MLLR to whichever acoustic model type is loaded. */
    if (ur->regmatname) {
        if (kbc->mgau) {
            adapt_set_mllr(adapt_am, kbc->mgau, ur->regmatname,
                           ur->cb2mllrname, kbc->mdef, kbc->config);
        }
        else if (kbc->ms_mgau) {
            model_set_mllr(kbc->ms_mgau, ur->regmatname, ur->cb2mllrname,
                           kbcore_fcb(kbc), kbc->mdef, kbc->config);
        }
        else {
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
        }
    }

    if (nfr <= 0) {
        if (cepdir != NULL) {
            E_ERROR
                ("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n",
                 uttid, ur->uttfile, cepdir, cepext);
        }
        else {
            E_ERROR
                ("Utt %s: Input file read (%s) with extension (%s) failed \n",
                 uttid, ur->uttfile, cepext);
        }
    }
    else {
        E_INFO("%s: %d input frames\n", uttid, nfr);
        align_utt(sent, nfr, ur->uttfile, uttid);
    }

}
コード例 #5
0
ファイル: cont_seg.c プロジェクト: JonGBowen/GoodVibes
/*
 * Split the audio delivered by read_audio() into voiced segments.
 *
 * Every silence->speech transition reported by the front end opens an output
 * file (a new numbered "<infile_path>NNNN.raw", or "<infile_path>.raw" in
 * append mode when singlefile is set); voiced samples are written to it until
 * the speech->silence transition, at which point a summary line is printed.
 */
void
segment_audio()
{
    FILE *file;
    int16 pcm_buf[BLOCKSIZE];
    mfcc_t **cep_buf;
    int16 *voiced_buf;
    int32 voiced_nsamps, out_frameidx = 0, uttstart = 0;
    char file_name[1024];
    uint8 cur_vad_state, vad_state, writing;
    int uttno, uttlen, sample_rate;
    int32 nframes, nframes_tmp;
    int16 frame_size, frame_shift, frame_rate;
    size_t k;

    sample_rate = (int) cmd_ln_float32_r(config, "-samprate");
    frame_rate = cmd_ln_int32_r(config, "-frate");
    frame_size =
        (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5);
    /* Divide in floating point: with integer division the "+ 0.5" rounding
     * has no effect and the result is always truncated. */
    frame_shift =
        (int32) ((double) sample_rate / cmd_ln_int32_r(config, "-frate") + 0.5);
    nframes = (BLOCKSIZE - frame_size) / frame_shift;
    cep_buf =
        (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe),
                                  sizeof(mfcc_t));

    uttno = 0;
    uttlen = 0;
    cur_vad_state = 0;
    voiced_nsamps = 0;
    writing = 0;
    file = NULL;
    voiced_buf = NULL;
    fe_start_stream(fe);
    fe_start_utt(fe);
    while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) {
        int16 const *pcm_buf_tmp;
        pcm_buf_tmp = &pcm_buf[0];
        /* Keep feeding until the front end has consumed the whole block. */
        while (k) {
            nframes_tmp = nframes;
            fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf,
                                  &nframes_tmp, &voiced_buf,
                                  &voiced_nsamps, &out_frameidx);
            if (out_frameidx > 0) {
                uttstart = out_frameidx;
            }
            vad_state = fe_get_vad_state(fe);
            if (!cur_vad_state && vad_state) {
                /* silence->speech transition, time to start new file */
                uttno++;
                /* snprintf bounds the write to file_name regardless of the
                 * length of infile_path. */
                if (!singlefile) {
                    snprintf(file_name, sizeof(file_name), "%s%04d.raw",
                             infile_path, uttno);
                    if ((file = fopen(file_name, "wb")) == NULL)
                          E_FATAL_SYSTEM("Failed to open '%s' for writing",
                                         file_name);
                } else {
                    snprintf(file_name, sizeof(file_name), "%s.raw",
                             infile_path);
                    if ((file = fopen(file_name, "ab")) == NULL)
                          E_FATAL_SYSTEM("Failed to open '%s' for writing",
                                         file_name);
                }
                writing = 1;
            }

            if (writing && file && voiced_nsamps > 0) {
                fwrite(voiced_buf, sizeof(int16), voiced_nsamps, file);
                uttlen += voiced_nsamps;
            }

            if (cur_vad_state && !vad_state) {
                /* speech -> silence transition, time to finish file */
                fclose(file);
                file = NULL;    /* do not keep a closed stream around */
                printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
                       uttno,
                       file_name,
                       ((double) uttstart) / frame_rate,
                       uttlen,
                       ((double) uttlen) / sample_rate);
                fflush(stdout);
                fe_end_utt(fe, cep_buf[0], &nframes_tmp);
                writing = 0;
                uttlen = 0;
                voiced_nsamps = 0;
                fe_start_utt(fe);
            }
            cur_vad_state = vad_state;
        }
    }

    /* Input ended in the middle of speech: close and report the last file. */
    if (writing) {
        fclose(file);
        file = NULL;
        printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
               uttno,
               file_name,
               ((double) uttstart) / frame_rate,
               uttlen,
               ((double) uttlen) / sample_rate);
        fflush(stdout);
    }
    fe_end_utt(fe, cep_buf[0], &nframes);
    ckd_free_2d(cep_buf);
}
コード例 #6
0
ファイル: ps_test.c プロジェクト: AaronZhangL/pocketsphinx.js
/*
 * Decode DATADIR "/goforward.raw" three ways and check each result against
 * `expected`:
 *   1. ps_decode_raw() on the whole file,
 *   2. ps_process_raw() in 2048-sample chunks,
 *   3. ps_process_cep() one frame at a time on MFCCs computed here.
 *
 * config:   decoder configuration; it is freed before returning.
 * sname:    label printed in front of every result line.
 * expected: hypothesis string every pass must produce.
 *
 * Returns 0 when the end is reached; checks go through the TEST_* macros.
 */
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));
    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    /* Fail the test instead of handing a NULL hypothesis to strcmp(). */
    TEST_ASSERT(hyp != NULL);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    /* Only a rewind is needed here: the chunk size is fixed, so the file
     * length does not have to be measured for this pass. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    TEST_ASSERT(hyp != NULL);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    /* First call passes no output buffer and is used only to obtain nfr;
     * one extra row is allocated for the frame written by fe_end_utt(). */
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr, NULL);
    cepbuf = ckd_calloc_2d(nfr + 1,
                   fe_get_output_size(ps->acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    TEST_ASSERT(hyp != NULL);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    /* Walk the word segmentation and print per-word scores. */
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        /* Due to numerical errors with float it sometimes could go out of 0 */
        TEST_ASSERT(post <= 2);
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
コード例 #7
0
ファイル: utt.c プロジェクト: Ankit77/cmusphinx
/*
 * Decode a single utterance.
 *
 * data:   the kb_t knowledge base (passed as void *).
 * ur:     utterance resources (input file, optional LM and MLLR names).
 * sf, ef: start/end frame, forwarded to feat_s2mfc2feat() for cepstral input.
 * uttid:  utterance id, logged and stored in the knowledge base.
 *
 * Features come from the waveform when -adcin is set, otherwise from a
 * cepstrum file.  Any failure to obtain them is fatal.
 */
void
utt_decode(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    kb_t *kb;
    kbcore_t *kbcore;
    cmd_ln_t *config;
    int32 num_decode_frame;
    int32 total_frame;
    stat_t *st;
    srch_t *s;

    num_decode_frame = 0;
    E_INFO("Processing: %s\n", uttid);

    kb = (kb_t *) data;
    kbcore = kb->kbcore;
    config = kbcore_config(kbcore);
    kb_set_uttid(uttid, ur->uttfile, kb);
    st = kb->stat;

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }
        /* Release the MFCC buffer of the previous utterance and clear the
         * pointer so it never dangles. */
        if (kb->mfcc) {
            ckd_free_2d((void **)kb->mfcc);
            kb->mfcc = NULL;
        }
        fe_start_utt(kb->fe);
        if (fe_process_utt(kb->fe, adcdata, nsamps, &kb->mfcc, &total_frame) < 0) {
            /* The file name was passed without a matching conversion
             * specifier; report it properly. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);
        if (total_frame > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n", S3_MAX_FRAMES);
        }
        if ((total_frame = feat_s2mfc2feat_live(kbcore_fcb(kbcore),
                                                kb->mfcc,
                                                &total_frame,
                                                TRUE, TRUE,
                                                kb->feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
    }
    else {
        /* Read mfc file and build feature vectors for entire utterance */
        if ((total_frame = feat_s2mfc2feat(kbcore_fcb(kbcore), ur->uttfile,
                                           cmd_ln_str_r(config, "-cepdir"),
                                           cmd_ln_str_r(config, "-cepext"), sf, ef,
                                           kb->feat, S3_MAX_FRAMES)) < 0) {
            E_FATAL("Cannot read file %s. Forced exit\n", ur->uttfile);
        }
    }

    /* Also need to make sure we don't set resource if it is the same. Well, this mechanism
       could be provided inside the following function. 
    */
    s = kb->srch;
    if (ur->lmname != NULL)
        srch_set_lm(s, ur->lmname);
    if (ur->regmatname != NULL)
        kb_setmllr(ur->regmatname, ur->cb2mllrname, kb);
    /* These are necessary! */
    s->uttid = kb->uttid;
    s->uttfile = kb->uttfile;

    utt_begin(kb);
    utt_decode_block(kb->feat, total_frame, &num_decode_frame, kb);
    utt_end(kb);

    /* Accumulate this utterance's frame count into the running total. */
    st->tot_fr += st->nfr;
}
コード例 #8
0
/*
 * Acoustic-model regression test.
 *
 * Scores DATADIR "/goforward.raw" through the same acmod_t several times
 * (raw audio in 2048-sample chunks, raw audio in one call, MFCCs
 * incrementally, MFCCs in one call, and after acmod_rewind()).  The first
 * pass records the best senone score of the first 190 frames in bestsen1[];
 * every later pass compares its scores against that record and checks that
 * frame indices count up from zero.
 *
 * Returns 0 when the end is reached; checks go through the TEST_* macros.
 */
int
main(int argc, char *argv[])
{
    acmod_t *acmod;
    logmath_t *lmath;
    cmd_ln_t *config;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    mfcc_t **cepbuf, **cptr;
    size_t nread, nsamps;
    int nfr;
    int frame_counter;
    int bestsen1[270];  /* reference scores; only indices below 190 are used */

    lmath = logmath_init(1.0001, 0, 0);
    config = cmd_ln_init(NULL, ps_args(), TRUE,
                 "-mdef", MODELDIR "/en-us/en-us/mdef",
                 "-mean", MODELDIR "/en-us/en-us/means",
                 "-var", MODELDIR "/en-us/en-us/variances",
                 "-tmat", MODELDIR "/en-us/en-us/transition_matrices",
                 "-sendump", MODELDIR "/en-us/en-us/sendump",
                 "-compallsen", "true",
                 "-cmn", "prior",
                 "-tmatfloor", "0.0001",
                 "-mixwfloor", "0.001",
                 "-varfloor", "0.0001",
                 "-mmap", "no",
                 "-topn", "4",
                 "-ds", "1",
                 "-input_endian", "little",
                 "-samprate", "16000", NULL);
    TEST_ASSERT(config);
    cmd_ln_parse_file_r(config, ps_args(), MODELDIR "/en-us/en-us/feat.params", FALSE);

    TEST_ASSERT(acmod = acmod_init(config, lmath, NULL, NULL));
    /* The CMN prior is reset to the same value before every pass. */
    cmn_prior_set(acmod->fcb->cmn_struct, prior);

    /* Pass 1: raw audio fed in 2048-sample chunks; records bestsen1[]. */
    nsamps = 2048;
    frame_counter = 0;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    TEST_EQUAL(0, acmod_start_utt(acmod));
    E_INFO("Incremental(2048):\n");
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        bptr = buf;
        /* Keep calling until no frames are produced and no samples remain. */
        while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0 || nread > 0) {
            int16 best_score;
            int frame_idx = -1, best_senid;
            /* Score and consume every feature frame currently buffered. */
            while (acmod->n_feat_frame > 0) {
                acmod_score(acmod, &frame_idx);
                acmod_advance(acmod);
                best_score = acmod_best_score(acmod, &best_senid);
                E_INFO("Frame %d best senone %d score %d\n",
                       frame_idx, best_senid, best_score);
                TEST_EQUAL(frame_counter, frame_idx);
                if (frame_counter < 190)
                    bestsen1[frame_counter] = best_score;
                ++frame_counter;
                frame_idx = -1;
            }
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    nread = 0;
    /* Drain the frames that remain after the utterance has been ended. */
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                bestsen1[frame_counter] = best_score;
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Now try to process the whole thing at once. */
    E_INFO("Whole utterance:\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    /* The stream was read to the end above, so ftell() is the file size. */
    nsamps = ftell(rawfh) / sizeof(*buf);
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    bptr = buf;
    TEST_EQUAL(0, acmod_start_utt(acmod));
    acmod_process_raw(acmod, &bptr, &nsamps, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    /* Pass 2: compare against the scores recorded in pass 1. */
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
               frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Now process MFCCs and make sure we get the same results. */
    /* frame_counter holds the number of frames scored in the previous pass. */
    cepbuf = ckd_calloc_2d(frame_counter,
                   fe_get_output_size(acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL);
    /* The last row of cepbuf is handed to fe_end_utt() as its output frame. */
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    /* Pass 3: MFCC frames fed through acmod_process_cep() incrementally. */
    E_INFO("Incremental(MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    frame_counter = 0;
    while ((acmod_process_cep(acmod, &cptr, &nfr, FALSE)) > 0) {
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    /* One more call with zero input frames after the utterance has ended,
     * then drain whatever feature frames are buffered. */
    nfr = 0;
    acmod_process_cep(acmod, &cptr, &nfr, FALSE);
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Note that we have to process the whole thing again because
     * !#@$@ s2mfc2feat modifies its argument (not for long) */
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr, NULL);
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    /* Pass 4: all MFCC frames handed over in a single call. */
    E_INFO("Whole utterance (MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    acmod_process_cep(acmod, &cptr, &nfr, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Pass 5: rewind the acoustic model and score the same frames again. */
    E_INFO("Rewound (MFCC):\n");
    TEST_EQUAL(0, acmod_rewind(acmod));
    {
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Clean up, go home. */
    ckd_free_2d(cepbuf);
    fclose(rawfh);
    ckd_free(buf);
    acmod_free(acmod);
    logmath_free(lmath);
    cmd_ln_free_r(config);
    return 0;
}
コード例 #9
0
ファイル: live.c プロジェクト: wdebeaum/cabot
/*
 * Decode one block of raw audio belonging to the current live utterance.
 *
 * samples/nsamples : the audio block to process.
 * live_endutt      : non-zero when this block is the last one of the
 *                    utterance; the front end is flushed and the utterance
 *                    is closed.
 * ohyp             : receives the partial hypothesis (parthyp) when at least
 *                    one cepstral frame was produced for this block; left
 *                    untouched otherwise.
 *
 * Returns the file-level parthyplen, which is only refreshed when frames
 * were decoded in this call.
 *
 * Side effects: on the first block of an utterance a raw audio dump file
 * "<-outrawdir>/<uttid>.raw" is opened; every block is appended to it and it
 * is closed on the last block.  The running peak_amplitude is updated.
 */
int32 live_utt_decode_block (int16 *samples, int32 nsamples, 
			     int32 live_endutt, partialhyp_t **ohyp)
{
    static int32 live_begin_new_utt = 1;
    static int32 frmno;
    static float32 ***live_feat = NULL;
    /* 2004/08/27 L Galescu -- added raw audio file saving */
    static char uttfn[1024];
    static FILE *rawfp = NULL;

    int32   live_nfr;
    int32   live_nfeatvec = 0;
    int32   nwds = 0;
    float32 **mfcbuf = NULL;
    int16 block_peak_amplitude;

    /* The feature buffer is allocated once and reused for every block. */
    if (live_feat == NULL)
        live_feat = feat_array_alloc (kbcore_fcb(kbcore), LIVEBUFBLOCKSIZE);

    if (live_begin_new_utt) {
        const char *rawdir;
        int pathlen;

        fe_start_utt(fe);
        utt_begin (kb);
        frmno = 0;
        kb->nfr = 0;
        kb->utt_hmm_eval = 0;
        kb->utt_sen_eval = 0;
        kb->utt_gau_eval = 0;
        live_begin_new_utt = 0;

        /* Build the dump file name without overrunning uttfn; when the name
         * cannot be built, raw saving is simply skipped for this utterance. */
        rawfp = NULL;
        rawdir = cmd_ln_str("-outrawdir");
        if (rawdir != NULL) {
            pathlen = snprintf(uttfn, sizeof(uttfn), "%s/%s.raw",
                               rawdir, kb->uttid);
            if (pathlen >= 0 && (size_t) pathlen < sizeof(uttfn))
                rawfp = fopen(uttfn, "wb");
            else
                E_INFO("raw audio file name too long, not saving audio\n");
        }
    }

    /* LG 20080613 */
    block_peak_amplitude = get_peak_amplitude(samples, nsamples);
    if (block_peak_amplitude > peak_amplitude)
        peak_amplitude = block_peak_amplitude;
    E_INFO("segment peak %d\n",peak_amplitude);

    /* 10.jan.01 RAH, fe_process_utt now requires ***mfcbuf and it allocates
     * the memory internally */
    live_nfr = fe_process_utt(fe, samples, nsamples, &mfcbuf);

    if (rawfp != NULL) {
        fwrite(samples, sizeof(int16), nsamples, rawfp);
        if (live_endutt) {
            fclose(rawfp);
            rawfp = NULL;       /* never keep a pointer to a closed stream */
        }
    }

    if (live_endutt) {
        /* RAH, It seems that we shouldn't throw out this data */
        fe_end_utt(fe,dummyframe); /* Flush out the fe */
    }

    /* lgalescu 2004/10/13 -- feature computation, decoding and hypothesis
     * extraction are only done when the front end produced frames. */
    if (live_nfr > 0) {
        /* Compute feature vectors.
         * NOTE(review): live_begin_new_utt has already been cleared above, so
         * the begin-of-utterance argument is always 0 here -- confirm whether
         * feat_s2mfc2feat_block() relies on it. */
        live_nfeatvec = feat_s2mfc2feat_block(kbcore_fcb(kbcore), mfcbuf,
                                              live_nfr, live_begin_new_utt,
                                              live_endutt, live_feat);

        /* decode the block */
        utt_decode_block (live_feat, live_nfeatvec, &frmno, kb, 
                          maxwpf, maxhistpf, maxhmmpf, ptranskip, hmmdumpfp);

        /* Pull out partial hypothesis */
        nwds = live_get_partialhyp(live_endutt);
        *ohyp = parthyp;
        parthyplen = nwds;
    }

    /* Clean up */
    if (live_endutt) {
        live_begin_new_utt = 1;
        kb->tot_fr += kb->nfr;
        utt_end(kb);
    }
    else {
        live_begin_new_utt = 0;
    }

    /* RAH, ARCHAN, lgalescu: fe_process_utt allocating a fresh buffer on
     * every call is considered dangerous and should be revisited; for now
     * the buffer must be released here whenever frames were produced. */
    if (live_nfr > 0) {
        ckd_free_2d((void **) mfcbuf);
    }

    return(parthyplen);
}
コード例 #10
0
/*
 * Allocate and initialise a front-end object from the parameters in P.
 *
 * Copies the general parameters, derives frame shift/size, allocates the
 * overflow-sample and Hamming-window buffers, builds the mel filter bank and
 * finally calls fe_start_utt() on the new object.
 *
 * Returns the new front end, or NULL on allocation failure, when the frame
 * size exceeds the FFT size, or when the filter-bank type is not MEL_SCALE.
 * Everything this function allocated itself is released before returning
 * NULL.
 */
fe_t *
fe_init(param_t const *P)
{
    fe_t *FE = (fe_t *) calloc(1, sizeof(fe_t));

    if (FE == NULL) {
        E_WARN("memory alloc failed in fe_init()\n");
        return (NULL);
    }

    /* transfer params to front end */
    fe_parse_general_params(P, FE);

    /* compute remaining FE parameters */
    /* Adding 0.5 before the cast rounds to the nearest integer instead of
     * truncating, e.g. 2.3 becomes 2 whereas 3.7 becomes 4.
     */
    FE->FRAME_SHIFT = (int32) (FE->SAMPLING_RATE / FE->FRAME_RATE + 0.5);
    FE->FRAME_SIZE = (int32) (FE->WINDOW_LENGTH * FE->SAMPLING_RATE + 0.5);
    FE->PRIOR = 0;
    FE->FRAME_COUNTER = 0;

    if (FE->FRAME_SIZE > (FE->FFT_SIZE)) {
        E_WARN
            ("Number of FFT points has to be a power of 2 higher than %d\n",
             (FE->FRAME_SIZE));
        goto fail_fe;
    }

    if (FE->dither) {
        fe_init_dither(FE->seed);
    }

    /* establish buffers for overflow samps and hamming window */
    FE->OVERFLOW_SAMPS = (int16 *) calloc(FE->FRAME_SIZE, sizeof(int16));
    FE->HAMMING_WINDOW =
        (window_t *) calloc(FE->FRAME_SIZE, sizeof(window_t));

    if (FE->OVERFLOW_SAMPS == NULL || FE->HAMMING_WINDOW == NULL) {
        E_WARN("memory alloc failed in fe_init()\n");
        goto fail_buffers;
    }

    /* create hamming window */
    fe_create_hamming(FE->HAMMING_WINDOW, FE->FRAME_SIZE);

    /* init and fill appropriate filter structure */
    if (FE->FB_TYPE == MEL_SCALE) {
        if ((FE->MEL_FB = (melfb_t *) calloc(1, sizeof(melfb_t))) == NULL) {
            E_WARN("memory alloc failed in fe_init()\n");
            goto fail_buffers;
        }
        /* transfer params to mel fb */
        fe_parse_melfb_params(P, FE->MEL_FB);

        fe_build_melfilters(FE->MEL_FB);
        fe_compute_melcosine(FE->MEL_FB);
    }
    else {
        E_WARN("MEL SCALE IS CURRENTLY THE ONLY IMPLEMENTATION!\n");
        goto fail_buffers;
    }

    if (P->verbose) {
        fe_print_current(FE);
    }

    /*** Z.A.B. ***/
    /*** Initialize the overflow buffers ***/
    fe_start_utt(FE);

    return (FE);

  fail_buffers:
    /* Either buffer may still be NULL here; free(NULL) is a no-op. */
    free(FE->OVERFLOW_SAMPS);
    free(FE->HAMMING_WINDOW);
  fail_fe:
    free(FE);
    return (NULL);
}
コード例 #11
0
ファイル: test_fe.c プロジェクト: AaronZhangL/pocketsphinx.js
/*
 * Front-end regression test.
 *
 * Reads the first 1024 samples of chan3.raw and checks that the front end
 * behaves the same whether the audio is processed one frame at a time, all
 * at once, in 256-sample pieces through fe_process_frames(), or in
 * 256-sample pieces through fe_process_utt().
 *
 * Returns 0 when it reaches the end; what a failed TEST_* check does is
 * defined by those macros, outside this file section.
 */
int
main(int argc, char *argv[])
{
    static const arg_t fe_args[] = {
        waveform_to_cepstral_command_line_macro(),
        { NULL, 0, NULL, NULL }
    };
    FILE *raw;
    cmd_ln_t *config;
    fe_t *fe;
    int16 buf[1024];
    int16 const *inptr;
    int32 frame_shift, frame_size;
    mfcc_t **cepbuf1, **cepbuf2, **cptr;
    int32 nfr, i;
    size_t nsamp;

    TEST_ASSERT(config = cmd_ln_parse_r(NULL, fe_args, argc, argv, FALSE));
    TEST_ASSERT(fe = fe_init_auto_r(config));

    TEST_EQUAL(fe_get_output_size(fe), DEFAULT_NUM_CEPSTRA);

    fe_get_input_size(fe, &frame_shift, &frame_size);
    TEST_EQUAL(frame_shift, DEFAULT_FRAME_SHIFT);
    TEST_EQUAL(frame_size, (int)(DEFAULT_WINDOW_LENGTH*DEFAULT_SAMPLING_RATE));

    TEST_ASSERT(raw = fopen(TESTDATADIR "/chan3.raw", "rb"));

    TEST_EQUAL(0, fe_start_utt(fe));
    TEST_EQUAL(1024, fread(buf, sizeof(int16), 1024, raw));

    /* With NULL input/output buffers, ask how many frames 1024 samples
     * would yield. */
    nsamp = 1024;
    TEST_ASSERT(fe_process_frames(fe, NULL, &nsamp, NULL, &nfr, NULL) >= 0);
    TEST_EQUAL(1024, nsamp);
    TEST_EQUAL(4, nfr);

    cepbuf1 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf1));
    inptr = &buf[0];
    nfr = 1;

    printf("frame_size %d frame_shift %d\n", frame_size, frame_shift);
    /* Process the first frame. */
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[0], &nfr, NULL) >= 0);
    /* inptr - buf is a ptrdiff_t and nsamp a size_t: both are bounded by the
     * 1024-element buffer, so narrowing to int for "%d" is safe. */
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
    /* First frame assumed to be unvoiced to init noise reduction */
    TEST_EQUAL(nfr, 0);

    /* Note that this next one won't actually consume any frames
     * of input, because it already got sufficient overflow
     * samples last time around.  This is implementation-dependent
     * so we shouldn't actually test for it. 
     * First 1024 samples of chan3.raw is silence, nfr is expected to stay 0 */
    nfr = 1;
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[1], &nfr, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
    TEST_EQUAL(nfr, 0);
    
    nfr = 1;
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[2], &nfr, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
    TEST_EQUAL(nfr, 0);

    nfr = 1;
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[3], &nfr, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, nfr);
    TEST_EQUAL(nfr, 0);

    nfr = 1;
    TEST_ASSERT(fe_end_utt(fe, cepbuf1[4], &nfr) >= 0);
    printf("nfr %d\n", nfr);
    TEST_EQUAL(nfr, 0);

    /* What we *should* test is that the output we get by
     * processing one frame at a time is exactly the same as what
     * we get from doing them all at once.  So let's do that */
    cepbuf2 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf2));
    inptr = &buf[0];
    nfr = 5;
    nsamp = 1024;
    TEST_EQUAL(0, fe_start_utt(fe));
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cepbuf2, &nfr, NULL) >= 0);
    /* First 1024 samples of chan3.raw is silence, nfr is expected to stay 0 */
    printf("nfr %d\n", nfr);
    TEST_EQUAL(nfr, 0);
    nfr = 1;
    TEST_ASSERT(fe_end_utt(fe, cepbuf2[4], &nfr) >= 0);
    printf("nfr %d\n", nfr);
    TEST_EQUAL(nfr, 0);
    /* fe_process_frames overwrites features if frame is unvoiced, 
     * so for cepbuf2 last frame is at 0 and previous are lost */
    printf("%d: ", 3);
    for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) {
        printf("%.2f,%.2f ",
               MFCC2FLOAT(cepbuf1[3][i]),
               MFCC2FLOAT(cepbuf2[0][i]));
        TEST_EQUAL_FLOAT(cepbuf1[3][i], cepbuf2[0][i]);
    }
    printf("\n");
    /* output features stored in cepbuf[4] by fe_end_utt 
     * should be the same */
    printf("%d: ", 4);
    for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) {
        printf("%.2f,%.2f ",
               MFCC2FLOAT(cepbuf1[4][i]),
               MFCC2FLOAT(cepbuf2[4][i]));
        TEST_EQUAL_FLOAT(cepbuf1[4][i], cepbuf2[4][i]);
    }
    printf("\n");

    /* Now, also test to make sure that even if we feed data in
     * little tiny bits we can still make things work. */
    /* Clears all five rows through the first row pointer; this assumes
     * ckd_calloc_2d() lays the rows out contiguously. */
    memset(cepbuf2[0], 0, 5 * DEFAULT_NUM_CEPSTRA * sizeof(**cepbuf2));
    inptr = &buf[0];
    cptr = &cepbuf2[0];
    nfr = 5;
    i = 5;
    nsamp = 256;
    TEST_EQUAL(0, fe_start_utt(fe));
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
    cptr += i;
    nfr -= i;
    i = nfr;
    nsamp = 256;
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
    cptr += i;
    nfr -= i;
    i = nfr;
    nsamp = 256;
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
    cptr += i;
    nfr -= i;
    i = nfr;
    nsamp = 256;
    TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i, NULL) >= 0);
    printf("inptr %d nsamp %d nfr %d\n", (int)(inptr - buf), (int)nsamp, i);
    cptr += i;
    nfr -= i;
    printf("nfr %d\n", nfr);
    TEST_EQUAL(nfr, 5);
    /* inptr contains unvoiced audio, 
     * no out feature frames will be produced */
    TEST_ASSERT(fe_end_utt(fe, *cptr, &nfr) >= 0);
    printf("nfr %d\n", nfr);
    TEST_EQUAL(nfr, 0);

    /* fe_process_frames overwrites features if frame is unvoiced, 
     * so for cepbuf2 last frame is at 0 and previous are lost */
    printf("%d: ", 4);
    for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) {
        printf("%.2f,%.2f ",
               MFCC2FLOAT(cepbuf1[4][i]),
               MFCC2FLOAT(cepbuf2[0][i]));
        TEST_EQUAL_FLOAT(cepbuf1[4][i], cepbuf2[0][i]);
    }
    printf("\n");

    /* And now, finally, test fe_process_utt() */
    inptr = &buf[0];
    i = 0;
    TEST_EQUAL(0, fe_start_utt(fe));
    TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
    printf("i %d nfr %d\n", i, nfr);
    if (nfr)
        memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
    ckd_free_2d(cptr);
    i += nfr;
    inptr += 256;
    TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
    printf("i %d nfr %d\n", i, nfr);
    if (nfr)
        memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
    ckd_free_2d(cptr);
    i += nfr;
    inptr += 256;
    TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
    printf("i %d nfr %d\n", i, nfr);
    if (nfr)
        memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
    ckd_free_2d(cptr);
    i += nfr;
    inptr += 256;
    TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
    printf("i %d nfr %d\n", i, nfr);
    if (nfr)
        memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
    ckd_free_2d(cptr);
    i += nfr;
    inptr += 256;
    TEST_ASSERT(fe_end_utt(fe, cepbuf2[i], &nfr) >= 0);
    printf("i %d nfr %d\n", i, nfr);
    TEST_EQUAL(nfr, 0);

    /* fe_process_utt overwrites features if frame is unvoiced, 
     * so for cepbuf2 last frame is at 0 and previous are lost */
    printf("%d: ", 4);
    for (i = 0; i < DEFAULT_NUM_CEPSTRA; ++i) {
        printf("%.2f,%.2f ",
               MFCC2FLOAT(cepbuf1[4][i]),
               MFCC2FLOAT(cepbuf2[0][i]));
        TEST_EQUAL_FLOAT(cepbuf1[4][i], cepbuf2[0][i]);
    }
    printf("\n");

    ckd_free_2d(cepbuf1);
    ckd_free_2d(cepbuf2);
    fclose(raw);
    fe_free(fe);
    cmd_ln_free_r(config);

    return 0;
}
コード例 #12
0
/*
 * Push one block of raw samples through the front end, feature computation
 * and the decoder.
 *
 * _decoder    : live decoder state; must not be NULL.
 * samples     : audio block; byte-swapped IN PLACE when _decoder->swap is set.
 * num_samples : number of samples in the block.
 * end_utt     : non-zero when the utterance ends with this block; the front
 *               end is then flushed with fe_end_utt().
 *
 * The call is treated as the beginning of an utterance when no frames have
 * been entered yet (_decoder->num_frames_entered == 0).
 */
void
ld_process_raw_impl(live_decoder_t * _decoder,
                    int16 * samples, int32 num_samples, int32 end_utt)
{
    float32 dummy_frame[MAX_CEP_LEN];
    float32 **frames = NULL;
    int32 num_frames = 0;
    int32 num_features = 0;
    int32 begin_utt;
    int32 return_value;
    int32 zero_energy;
    int i;

    /* Validate before the first dereference of _decoder. */
    assert(_decoder != NULL);
    begin_utt = (_decoder->num_frames_entered == 0);

    if (begin_utt) {
        fe_start_utt(_decoder->fe);
    }

    if (_decoder->swap) {
        for (i = 0; i < num_samples; i++) {
            SWAP_INT16(samples + i);
        }
    }

    return_value =
        fe_process_utt(_decoder->fe, samples, num_samples, &frames,
                       &num_frames);
    /* Remember the status now; fe_end_utt() below overwrites return_value. */
    zero_energy = (FE_ZERO_ENERGY_ERROR == return_value);

    if (end_utt) {
        /* NOTE(review): fe_end_utt() overwrites num_frames, so any frames
         * produced by fe_process_utt() in this same call are not decoded.
         * Confirm that callers always flush with an empty sample block. */
        return_value = fe_end_utt(_decoder->fe, dummy_frame, &num_frames);
        if (FE_ZERO_ENERGY_ERROR == return_value) {
            zero_energy = 1;
        }
        if (num_frames != 0) {
            /* ARCHAN: If num_frames !=0, assign this last ending frame to
               frames again.  The computation will then be correct.  Should
               clean up the finite state logic in fe_interface layer. 
             */
            /* Release the buffer from fe_process_utt() before replacing it,
             * otherwise it would leak. */
            if (frames != NULL) {
                ckd_free_2d((void **) frames);
            }
            frames =
                (float32 **) ckd_calloc_2d(1, _decoder->fe->NUM_CEPSTRA,
                                           sizeof(float32));
            memcpy(frames[0], dummy_frame,
                   _decoder->fe->NUM_CEPSTRA * sizeof(float32));
        }
    }

    if (zero_energy) {
        E_WARN("Zero energy frame(s). Consider using dither\n");
    }

    if (num_frames > 0) {
        num_features = feat_s2mfc2feat_block(kbcore_fcb(_decoder->kbcore),
                                             frames,
                                             num_frames,
                                             begin_utt,
                                             end_utt, _decoder->features);
        _decoder->num_frames_entered += num_frames;
    }

    if (num_features > 0) {
        utt_decode_block(_decoder->features,
                         num_features,
                         &_decoder->num_frames_decoded, &_decoder->kb);
    }

    if (frames != NULL) {
        ckd_free_2d((void **) frames);
    }
}
コード例 #13
0
ファイル: cdcn_test.c プロジェクト: Jared-Prime/cmusphinx
/*
 * CDCN test driver.
 *
 * Reads control-file entries from stdin; for each entry converts the raw
 * audio file to cepstra, updates and applies CDCN normalisation, and runs
 * the normalised cepstra through the recogniser.
 *
 * Only whole-file audio input is supported (asserted below).
 * Returns 0 at end of input; exits with -1 if the front end cannot be
 * initialised.
 */
int main (int32 argc, char *argv[])
{
    char line[4096], filename[4096], idspec[4096], *uttid, *result;
    int32 sf, ef, sps, adcin, nf;
    int16 adbuf[4096];
    int32 i, k;
    float32 **mfcbuf;
    CDCN_type *cdcn;
    param_t param;
    fe_t *fe = NULL;


    fbs_init (argc, argv);
    /* Assume that cdcn_init is part of the above fbs_init() */
    cdcn = uttproc_get_cdcn_ptr();

    adcin = query_adc_input();
    assert (adcin);	/* Limited to processing audio input files (not cep) */
    sps = query_sampling_rate();

    fe_init_params(&param);
    param.SAMPLING_RATE = (float)sps;

    if ((fe = fe_init (&param)) == NULL)
    {
        E_ERROR("fe_init() failed to initialize\n");
        exit (-1);
    }
    mfcbuf = (float32 **) ckd_calloc_2d (8192, 13, sizeof(float32));

    /* Process "control file" input through stdin */
    while (fgets (line, sizeof(line), stdin) != NULL) {
        if (uttproc_parse_ctlfile_entry (line, filename, &sf, &ef, idspec) < 0)
            continue;
        assert ((sf < 0) && (ef < 0));	/* Processing entire input file */

        uttid = build_uttid (idspec);

        uttproc_begin_utt (uttid);

        /* Convert raw data file to cepstra */
        if (uttfile_open (filename) < 0) {
            E_ERROR("uttfile_open(%s) failed\n", filename);
            continue;
        }
        fe_start_utt(fe);
        nf = 0;
        while ((k = adc_file_read (adbuf, 4096)) >= 0) {
            if (fe_process_utt (fe, adbuf, k, mfcbuf+nf, &k) == FE_ZERO_ENERGY_ERROR) {
                E_WARN("Frames with zero energy. Consider using dither\n");
            }
            nf += k;
            /* WARNING!! No check for mfcbuf overflow */
        }
        fe_end_utt(fe, mfcbuf[nf], &k);
        /* The front end is reused for the next control-file entry, so it
         * must NOT be closed here; it is closed once after the loop. */
        uttfile_close ();

        if (nf <= 0) {
            E_ERROR("Empty utterance\n");
            continue;
        } else
            E_INFO("%d frames\n", nf);

        /* Update CDCN module */
        cdcn_converged_update (mfcbuf, /* cepstra buffer */
                               nf, /* Number of frames */
                               cdcn, /* The CDCN wrapper */
                               1 /* One iteration */
                              );

        /* CDCN */
        for (i = 0; i < nf; i++)
            cdcn_norm (mfcbuf[i], cdcn);

        /* Process normalized cepstra */
        uttproc_cepdata (mfcbuf, nf, 1);
        uttproc_end_utt ();
        uttproc_result (&k, &result, 1);
        printf ("\n");
        fflush (stdout);

    }

    /* Release the front end only after every utterance has been processed. */
    fe_close(fe);
    ckd_free_2d((void **)mfcbuf);

    fbs_end ();
    return 0;
}