Exemplo n.º 1
0
int
acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
{
    /* Output vector dimension needs to be the same. */
    if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) {
	E_ERROR("Configured feature length %d doesn't match feature extraction output size %d\n", 
		cmd_ln_int32_r(acmod->config, "-ceplen"), 
		fe_get_output_size(fe));
        return TRUE;
    }
    /* Feature parameters need to be the same. */
    /* ... */
    return FALSE;
}
Exemplo n.º 2
0
static int
acmod_process_full_raw(acmod_t *acmod,
                       int16 const **inout_raw,
                       size_t *inout_n_samps)
{
    int32 nfr, ntail;
    mfcc_t **cepptr;

    /* Write to logging file if any. */
    if (acmod->rawfh)
        fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
    /* Resize mfc_buf to fit. */
    if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0)
        return -1;
    if (acmod->n_mfc_alloc < nfr + 1) {
        ckd_free_2d(acmod->mfc_buf);
        acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
                                       sizeof(**acmod->mfc_buf));
        acmod->n_mfc_alloc = nfr + 1;
    }
    acmod->n_mfc_frame = 0;
    acmod->mfc_outidx = 0;
    fe_start_utt(acmod->fe);
    if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
                          acmod->mfc_buf, &nfr) < 0)
        return -1;
    fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
    nfr += ntail;

    cepptr = acmod->mfc_buf;
    nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
    acmod->n_mfc_frame = 0;
    return nfr;
}
Exemplo n.º 3
0
void
segment_audio()
{
    FILE *file;
    int16 pcm_buf[BLOCKSIZE];
    mfcc_t **cep_buf;
    int16 voiced_buf = NULL;
    int32 voiced_nsamps, out_frameidx, uttstart = 0;
    char file_name[1024];
    uint8 cur_vad_state, vad_state, writing;
    int uttno, uttlen, sample_rate;
    int32 nframes, nframes_tmp;
    int16 frame_size, frame_shift, frame_rate;
    size_t k;

    sample_rate = (int) cmd_ln_float32_r(config, "-samprate");
    frame_rate = cmd_ln_int32_r(config, "-frate");
    frame_size =
        (int32) (cmd_ln_float32_r(config, "-wlen") * sample_rate + 0.5);
    frame_shift =
        (int32) (sample_rate / cmd_ln_int32_r(config, "-frate") + 0.5);
    nframes = (BLOCKSIZE - frame_size) / frame_shift;
    cep_buf =
        (mfcc_t **) ckd_calloc_2d(nframes, fe_get_output_size(fe),
                                  sizeof(mfcc_t));

    uttno = 0;
    uttlen = 0;
    cur_vad_state = 0;
    voiced_nsamps = 0;
    writing = 0;
    file = NULL;
    fe_start_stream(fe);
    fe_start_utt(fe);
    while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) {
        int16 const *pcm_buf_tmp;
        pcm_buf_tmp = &pcm_buf[0];
        while (k) {
            nframes_tmp = nframes;
            fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf,
                                  &nframes_tmp, voiced_buf,
                                  &voiced_nsamps, &out_frameidx);
            if (out_frameidx > 0) {
        	uttstart = out_frameidx;
            }
            vad_state = fe_get_vad_state(fe);
            if (!cur_vad_state && vad_state) {
                /* silence->speech transition, time to start new file */
                uttno++;
                if (!singlefile) {
                    sprintf(file_name, "%s%04d.raw", infile_path, uttno);
                    if ((file = fopen(file_name, "wb")) == NULL)
                          E_FATAL_SYSTEM("Failed to open '%s' for writing",
                                         file_name);
                } else {
                    sprintf(file_name, "%s.raw", infile_path);
                    if ((file = fopen(file_name, "ab")) == NULL)
                          E_FATAL_SYSTEM("Failed to open '%s' for writing",
                                         file_name);
		}
		writing = 1;
            }

            if (writing && file && voiced_nsamps > 0) {
                fwrite(voiced_buf, sizeof(int16), voiced_nsamps, file);
                uttlen += voiced_nsamps;
            }

            if (cur_vad_state && !vad_state) {
                /* speech -> silence transition, time to finish file */
                fclose(file);
	        printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
    		       uttno,
    		       file_name,
    	    	       ((double) uttstart) / frame_rate,
            	        uttlen,
            	       ((double) uttlen) / sample_rate);
                fflush(stdout);
                fe_end_utt(fe, cep_buf[0], &nframes_tmp);
                writing = 0;
                uttlen = 0;
                voiced_nsamps = 0;
                fe_start_utt(fe);
            }
            cur_vad_state = vad_state;
        }
    }

    if (writing) {
        fclose(file);
	printf("Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
    	        uttno,
    		file_name,
    	    	((double) uttstart) / frame_rate,
            	uttlen,
                ((double) uttlen) / sample_rate);
        fflush(stdout);
    }
    fe_end_utt(fe, cep_buf[0], &nframes);
    ckd_free_2d(cep_buf);
}
Exemplo n.º 4
0
int
ps_decoder_test(cmd_ln_t *config, char const *sname, char const *expected)
{
    ps_decoder_t *ps;
    mfcc_t **cepbuf;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    size_t nread;
    size_t nsamps;
    int32 nfr, i, score, prob;
    char const *hyp;
    char const *uttid;
    double n_speech, n_cpu, n_wall;
    ps_seg_t *seg;

    TEST_ASSERT(ps = ps_init(config));
    /* Test it first with pocketsphinx_decode_raw() */
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    ps_decode_raw(ps, rawfh, "goforward", -1);
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Test it with ps_process_raw() */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    nsamps = 2048;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        ps_process_raw(ps, buf, nread, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000000"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    /* Now read the whole file and produce an MFCC buffer. */
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_END);
    nsamps = ftell(rawfh) / sizeof(*buf);
    fseek(rawfh, 0, SEEK_SET);
    bptr = buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, NULL, &nfr);
    cepbuf = ckd_calloc_2d(nfr + 1,
                   fe_get_output_size(ps->acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(ps->acmod->fe);
    fe_process_frames(ps->acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(ps->acmod->fe, cepbuf[nfr], &i);

    /* Decode it with process_cep() */
    TEST_EQUAL(0, ps_start_utt(ps, NULL));
    for (i = 0; i < nfr; ++i) {
        ps_process_cep(ps, cepbuf + i, 1, FALSE, FALSE);
    }
    TEST_EQUAL(0, ps_end_utt(ps));
    hyp = ps_get_hyp(ps, &score, &uttid);
    prob = ps_get_prob(ps, &uttid);
    printf("%s (%s): %s (%d, %d)\n", sname, uttid, hyp, score, prob);
    TEST_EQUAL(0, strcmp(uttid, "000000001"));
    TEST_EQUAL(0, strcmp(hyp, expected));
    TEST_ASSERT(prob <= 0);
    for (seg = ps_seg_iter(ps, &score); seg;
         seg = ps_seg_next(seg)) {
        char const *word;
        int sf, ef;
        int32 post, lscr, ascr, lback;

        word = ps_seg_word(seg);
        ps_seg_frames(seg, &sf, &ef);
        post = ps_seg_prob(seg, &ascr, &lscr, &lback);
        printf("%s (%d:%d) P(w|o) = %f ascr = %d lscr = %d lback = %d\n", word, sf, ef,
               logmath_exp(ps_get_logmath(ps), post), ascr, lscr, lback);
        TEST_ASSERT(post <= 2); // Due to numerical errors with float it sometimes could go out of 0
    }

    ps_get_utt_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("%.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("%.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);
    ps_get_all_time(ps, &n_speech, &n_cpu, &n_wall);
    printf("TOTAL: %.2f seconds speech, %.2f seconds CPU, %.2f seconds wall\n",
           n_speech, n_cpu, n_wall);
    printf("TOTAL: %.2f xRT (CPU), %.2f xRT (elapsed)\n",
           n_cpu / n_speech, n_wall / n_speech);

    fclose(rawfh);
    ps_free(ps);
    cmd_ln_free_r(config);
    ckd_free_2d(cepbuf);
    ckd_free(buf);

    return 0;
}
Exemplo n.º 5
0
int
main(int argc, char *argv[])
{
    acmod_t *acmod;
    logmath_t *lmath;
    cmd_ln_t *config;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    mfcc_t **cepbuf, **cptr;
    size_t nread, nsamps;
    int nfr;
    int frame_counter;
    int bestsen1[270];

    lmath = logmath_init(1.0001, 0, 0);
    config = cmd_ln_init(NULL, ps_args(), TRUE,
                 "-featparams", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/feat.params",
                 "-mdef", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/mdef",
                 "-mean", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/means",
                 "-var", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/variances",
                 "-tmat", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/transition_matrices",
                 "-sendump", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/sendump",
                 "-compallsen", "true",
                 "-cmn", "prior",
                 "-tmatfloor", "0.0001",
                 "-mixwfloor", "0.001",
                 "-varfloor", "0.0001",
                 "-mmap", "no",
                 "-topn", "4",
                 "-ds", "1",
                 "-input_endian", "little",
                 "-samprate", "16000", NULL);
    TEST_ASSERT(config);
    TEST_ASSERT(acmod = acmod_init(config, lmath, NULL, NULL));
    cmn_prior_set(acmod->fcb->cmn_struct, prior);

    nsamps = 2048;
    frame_counter = 0;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    TEST_EQUAL(0, acmod_start_utt(acmod));
    E_INFO("Incremental(2048):\n");
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        bptr = buf;
        while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0 || nread > 0) {
            int16 const *senscr;
            int16 best_score;
            int frame_idx = -1, best_senid;
            while (acmod->n_feat_frame > 0) {
                senscr = acmod_score(acmod, &frame_idx);
                acmod_advance(acmod);
                best_score = acmod_best_score(acmod, &best_senid);
                E_INFO("Frame %d best senone %d score %d\n",
                       frame_idx, best_senid, best_score);
                TEST_EQUAL(frame_counter, frame_idx);
                if (frame_counter < 190)
                    bestsen1[frame_counter] = best_score;
                ++frame_counter;
                frame_idx = -1;
            }
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    nread = 0;
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                bestsen1[frame_counter] = best_score;
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Now try to process the whole thing at once. */
    E_INFO("Whole utterance:\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    nsamps = ftell(rawfh) / sizeof(*buf);
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    bptr = buf;
    TEST_EQUAL(0, acmod_start_utt(acmod));
    acmod_process_raw(acmod, &bptr, &nsamps, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
               frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Now process MFCCs and make sure we get the same results. */
    cepbuf = ckd_calloc_2d(frame_counter,
                   fe_get_output_size(acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    E_INFO("Incremental(MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    frame_counter = 0;
    while ((acmod_process_cep(acmod, &cptr, &nfr, FALSE)) > 0) {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    nfr = 0;
    acmod_process_cep(acmod, &cptr, &nfr, FALSE);
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Note that we have to process the whole thing again because
     * !#@$@ s2mfc2feat modifies its argument (not for long) */
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    E_INFO("Whole utterance (MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    acmod_process_cep(acmod, &cptr, &nfr, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    E_INFO("Rewound (MFCC):\n");
    TEST_EQUAL(0, acmod_rewind(acmod));
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Clean up, go home. */
    ckd_free_2d(cepbuf);
    fclose(rawfh);
    ckd_free(buf);
    acmod_free(acmod);
    logmath_free(lmath);
    cmd_ln_free_r(config);
    return 0;
}
Exemplo n.º 6
0
int
main(int argc, char *argv[])
{
	static const arg_t fe_args[] = {
		waveform_to_cepstral_command_line_macro(),
		{ NULL, 0, NULL, NULL }
	};
	FILE *raw;
	cmd_ln_t *config;
	fe_t *fe;
	int16 buf[1024];
	int16 const *inptr;
	int32 frame_shift, frame_size;
	mfcc_t **cepbuf1, **cepbuf2, **cptr;
	int32 nfr, i;
	size_t nsamp;

	TEST_ASSERT(config = cmd_ln_parse_r(NULL, fe_args, argc, argv, FALSE));
	TEST_ASSERT(fe = fe_init_auto_r(config));

	TEST_EQUAL(fe_get_output_size(fe), DEFAULT_NUM_CEPSTRA);

	fe_get_input_size(fe, &frame_shift, &frame_size);
	TEST_EQUAL(frame_shift, DEFAULT_FRAME_SHIFT);
	TEST_EQUAL(frame_size, (int)(DEFAULT_WINDOW_LENGTH*DEFAULT_SAMPLING_RATE));

	TEST_ASSERT(raw = fopen(TESTDATADIR "/chan3.raw", "rb"));

	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_EQUAL(1024, fread(buf, sizeof(int16), 1024, raw));

	nsamp = 1024;
	TEST_ASSERT(fe_process_frames(fe, NULL, &nsamp, NULL, &nfr) >= 0);
	TEST_EQUAL(1024, nsamp);
	TEST_EQUAL(4, nfr);

	cepbuf1 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf1));
	inptr = &buf[0];
	nfr = 1;

	printf("frame_size %d frame_shift %d\n", frame_size, frame_shift);
	/* Process the first frame. */
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[0], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	/* Note that this next one won't actually consume any frames
	 * of input, because it already got sufficient overflow
	 * samples last time around.  This is implementation-dependent
	 * so we shouldn't actually test for it. */
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[1], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[2], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, &cepbuf1[3], &nfr) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, nfr);
	TEST_EQUAL(nfr, 1);

	TEST_ASSERT(fe_end_utt(fe, cepbuf1[4], &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 1);

	/* What we *should* test is that the output we get by
	 * processing one frame at a time is exactly the same as what
	 * we get from doing them all at once.  So let's do that */
	cepbuf2 = ckd_calloc_2d(5, DEFAULT_NUM_CEPSTRA, sizeof(**cepbuf2));
	inptr = &buf[0];
	nfr = 5;
	nsamp = 1024;
	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cepbuf2, &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 4);
	nfr = 1;
	TEST_ASSERT(fe_end_utt(fe, cepbuf2[4], &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 1);

	for (i = 0; i < 5; ++i) {
		int j;
		printf("%d: ", i);
		for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) {
			printf("%.2f,%.2f ",
			       MFCC2FLOAT(cepbuf1[i][j]),
			       MFCC2FLOAT(cepbuf2[i][j]));
			TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]);
		}
		printf("\n");
	}

	/* Now, also test to make sure that even if we feed data in
	 * little tiny bits we can still make things work. */
	memset(cepbuf2[0], 0, 5 * DEFAULT_NUM_CEPSTRA * sizeof(**cepbuf2));
	inptr = &buf[0];
	cptr = &cepbuf2[0];
	nfr = 5;
	i = 5;
	nsamp = 256;
	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i);
	cptr += i;
	nfr -= i;
	i = nfr;
	nsamp = 256;
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i);
	cptr += i;
	nfr -= i;
	i = nfr;
	nsamp = 256;
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i);
	cptr += i;
	nfr -= i;
	i = nfr;
	nsamp = 256;
	TEST_ASSERT(fe_process_frames(fe, &inptr, &nsamp, cptr, &i) >= 0);
	printf("inptr %d nsamp %d nfr %d\n", inptr - buf, nsamp, i);
	cptr += i;
	nfr -= i;
	TEST_ASSERT(fe_end_utt(fe, *cptr, &nfr) >= 0);
	printf("nfr %d\n", nfr);
	TEST_EQUAL(nfr, 1);

	for (i = 0; i < 5; ++i) {
		int j;
		printf("%d: ", i);
		for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) {
			printf("%.2f,%.2f ",
			       MFCC2FLOAT(cepbuf1[i][j]),
			       MFCC2FLOAT(cepbuf2[i][j]));
			TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]);
		}
		printf("\n");
	}

	/* And now, finally, test fe_process_utt() */
	inptr = &buf[0];
	i = 0;
	TEST_EQUAL(0, fe_start_utt(fe));
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_process_utt(fe, inptr, 256, &cptr, &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	if (nfr)
		memcpy(cepbuf2[i], cptr[0], nfr * DEFAULT_NUM_CEPSTRA * sizeof(**cptr));
	ckd_free_2d(cptr);
	i += nfr;
	inptr += 256;
	TEST_ASSERT(fe_end_utt(fe, cepbuf2[i], &nfr) >= 0);
	printf("i %d nfr %d\n", i, nfr);
	TEST_EQUAL(nfr, 1);

	for (i = 0; i < 5; ++i) {
		int j;
		printf("%d: ", i);
		for (j = 0; j < DEFAULT_NUM_CEPSTRA; ++j) {
			printf("%.2f,%.2f ",
			       MFCC2FLOAT(cepbuf1[i][j]),
			       MFCC2FLOAT(cepbuf2[i][j]));
			TEST_EQUAL_FLOAT(cepbuf1[i][j], cepbuf2[i][j]);
		}
		printf("\n");
	}

	ckd_free_2d(cepbuf1);
	ckd_free_2d(cepbuf2);
	fclose(raw);
	fe_free(fe);

	return 0;
}
Exemplo n.º 7
0
int
sphinx_wave2feat_convert_file(sphinx_wave2feat_t *wtf,
                              char const *infile, char const *outfile)
{
    int nchans, minfft, nfft, nfloat, veclen;
    audio_type_t const *atype;
    int fshift, fsize;

    if (cmd_ln_boolean_r(wtf->config, "-verbose"))
        E_INFO("Converting %s to %s\n", infile, outfile);

    wtf->infile = ckd_salloc(infile);

    /* Detect input file type. */
    if ((atype = detect_audio_type(wtf)) == NULL)
        return -1;

    /* Determine whether to byteswap input. */
    wtf->byteswap = strcmp(cmd_ln_str_r(wtf->config, "-mach_endian"),
                           cmd_ln_str_r(wtf->config, "-input_endian"));

    /* Make sure the FFT size is sufficiently large. */
    minfft = (int)(cmd_ln_float32_r(wtf->config, "-samprate")
                   * cmd_ln_float32_r(wtf->config, "-wlen") + 0.5);
    for (nfft = 1; nfft < minfft; nfft <<= 1)
        ;
    if (nfft > cmd_ln_int32_r(wtf->config, "-nfft")) {
        E_WARN("Value of -nfft = %d is too small, increasing to %d\n",
               cmd_ln_int32_r(wtf->config, "-nfft"), nfft);
        cmd_ln_set_int32_r(wtf->config, "-nfft", nfft);
        fe_free(wtf->fe);
        wtf->fe = fe_init_auto_r(wtf->config);
    }

    /* Get the output frame size (if not already set). */
    if (wtf->veclen == 0)
        wtf->veclen = fe_get_output_size(wtf->fe);

    /* Set up the input and output buffers. */
    fe_get_input_size(wtf->fe, &fshift, &fsize);
    /* Want to get at least a whole frame plus shift in here.  Also we
       will either pick or mix multiple channels so we need to read
       them all at once. */
    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    wtf->blocksize = cmd_ln_int32_r(wtf->config, "-blocksize") * nchans;
    if (wtf->blocksize < (fsize + fshift) * nchans) {
        E_INFO("Block size of %d too small, increasing to %d\n",
               wtf->blocksize,
               (fsize + fshift) * nchans);
        wtf->blocksize = (fsize + fshift) * nchans;
    }
    wtf->audio = (short *)ckd_calloc(wtf->blocksize, sizeof(*wtf->audio));
    wtf->featsize = (wtf->blocksize / nchans - fsize) / fshift;

    /* Use the maximum of the input and output frame sizes to allocate this. */
    veclen = wtf->veclen;
    if (wtf->in_veclen > veclen) veclen = wtf->in_veclen;
    
    wtf->feat = (mfcc_t**)ckd_calloc_2d(wtf->featsize, veclen, sizeof(**wtf->feat));

    /* Let's go! */
    if ((wtf->outfh = fopen(outfile, "wb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s for writing", outfile);
        return -1;
    }
    /* Write an empty header, which we'll fill in later. */
    if (wtf->ot->output_header &&
        (*wtf->ot->output_header)(wtf, 0) < 0) {
        E_ERROR_SYSTEM("Failed to write empty header to %s\n", outfile);
        goto error_out;
    }
    wtf->outfile = ckd_salloc(outfile);

    if ((nfloat = (*atype->decode)(wtf)) < 0) {
    	E_ERROR("Failed to convert");
    	goto error_out;
    }

    if (wtf->ot->output_header) {
        if (fseek(wtf->outfh, 0, SEEK_SET) < 0) {
            E_ERROR_SYSTEM("Failed to seek to beginning of %s\n", outfile);
            goto error_out;
        }
        if ((*wtf->ot->output_header)(wtf, nfloat) < 0) {
            E_ERROR_SYSTEM("Failed to write header to %s\n", outfile);
            goto error_out;
        }
    }
    

    if (wtf->audio)
	ckd_free(wtf->audio);
    if (wtf->feat)
	ckd_free_2d(wtf->feat);
    if (wtf->infile)
        ckd_free(wtf->infile);
    if (wtf->outfile)
	ckd_free(wtf->outfile);

    wtf->audio = NULL;
    wtf->infile = NULL;
    wtf->feat = NULL;
    wtf->outfile = NULL;

    if (wtf->outfh)
	if (fclose(wtf->outfh) == EOF)
    	    E_ERROR_SYSTEM("Failed to close output file");
    wtf->outfh = NULL;

    return 0;

error_out:

    if (wtf->audio)
	ckd_free(wtf->audio);
    if (wtf->feat)
	ckd_free_2d(wtf->feat);
    if (wtf->infile)
        ckd_free(wtf->infile);
    if (wtf->outfile)
	ckd_free(wtf->outfile);

    wtf->audio = NULL;
    wtf->infile = NULL;
    wtf->feat = NULL;
    wtf->outfile = NULL;

    if (wtf->outfh)
	if (fclose(wtf->outfh) == EOF)
    	    E_ERROR_SYSTEM("Failed to close output file");
    wtf->outfh = NULL;

    return -1;
}