/*
 * Set up a live decoder: build the signal-processing front end from the
 * command-line settings, initialize the knowledge base, reset the
 * per-utterance bookkeeping fields and allocate the internal feature buffer.
 *
 * _decoder        - decoder structure to fill in; must not be NULL.
 * _internal_cmdln - stored in the decoder; when it equals TRUE the command
 *                   line is released with cmd_ln_free() if initialization
 *                   fails.
 *
 * Returns LD_SUCCESS on success.  On failure returns
 * LD_ERROR_OUT_OF_MEMORY, releases the front end and the feature buffer
 * (whichever had been created) and leaves the decoder in
 * LD_STATE_FINISHED.
 */
static int
ld_init_impl(live_decoder_t * _decoder, int32 _internal_cmdln)
{
    param_t fe_param;
    int rv = LD_SUCCESS;

    assert(_decoder != NULL);

    /* The cleanup path below inspects both pointers.  Give them a defined
     * value before any "goto", otherwise a front-end failure would make the
     * cleanup code test (and possibly free) an uninitialized
     * _decoder->features. */
    _decoder->fe = NULL;
    _decoder->features = NULL;

    unlimit();

    /* ARCHAN 20050708: This part should be factored with fe_parse_option */
    /* allocate and initialize front-end */
    fe_init_params(&fe_param);
    fe_param.SAMPLING_RATE = cmd_ln_float32("-samprate");
    fe_param.FRAME_RATE = cmd_ln_int32("-frate");
    fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");
    /* Anything other than "mel_scale" selects the log-linear filter bank. */
    fe_param.FB_TYPE = strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0 ?
        MEL_SCALE : LOG_LINEAR;
    fe_param.NUM_CEPSTRA = cmd_ln_int32("-ncep");
    fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
    fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
    fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    /* Non-zero (dither enabled) for every value except the literal "no". */
    fe_param.dither = strcmp("no", cmd_ln_str("-dither"));
    fe_param.warp_type = cmd_ln_str("-warp_type");
    fe_param.warp_params = cmd_ln_str("-warp_params");

    if ((_decoder->fe = fe_init(&fe_param)) == NULL) {
        E_WARN("Failed to initialize front-end.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    /* capture decoder parameters */
    kb_init(&_decoder->kb);

    /* initialize decoder variables */
    _decoder->kbcore = _decoder->kb.kbcore;
    _decoder->hyp_frame_num = -1;
    _decoder->uttid = NULL;
    _decoder->ld_state = LD_STATE_IDLE;
    _decoder->hyp_str = NULL;
    _decoder->hyp_segs = NULL;

    /* Input needs byte swapping whenever its byte order differs from the
     * byte order reported for this machine. */
    _decoder->swap =
        (strcmp(cmd_ln_str("-machine_endian"), cmd_ln_str("-input_endian"))
         != 0);

    _decoder->phypdump = cmd_ln_int32("-phypdump");
    _decoder->rawext = cmd_ln_str("-rawext");

    if (_decoder->phypdump)
        E_INFO("Partial hypothesis WILL be dumped\n");
    else
        E_INFO("Partial hypothesis will NOT be dumped\n");

    if (_decoder->swap)
        E_INFO("Input data WILL be byte swapped\n");
    else
        E_INFO("Input data will NOT be byte swapped\n");

    _decoder->internal_cmdln = _internal_cmdln;
    _decoder->features =
        feat_array_alloc(kbcore_fcb(_decoder->kbcore), LIVEBUFBLOCKSIZE);
    if (_decoder->features == NULL) {
        E_WARN("Failed to allocate internal feature buffer.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        /* NOTE(review): the knowledge base set up by kb_init() is not
         * released on this path -- confirm whether it needs to be. */
        goto ld_init_impl_cleanup;
    }

    return LD_SUCCESS;

  ld_init_impl_cleanup:
    if (_decoder->fe != NULL) {
        fe_close(_decoder->fe);
        _decoder->fe = NULL;
    }
    if (_decoder->features != NULL) {
        /* consult the implementation of feat_array_alloc() for how to free our
         * internal feature vector buffer */
        ckd_free((void *) **_decoder->features);
        ckd_free_2d((void **) _decoder->features);
        _decoder->features = NULL;
    }
    if (_internal_cmdln == TRUE) {
        cmd_ln_free();
    }
    _decoder->ld_state = LD_STATE_FINISHED;

    return rv;
}
Example #2
0
/*********************************************************************
   FUNCTION:   fe_init_auto
   PARAMETERS: fe_t *
   RETURNS:    nothing
   DESCRIPTION: automatically grab front-end parameters from command
   line arguments and initializes the front-end structure
**********************************************************************/
fe_t *
fe_init_auto()
{
    param_t p;

    fe_init_params(&p);

    p.SAMPLING_RATE = cmd_ln_float32("-samprate");
    p.FRAME_RATE = cmd_ln_int32("-frate");
    p.WINDOW_LENGTH = cmd_ln_float32("-wlen");
    if (strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0)
        p.FB_TYPE = MEL_SCALE;
    else if (strcmp("log_linear", cmd_ln_str("-fbtype")) == 0)
        p.FB_TYPE = LOG_LINEAR;
    else {
        E_WARN("Invalid fbtype\n");
        return NULL;
    }

    p.NUM_CEPSTRA = cmd_ln_int32("-ncep");
    p.NUM_FILTERS = cmd_ln_int32("-nfilt");
    p.FFT_SIZE = cmd_ln_int32("-nfft");

    p.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    p.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    p.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    if (cmd_ln_boolean("-dither")) {
        p.dither = 1;
        p.seed = cmd_ln_int32("-seed");
    }
    else
        p.dither = 0;

#ifdef WORDS_BIGENDIAN
    p.swap = strcmp("big", cmd_ln_str("-input_endian")) == 0 ? 0 : 1;
#else        
    p.swap = strcmp("little", cmd_ln_str("-input_endian")) == 0 ? 0 : 1;
#endif

    if (cmd_ln_boolean("-logspec"))
        p.logspec = RAW_LOG_SPEC;
    if (cmd_ln_boolean("-smoothspec"))
        p.logspec = SMOOTH_LOG_SPEC;
    p.doublebw = cmd_ln_boolean("-doublebw");
    p.unit_area = cmd_ln_boolean("-unit_area");
    p.round_filters = cmd_ln_boolean("-round_filters");
    p.remove_dc = cmd_ln_boolean("-remove_dc");
    p.verbose = cmd_ln_boolean("-verbose");

    if (0 == strcmp(cmd_ln_str("-transform"), "dct"))
        p.transform = DCT_II;
    else if (0 == strcmp(cmd_ln_str("-transform"), "legacy"))
        p.transform = LEGACY_DCT;
    else if (0 == strcmp(cmd_ln_str("-transform"), "htk"))
        p.transform = DCT_HTK;
    else {
        E_WARN("Invalid transform type (values are 'dct', 'legacy', 'htk')\n");
        return NULL;
    }

    p.warp_type = cmd_ln_str("-warp_type");
    p.warp_params = cmd_ln_str("-warp_params");

    p.lifter_val = cmd_ln_int32("-lifter");

    return fe_init(&p);

}
Example #3
0
/*
 * Batch driver: reads control-file entries from stdin, converts each raw
 * audio file to cepstra with the front end, applies CDCN normalization to
 * the cepstra and passes them to the decoder.
 *
 * A single front end is created up front, reused for every utterance and
 * closed once after the last control-file entry has been handled.
 *
 * Returns 0 after all entries have been processed; exits with -1 if the
 * front end cannot be initialized.
 */
int main (int32 argc, char *argv[])
{
    /* Dimensions of the working buffers. */
    enum {
        ADC_BUF_SAMPLES = 4096,     /* samples read per adc_file_read() call */
        MFC_MAX_FRAMES = 8192,      /* rows allocated in the cepstra buffer */
        MFC_NUM_CEPSTRA = 13        /* columns allocated in the cepstra buffer */
    };

    char line[4096], filename[4096], idspec[4096], *uttid, *result;
    int32 sf, ef, sps, adcin, nf;
    int16 adbuf[ADC_BUF_SAMPLES];
    int32 i, k;
    float32 **mfcbuf;
    CDCN_type *cdcn;
    param_t param;
    fe_t *fe = NULL;


    fbs_init (argc, argv);
    /* Assume that cdcn_init is part of the above fbs_init() */
    cdcn = uttproc_get_cdcn_ptr();

    adcin = query_adc_input();
    assert (adcin);	/* Limited to processing audio input files (not cep) */
    sps = query_sampling_rate();

    fe_init_params(&param);
    param.SAMPLING_RATE = (float)sps;

    if ((fe = fe_init (&param)) == NULL)
    {
        E_ERROR("fe_init() failed to initialize\n");
        exit (-1);
    }
    mfcbuf = (float32 **) ckd_calloc_2d (MFC_MAX_FRAMES, MFC_NUM_CEPSTRA,
                                         sizeof(float32));

    /* Process "control file" input through stdin */
    while (fgets (line, sizeof(line), stdin) != NULL) {
        if (uttproc_parse_ctlfile_entry (line, filename, &sf, &ef, idspec) < 0)
            continue;
        assert ((sf < 0) && (ef < 0));	/* Processing entire input file */

        uttid = build_uttid (idspec);

        uttproc_begin_utt (uttid);

        /* Convert raw data file to cepstra */
        if (uttfile_open (filename) < 0) {
            E_ERROR("uttfile_open(%s) failed\n", filename);
            /* NOTE(review): the utterance started above is not ended on
             * this path -- confirm that skipping is acceptable. */
            continue;
        }
        fe_start_utt(fe);
        nf = 0;
        while ((k = adc_file_read (adbuf, ADC_BUF_SAMPLES)) >= 0) {
            /* k goes in as the sample count and comes back as the number
             * of frames written at mfcbuf+nf. */
            if (fe_process_utt (fe, adbuf, k, mfcbuf+nf, &k) == FE_ZERO_ENERGY_ERROR) {
                E_WARN("Frames with zero energy. Consider using dither\n");
            }
            nf += k;
            /* WARNING!! No check for mfcbuf overflow */
        }
        fe_end_utt(fe, mfcbuf[nf], &k);
        /* The front end is deliberately NOT closed here: it is reused by
         * the next control-file entry and closed once after the loop. */
        uttfile_close ();

        if (nf <= 0) {
            E_ERROR("Empty utterance\n");
            continue;
        } else
            E_INFO("%d frames\n", nf);

        /* Update CDCN module */
        cdcn_converged_update (mfcbuf, /* cepstra buffer */
                               nf, /* Number of frames */
                               cdcn, /* The CDCN wrapper */
                               1 /* One iteration */
                              );

        /* CDCN */
        for (i = 0; i < nf; i++)
            cdcn_norm (mfcbuf[i], cdcn);

        /* Process normalized cepstra */
        uttproc_cepdata (mfcbuf, nf, 1);
        uttproc_end_utt ();
        uttproc_result (&k, &result, 1);
        printf ("\n");
        fflush (stdout);

    }

    /* All utterances done: release the front end and the cepstra buffer. */
    fe_close(fe);
    ckd_free_2d((void **)mfcbuf);

    fbs_end ();
    return 0;
}