/*
 * Initialize a live decoder from the already-parsed command line.
 *
 * Configures and creates the front-end from command-line values,
 * initializes the knowledge base, resets the decoder's bookkeeping fields
 * and allocates the internal feature buffer.
 *
 * _decoder         Decoder to initialize; must not be NULL (asserted).
 * _internal_cmdln  Stored in the decoder.  When TRUE, cmd_ln_free() is
 *                  called if initialization fails.
 *
 * Returns LD_SUCCESS on success.  If the front-end or the feature buffer
 * cannot be created, returns LD_ERROR_OUT_OF_MEMORY, releases what was
 * allocated here and leaves the decoder in LD_STATE_FINISHED.
 */
static int
ld_init_impl(live_decoder_t * _decoder, int32 _internal_cmdln)
{
    param_t fe_param;
    int rv = LD_SUCCESS;

    assert(_decoder != NULL);

    /*
     * The cleanup path below inspects both pointers, so they must hold a
     * defined value before the first possible failure.
     */
    _decoder->fe = NULL;
    _decoder->features = NULL;

    unlimit();

    /* ARCHAN 20050708: This part should be factored with fe_parse_option */
    /* allocate and initialize front-end */
    fe_init_params(&fe_param);
    fe_param.SAMPLING_RATE = cmd_ln_float32("-samprate");
    fe_param.FRAME_RATE = cmd_ln_int32("-frate");
    fe_param.WINDOW_LENGTH = cmd_ln_float32("-wlen");
    fe_param.FB_TYPE =
        strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0
        ? MEL_SCALE : LOG_LINEAR;
    fe_param.NUM_CEPSTRA = cmd_ln_int32("-ncep");
    fe_param.NUM_FILTERS = cmd_ln_int32("-nfilt");
    fe_param.FFT_SIZE = cmd_ln_int32("-nfft");
    fe_param.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf");
    fe_param.UPPER_FILT_FREQ = cmd_ln_float32("-upperf");
    fe_param.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha");
    /* Non-zero (dither enabled) for any -dither value other than "no". */
    fe_param.dither = strcmp("no", cmd_ln_str("-dither"));
    fe_param.warp_type = cmd_ln_str("-warp_type");
    fe_param.warp_params = cmd_ln_str("-warp_params");

    if ((_decoder->fe = fe_init(&fe_param)) == NULL) {
        E_WARN("Failed to initialize front-end.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    /* capture decoder parameters */
    kb_init(&_decoder->kb);

    /* initialize decoder variables */
    _decoder->kbcore = _decoder->kb.kbcore;
    _decoder->hyp_frame_num = -1;
    _decoder->uttid = NULL;
    _decoder->ld_state = LD_STATE_IDLE;
    _decoder->hyp_str = NULL;
    _decoder->hyp_segs = NULL;

    /* Both endian options are strings, so compare them textually. */
    _decoder->swap =
        (strcmp(cmd_ln_str("-machine_endian"),
                cmd_ln_str("-input_endian")) != 0);
    _decoder->phypdump = (cmd_ln_int32("-phypdump"));
    _decoder->rawext = (cmd_ln_str("-rawext"));

    if (_decoder->phypdump)
        E_INFO("Partial hypothesis WILL be dumped\n");
    else
        E_INFO("Partial hypothesis will NOT be dumped\n");

    if (_decoder->swap)
        E_INFO("Input data WILL be byte swapped\n");
    else
        E_INFO("Input data will NOT be byte swapped\n");

    _decoder->internal_cmdln = _internal_cmdln;

    _decoder->features =
        feat_array_alloc(kbcore_fcb(_decoder->kbcore), LIVEBUFBLOCKSIZE);
    if (_decoder->features == NULL) {
        E_WARN("Failed to allocate internal feature buffer.\n");
        rv = LD_ERROR_OUT_OF_MEMORY;
        goto ld_init_impl_cleanup;
    }

    return LD_SUCCESS;

  ld_init_impl_cleanup:
    if (_decoder->fe != NULL) {
        fe_close(_decoder->fe);
        _decoder->fe = NULL;    /* do not leave a dangling pointer behind */
    }
    if (_decoder->features != NULL) {
        /* consult the implementation of feat_array_alloc() for how to free our
         * internal feature vector buffer */
        ckd_free((void *) **_decoder->features);
        ckd_free_2d((void **) _decoder->features);
        _decoder->features = NULL;
    }
    if (_internal_cmdln == TRUE) {
        cmd_ln_free();
    }
    _decoder->ld_state = LD_STATE_FINISHED;

    return rv;
}
/********************************************************************* FUNCTION: fe_init_auto PARAMETERS: fe_t * RETURNS: nothing DESCRIPTION: automatically grab front-end parameters from command line arguments and initializes the front-end structure **********************************************************************/ fe_t * fe_init_auto() { param_t p; fe_init_params(&p); p.SAMPLING_RATE = cmd_ln_float32("-samprate"); p.FRAME_RATE = cmd_ln_int32("-frate"); p.WINDOW_LENGTH = cmd_ln_float32("-wlen"); if (strcmp("mel_scale", cmd_ln_str("-fbtype")) == 0) p.FB_TYPE = MEL_SCALE; else if (strcmp("log_linear", cmd_ln_str("-fbtype")) == 0) p.FB_TYPE = LOG_LINEAR; else { E_WARN("Invalid fbtype\n"); return NULL; } p.NUM_CEPSTRA = cmd_ln_int32("-ncep"); p.NUM_FILTERS = cmd_ln_int32("-nfilt"); p.FFT_SIZE = cmd_ln_int32("-nfft"); p.UPPER_FILT_FREQ = cmd_ln_float32("-upperf"); p.LOWER_FILT_FREQ = cmd_ln_float32("-lowerf"); p.PRE_EMPHASIS_ALPHA = cmd_ln_float32("-alpha"); if (cmd_ln_boolean("-dither")) { p.dither = 1; p.seed = cmd_ln_int32("-seed"); } else p.dither = 0; #ifdef WORDS_BIGENDIAN p.swap = strcmp("big", cmd_ln_str("-input_endian")) == 0 ? 0 : 1; #else p.swap = strcmp("little", cmd_ln_str("-input_endian")) == 0 ? 
0 : 1; #endif if (cmd_ln_boolean("-logspec")) p.logspec = RAW_LOG_SPEC; if (cmd_ln_boolean("-smoothspec")) p.logspec = SMOOTH_LOG_SPEC; p.doublebw = cmd_ln_boolean("-doublebw"); p.unit_area = cmd_ln_boolean("-unit_area"); p.round_filters = cmd_ln_boolean("-round_filters"); p.remove_dc = cmd_ln_boolean("-remove_dc"); p.verbose = cmd_ln_boolean("-verbose"); if (0 == strcmp(cmd_ln_str("-transform"), "dct")) p.transform = DCT_II; else if (0 == strcmp(cmd_ln_str("-transform"), "legacy")) p.transform = LEGACY_DCT; else if (0 == strcmp(cmd_ln_str("-transform"), "htk")) p.transform = DCT_HTK; else { E_WARN("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); return NULL; } p.warp_type = cmd_ln_str("-warp_type"); p.warp_params = cmd_ln_str("-warp_params"); p.lifter_val = cmd_ln_int32("-lifter"); return fe_init(&p); }
/*
 * Batch driver: reads control-file entries from stdin, converts each
 * listed raw audio file to cepstra with the front-end, runs one CDCN
 * update and normalization pass over the cepstra, and hands the
 * normalized cepstra to the decoder.
 *
 * argc, argv  Passed straight to fbs_init().
 *
 * Returns 0 after stdin is exhausted; exits with -1 if the front-end
 * cannot be initialized.
 */
int
main(int32 argc, char *argv[])
{
    char line[4096], filename[4096], idspec[4096], *uttid, *result;
    int32 sf, ef, sps, adcin, nf;
    int16 adbuf[4096];
    int32 i, k;
    float32 **mfcbuf;
    CDCN_type *cdcn;
    param_t param;
    fe_t *fe = NULL;

    fbs_init(argc, argv);
    /* Assume that cdcn_init is part of the above fbs_init() */
    cdcn = uttproc_get_cdcn_ptr();

    adcin = query_adc_input();
    assert(adcin);      /* Limited to processing audio input files (not cep) */

    /* One front-end instance is created here and shared by all utterances. */
    sps = query_sampling_rate();
    fe_init_params(&param);
    param.SAMPLING_RATE = (float) sps;
    if ((fe = fe_init(&param)) == NULL) {
        E_ERROR("fe_init() failed to initialize\n");
        exit(-1);
    }

    mfcbuf = (float32 **) ckd_calloc_2d(8192, 13, sizeof(float32));

    /* Process "control file" input through stdin */
    while (fgets(line, sizeof(line), stdin) != NULL) {
        if (uttproc_parse_ctlfile_entry(line, filename, &sf, &ef, idspec) < 0)
            continue;
        assert((sf < 0) && (ef < 0));   /* Processing entire input file */

        uttid = build_uttid(idspec);
        uttproc_begin_utt(uttid);

        /* Convert raw data file to cepstra */
        /* NOTE(review): this path skips uttproc_end_utt() after
         * uttproc_begin_utt() -- confirm that is acceptable to uttproc. */
        if (uttfile_open(filename) < 0) {
            E_ERROR("uttfile_open(%s) failed\n", filename);
            continue;
        }
        fe_start_utt(fe);
        nf = 0;
        while ((k = adc_file_read(adbuf, 4096)) >= 0) {
            if (fe_process_utt(fe, adbuf, k, mfcbuf + nf, &k) ==
                FE_ZERO_ENERGY_ERROR) {
                E_WARN("Frames with zero energy. Consider using dither\n");
            }
            nf += k;
            /* WARNING!! No check for mfcbuf overflow */
        }
        /* NOTE(review): the count returned in k here is not added to nf, so
         * any frame flushed by fe_end_utt() is not processed below --
         * confirm whether that is intended. */
        fe_end_utt(fe, mfcbuf[nf], &k);
        /* The front-end must stay open: the next utterance reuses it. */
        uttfile_close();

        if (nf <= 0) {
            E_ERROR("Empty utterance\n");
            continue;
        }
        else
            E_INFO("%d frames\n", nf);

        /* Update CDCN module */
        cdcn_converged_update(mfcbuf,   /* cepstra buffer */
                              nf,       /* Number of frames */
                              cdcn,     /* The CDCN wrapper */
                              1         /* One iteration */
            );

        /* CDCN */
        for (i = 0; i < nf; i++)
            cdcn_norm(mfcbuf[i], cdcn);

        /* Process normalized cepstra */
        uttproc_cepdata(mfcbuf, nf, 1);
        uttproc_end_utt();
        uttproc_result(&k, &result, 1);
        printf("\n");
        fflush(stdout);
    }

    /* Release the shared front-end only once all utterances are done. */
    fe_close(fe);
    ckd_free_2d((void **) mfcbuf);

    fbs_end();
    return 0;
}