/**
 * Build a new acoustic model object from an existing one.
 *
 * The configuration, log-math object, model definition, transition
 * matrices and feature buffer are obtained from @a other through their
 * respective *_retain() functions; the Gaussian mixture object comes
 * from ps_mgau_copy().  The senone scoring arrays and the one-frame
 * feature array are newly allocated for the copy.
 *
 * @param other Acoustic model to copy from.
 * @return Newly allocated acmod_t whose refcount is set to 1.
 */
acmod_t *
acmod_copy(acmod_t *other)
{
    acmod_t *dup = ckd_calloc(1, sizeof(*dup));

    dup->refcount = 1;

    /* Objects taken from the source acmod. */
    dup->config = cmd_ln_retain(other->config);
    dup->lmath = logmath_retain(other->lmath);
    dup->mdef = bin_mdef_retain(other->mdef);
    dup->tmat = tmat_retain(other->tmat);
    dup->mgau = ps_mgau_copy(other->mgau);
    dup->fb = featbuf_retain(other->fb);
    /* The fcb pointer is copied without a retain of its own; the
     * original author believed it is implicitly retained with fb. */
    dup->fcb = other->fcb;

    /* Senone computation stuff, sized by the number of senones. */
    dup->senone_scores =
        ckd_calloc(bin_mdef_n_sen(dup->mdef), sizeof(*dup->senone_scores));
    dup->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(dup->mdef));
    dup->senone_active =
        ckd_calloc(bin_mdef_n_sen(dup->mdef), sizeof(*dup->senone_active));
    dup->log_zero = logmath_get_zero(dup->lmath);
    dup->compallsen = cmd_ln_boolean_r(dup->config, "-compallsen");

    /* Feature array holding a single frame. */
    dup->feat_buf = feat_array_alloc(dup->fcb, 1);

    return dup;
}
/**
 * Create a wave2feat converter from a parsed command line.
 *
 * Retains @a config, creates a front end from it and selects the output
 * type whose name equals the "-ofmt" option.
 *
 * @param config Parsed configuration; retained by the new object.
 * @return New object with refcount 1, or NULL when "-ofmt" names no
 *         entry of the outtypes table.  A front-end creation failure is
 *         reported through E_FATAL.
 */
sphinx_wave2feat_t *
sphinx_wave2feat_init(cmd_ln_t *config)
{
    sphinx_wave2feat_t *self;
    int idx;

    self = (sphinx_wave2feat_t *)ckd_calloc(1, sizeof(*self));
    self->refcount = 1;
    self->config = cmd_ln_retain(config);

    self->fe = fe_init_auto_r(self->config);
    if (self->fe == NULL) {
        E_FATAL("Failed to create feature extraction\n");
    }

    /* Default (sphinx) type: first entry of the table. */
    self->ot = outtypes;

    /* Look up the requested output format by name. */
    idx = 0;
    while (idx < nouttypes) {
        output_type_t const *candidate = &outtypes[idx];

        if (strcmp(cmd_ln_str_r(config, "-ofmt"), candidate->name) == 0) {
            self->ot = candidate;
            break;
        }
        ++idx;
    }

    /* Running off the end of the table means nothing matched. */
    if (idx == nouttypes) {
        E_ERROR("Unknown output type: '%s'\n",
                cmd_ln_str_r(config, "-ofmt"));
        sphinx_wave2feat_free(self);
        return NULL;
    }

    return self;
}
/**
 * Create an acoustic model object attached to a feature buffer.
 *
 * @param config Configuration; retained by the new object.
 * @param lmath  Log-math object; retained by the new object.
 * @param fb     Feature buffer; retained by the new object.  The
 *               dynamic feature computation object (fcb) is obtained
 *               from it with featbuf_get_fcb().
 * @return New acmod_t with refcount 1, or NULL if acmod_init_am()
 *         reports failure (the partially built object is released
 *         with acmod_free() in that case).
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, featbuf_t *fb)
{
    acmod_t *am = ckd_calloc(1, sizeof(*am));

    am->refcount = 1;
    am->config = cmd_ln_retain(config);
    am->lmath = logmath_retain(lmath);
    am->fb = featbuf_retain(fb);
    am->fcb = featbuf_get_fcb(am->fb);

    /* Load acoustic model parameters. */
    if (acmod_init_am(am) < 0) {
        acmod_free(am);
        return NULL;
    }

    /* Senone computation stuff, sized by the number of senones. */
    am->senone_scores =
        ckd_calloc(bin_mdef_n_sen(am->mdef), sizeof(*am->senone_scores));
    am->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(am->mdef));
    am->senone_active =
        ckd_calloc(bin_mdef_n_sen(am->mdef), sizeof(*am->senone_active));
    am->log_zero = logmath_get_zero(am->lmath);
    am->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    /* Feature array holding a single frame. */
    am->feat_buf = feat_array_alloc(am->fcb, 1);

    return am;
}
int main(int argc, char *argv[]) { sbthread_t *threads[10]; cmd_ln_t *config; int i; E_INFO("Processing chan3.raw in 10 threads\n"); if ((config = cmd_ln_parse_r(NULL, fe_args, 0, NULL, FALSE)) == NULL) return -1; err_set_callback(err_threaded_cb, NULL); pthread_key_create(&logfp_index, NULL); pthread_setspecific(logfp_index, (void*)stderr); for (i = 0; i < 10; ++i) { config = cmd_ln_retain(config); threads[i] = sbthread_start(config, process, (void *)(long)i); } for (i = 0; i < 10; ++i) { int rv; rv = sbthread_wait(threads[i]); E_INFO("Thread %d exited with status %d\n", i, rv); sbthread_free(threads[i]); } /* Now check to make sure they all created logfiles with the * correct contents. */ for (i = 0; i < 10; ++i) { char logfile[16], line[256]; FILE *logfh; sprintf(logfile, "%03d.log", i); TEST_ASSERT(logfh = fopen(logfile, "r")); while (fgets(line, sizeof(line), logfh)) { string_trim(line, STRING_BOTH); printf("%s: |%s|\n", logfile, line); /* total number of frames in audio file is 1436, but there are only 1290 voiced */ TEST_EQUAL(0, strcmp(line, "INFO: test_tls_log.c(61): nfr = 1290")); } fclose(logfh); } cmd_ln_free_r(config); return 0; }
/**
 * Create an acoustic model object, optionally re-using an existing
 * front end and dynamic feature computation object.
 *
 * @param config Configuration; retained by the new object.
 * @param lmath  Log-math object; the pointer is stored as given.
 * @param fe     Front end to use, or NULL to create one from @a config.
 *               A supplied front end is checked with
 *               acmod_fe_mismatch() and then retained.
 * @param fcb    Feature computation object to use, or NULL to create
 *               one via acmod_init_feat().  A supplied one is checked
 *               with acmod_feat_mismatch() and then retained.
 * @return New acmod_t, or NULL on any failure (the partially built
 *         object is released with acmod_free()).
 */
acmod_t *
acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
{
    acmod_t *acmod;
    char const *params_file;

    acmod = ckd_calloc(1, sizeof(*acmod));
    acmod->config = cmd_ln_retain(config);
    acmod->lmath = lmath;
    acmod->state = ACMOD_IDLE;

    /* Look for feat.params in acoustic model dir. */
    params_file = cmd_ln_str_r(acmod->config, "-featparams");
    if (params_file != NULL
        && cmd_ln_parse_file_r(acmod->config, feat_defn,
                               params_file, FALSE) != NULL) {
        E_INFO("Parsed model-specific feature parameters from %s\n",
               params_file);
    }

    /* Initialize feature computation. */
    if (fe == NULL) {
        /* Initialize a new front end. */
        acmod->fe = fe_init_auto_r(config);
        if (acmod->fe == NULL)
            goto error_out;
        if (acmod_fe_mismatch(acmod, acmod->fe))
            goto error_out;
    }
    else {
        /* Validate the caller's front end before taking a reference. */
        if (acmod_fe_mismatch(acmod, fe))
            goto error_out;
        fe_retain(fe);
        acmod->fe = fe;
    }

    if (fcb == NULL) {
        /* Initialize a new fcb. */
        if (acmod_init_feat(acmod) < 0)
            goto error_out;
    }
    else {
        /* Validate the caller's fcb before taking a reference. */
        if (acmod_feat_mismatch(acmod, fcb))
            goto error_out;
        feat_retain(fcb);
        acmod->fcb = fcb;
    }

    /* Load acoustic model parameters. */
    if (acmod_init_am(acmod) < 0)
        goto error_out;

    /* The MFCC buffer needs to be at least as large as the dynamic
     * feature window. */
    acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
    acmod->mfc_buf = (mfcc_t **)
        ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
                      sizeof(**acmod->mfc_buf));

    /* Feature buffer has to be at least as large as MFCC buffer. */
    acmod->n_feat_alloc =
        acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
    acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
    acmod->framepos =
        ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos));

    acmod->utt_start_frame = 0;

    /* Senone computation stuff, sized by the number of senones. */
    acmod->senone_scores =
        ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                   sizeof(*acmod->senone_scores));
    acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
    acmod->senone_active =
        ckd_calloc(bin_mdef_n_sen(acmod->mdef),
                   sizeof(*acmod->senone_active));
    acmod->log_zero = logmath_get_zero(acmod->lmath);
    acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");

    return acmod;

error_out:
    acmod_free(acmod);
    return NULL;
}
/* Return the decoder's configuration after passing it through
 * cmd_ln_retain(). */
SWIGINTERN Config *Decoder_getConfig(Decoder *self){
    cmd_ln_t *decoder_config = ps_get_config(self);

    return cmd_ln_retain(decoder_config);
}
/**
 * (Re)initialize a decoder from a configuration.
 *
 * Frees the existing searches, acoustic model, dictionary and
 * dictionary-to-phone mapping, rebuilds them from the configuration,
 * and then sets up whichever searches the configuration asks for
 * (-keyphrase, -kws, -fsg, -jsgf, -allphone, -lm, -lmctl).
 *
 * @param ps     Decoder to (re)initialize.
 * @param config New configuration, or NULL to keep using the one already
 *               stored in @a ps.  A new configuration is retained and the
 *               previous one released.
 * @return 0 on success, -1 on failure.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyphrase;
    /* Held as float32 so a fractional language weight is not truncated
     * before it is handed to fsg_model_readfile(). */
    float32 lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }
    /* From here on always read options through ps->config: the config
     * argument is allowed to be NULL (see the check above), so
     * dereferencing it directly would crash in that case. */

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search) */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef, ps->acmod->lmath)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) {
        if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        if (!fsg)
            return -1;
        if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-allphone"))) {
        if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    /* -lm is skipped when -allphone is set. */
    if ((path = cmd_ln_str_r(ps->config, "-lm"))
        && !cmd_ln_boolean_r(ps->config, "-allphone")) {
        if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path)
            || ps_set_search(ps, PS_DEFAULT_SEARCH))
            return -1;
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        /* Register one search per language model in the set. */
        for (lmset_it = ngram_model_set_iter(lmset);
             lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);
            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
            ngram_model_free(lm);
        }

        /* -lmctl requires -lmname to pick the active search. */
        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else {
            E_ERROR("No default LM name (-lmname) for `-lmctl'\n");
            return -1;
        }
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/**
 * Create a front end from a configuration.
 *
 * Parses the general and mel filterbank parameters, derives the frame
 * shift and frame size, validates them, and allocates the window,
 * filterbank, VAD and FFT working buffers.
 *
 * @param config Configuration; passed through cmd_ln_retain() before
 *               being handed to fe_parse_general_params().
 * @return New front end with refcount 1, or NULL if parameter parsing
 *         fails or a parameter check is violated (the partially built
 *         object is released with fe_free()).
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *fe;
    int prespch_dim;

    fe = (fe_t*)ckd_calloc(1, sizeof(*fe));
    fe->refcount = 1;

    /* transfer params to front end */
    if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0)
        goto error_out;

    /* compute remaining fe parameters */
    /* Adding 0.5 before the cast rounds to the nearest integer:
     * 2.3 becomes 2, whereas 3.7 becomes 4. */
    fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
    fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
    fe->prior = 0;

    fe_start_stream(fe);

    assert (fe->frame_shift > 1);

    if (fe->frame_size < fe->frame_shift) {
        E_ERROR
            ("Frame size %d (-wlen) must be greater than frame shift %d (-frate)\n",
             fe->frame_size, fe->frame_shift);
        goto error_out;
    }

    if (fe->frame_size > (fe->fft_size)) {
        E_ERROR
            ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n",
             fe->frame_size, fe->fft_size);
        goto error_out;
    }

    if (fe->dither)
        fe_init_dither(fe->seed);

    /* establish buffers for overflow samps and hamming window */
    fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
    fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));

    /* create hamming window */
    fe_create_hamming(fe->hamming_window, fe->frame_size);

    /* init and fill appropriate filter structure */
    fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));

    /* transfer params to mel fb */
    fe_parse_melfb_params(config, fe, fe->mel_fb);

    if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) {
        E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n",
                fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2);
        goto error_out;
    }

    fe_build_melfilters(fe->mel_fb);
    fe_compute_melcosine(fe->mel_fb);

    /* Noise statistics are only set up when noise or silence removal
     * is enabled. */
    if (fe->remove_noise || fe->remove_silence)
        fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters);

    fe->vad_data = (vad_data_t*)ckd_calloc(1, sizeof(*fe->vad_data));

    /* Pre-speech frames have num_filters values in RAW_LOG_SPEC mode
     * and num_cepstra values otherwise. */
    if (fe->log_spec != RAW_LOG_SPEC)
        prespch_dim = fe->num_cepstra;
    else
        prespch_dim = fe->mel_fb->num_filters;
    fe->vad_data->prespch_buf =
        fe_prespch_init(fe->pre_speech + 1, prespch_dim, fe->frame_shift);

    /* Create temporary FFT, spectrum and mel-spectrum buffers. */
    /* FIXME: Gosh there are a lot of these. */
    fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
    fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
    fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
    fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));

    /* create twiddle factors */
    fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
    fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
    fe_create_twiddle(fe);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(fe);
    }

    /*** Initialize the overflow buffers ***/
    fe_start_utt(fe);
    return fe;

error_out:
    fe_free(fe);
    return NULL;
}
/**
 * Create a front end from a configuration.
 *
 * Parses the general and mel filterbank parameters, derives the frame
 * shift and frame size, and allocates the window, filterbank and FFT
 * working buffers.
 *
 * @param config Configuration; passed through cmd_ln_retain() before
 *               being handed to fe_parse_general_params().
 * @return New front end with refcount 1, or NULL if parameter parsing
 *         fails or the frame size exceeds the FFT size (the partially
 *         built object is released with fe_free()).
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *fe = ckd_calloc(1, sizeof(*fe));

    fe->refcount = 1;

    /* transfer params to front end */
    if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0)
        goto error_out;

    /* compute remaining fe parameters */
    /* Adding 0.5 before the cast rounds to the nearest integer:
     * 2.3 becomes 2, whereas 3.7 becomes 4. */
    fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
    fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
    fe->prior = 0;
    fe->frame_counter = 0;

    assert (fe->frame_shift > 1);

    if (fe->frame_size > (fe->fft_size)) {
        E_WARN
            ("Number of FFT points has to be a power of 2 higher than %d\n",
             (fe->frame_size));
        goto error_out;
    }

    if (fe->dither)
        fe_init_dither(fe->seed);

    /* establish buffers for overflow samps and hamming window */
    fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
    fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));

    /* create hamming window */
    fe_create_hamming(fe->hamming_window, fe->frame_size);

    /* init and fill appropriate filter structure */
    fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));

    /* transfer params to mel fb */
    fe_parse_melfb_params(config, fe, fe->mel_fb);
    fe_build_melfilters(fe->mel_fb);
    fe_compute_melcosine(fe->mel_fb);

    /* Create temporary FFT, spectrum and mel-spectrum buffers. */
    /* FIXME: Gosh there are a lot of these. */
    fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
    fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
    fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
    fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));

    /* create twiddle factors */
    fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
    fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
    fe_create_twiddle(fe);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(fe);
    }

    /*** Z.A.B. ***/
    /*** Initialize the overflow buffers ***/
    fe_start_utt(fe);

    return fe;

error_out:
    fe_free(fe);
    return NULL;
}
/* Return the decoder's configuration after passing it through
 * cmd_ln_retain(). */
SWIGINTERN Config *ps_decoder_s_getConfig(struct ps_decoder_s *self){
    cmd_ln_t *decoder_config = ps_get_config(self);

    return cmd_ln_retain(decoder_config);
}
/**
 * (Re)initialize a decoder from a configuration.
 *
 * Frees the existing searches, acoustic model, dictionary and
 * dictionary-to-phone mapping, rebuilds them, and creates an FSG search
 * (when -fsg or -jsgf is set) or an N-Gram search (when -lm or -lmctl is
 * set).  With none of those options only the dictionary-to-phone
 * mapping is built.
 *
 * @param ps     Decoder to (re)initialize.
 * @param config New configuration, or NULL to keep using the one already
 *               stored in @a ps.  A new configuration is retained and the
 *               previous one released.
 * @return 0 on success, -1 on failure.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search): rebuilt when it
     * is missing or its base differs from -logbase. */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL);
    if (ps->acmod == NULL)
        return -1;

    ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
    if (ps->pl_window) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search. */
        ps->phone_loop =
            phone_loop_search_init(ps->config, ps->acmod, ps->dict);
        if (ps->phone_loop == NULL)
            return -1;
        ps->searches = glist_add_ptr(ps->searches, ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    ps->dict = dict_init(ps->config, ps->acmod->mdef);
    if (ps->dict == NULL)
        return -1;

    /* Determine whether we are starting out in FSG or N-Gram search mode. */
    if (cmd_ln_str_r(ps->config, "-fsg") != NULL
        || cmd_ln_str_r(ps->config, "-jsgf") != NULL) {
        ps_search_t *fsg_search;

        ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
        if (ps->d2p == NULL)
            return -1;
        fsg_search = fsg_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
        if (fsg_search == NULL)
            return -1;
        fsg_search->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, fsg_search);
        ps->search = fsg_search;
    }
    else if (cmd_ln_str_r(ps->config, "-lm") != NULL
             || cmd_ln_str_r(ps->config, "-lmctl") != NULL) {
        ps_search_t *ngram_search;

        /* Make the acmod's feature buffer growable if we are doing
         * two-pass search. */
        if (cmd_ln_boolean_r(ps->config, "-fwdflat")
            && cmd_ln_boolean_r(ps->config, "-fwdtree"))
            acmod_set_grow(ps->acmod, TRUE);

        ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
        if (ps->d2p == NULL)
            return -1;
        ngram_search = ngram_search_init(ps->config, ps->acmod, ps->dict, ps->d2p);
        if (ngram_search == NULL)
            return -1;
        ngram_search->pls = ps->phone_loop;
        ps->searches = glist_add_ptr(ps->searches, ngram_search);
        ps->search = ngram_search;
    }
    /* Otherwise, we will initialize the search whenever the user
     * decides to load an FSG or a language model. */
    else {
        ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict);
        if (ps->d2p == NULL)
            return -1;
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
fe_t * fe_init_auto() { return fe_init_auto_r(cmd_ln_retain(cmd_ln_get())); }