/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* LM */ lm_read ((char *) cmd_ln_access("-lmfn"), ""); /* Filler penalties */ fillpen_init ((char *) cmd_ln_access("-fillpenfn"), dict->filler_start, dict->filler_end); }
/*
 * Write a discrete HMM (transition matrix + mixing weights) to the file
 * "<dir>/<name>.<ext>", where <ext> is the value of the -hmmext option.
 *
 * Returns S3_SUCCESS when the file was opened, written and closed without
 * error, S3_ERROR otherwise.  The output stream is closed on every path.
 */
static int
put_dhmm(float32 **tmat, float32 ***mixw, const char *dir, const char *name)
{
    const char *hmm_ext;
    char fn[MAXPATHLEN];
    FILE *fp;
    int n;
    int status;

    hmm_ext = cmd_ln_access("-hmmext");

    /* Bounded formatting: refuse a path that does not fit instead of overflowing fn. */
    n = snprintf(fn, sizeof(fn), "%s/%s.%s", dir, name, hmm_ext);
    if (n < 0 || (size_t)n >= sizeof(fn)) {
        fflush(stdout);
        fprintf(stderr, "%s(%d): HMM file name too long: %s/%s.%s\n",
                __FILE__, __LINE__, dir, name, hmm_ext);
        fflush(stderr);
        return S3_ERROR;
    }

    fp = fopen(fn, "wb");
    if (fp == NULL) {
        E_ERROR_SYSTEM("can't open %s for writing", fn);
        return S3_ERROR;
    }

    status = (write_dhmm(tmat, mixw, fp) == S3_SUCCESS) ? S3_SUCCESS : S3_ERROR;

    /* Always close; a failed close on a write stream means data may be lost. */
    if (fclose(fp) != 0 && status == S3_SUCCESS) {
        E_ERROR_SYSTEM("can't close %s after writing", fn);
        status = S3_ERROR;
    }

    return status;
}
/*
 * Write a transition matrix to the file "<in_dir_name>/<ci_name>.<ext>",
 * where <ext> is the value of the -hmmext option.
 *
 * Returns S3_SUCCESS when the file was opened, written and closed without
 * error, S3_ERROR otherwise.  The output stream is closed on every path.
 */
static int
put_sdm(float32 **tmat, const char *in_dir_name, const char *ci_name)
{
    const char *hmm_ext;
    char ci_hmm_filename[MAXPATHLEN];
    FILE *fp;
    int n;
    int status;

    hmm_ext = cmd_ln_access("-hmmext");

    /* Bounded formatting: refuse a path that does not fit in the buffer. */
    n = snprintf(ci_hmm_filename, sizeof(ci_hmm_filename), "%s/%s.%s",
                 in_dir_name, ci_name, hmm_ext);
    if (n < 0 || (size_t)n >= sizeof(ci_hmm_filename)) {
        fflush(stdout);
        fprintf(stderr, "%s(%d): HMM file name too long: %s/%s.%s\n",
                __FILE__, __LINE__, in_dir_name, ci_name, hmm_ext);
        fflush(stderr);
        return S3_ERROR;
    }

    fp = fopen(ci_hmm_filename, "wb");
    if (fp == NULL) {
        /* The file is opened for writing; say so in the diagnostic. */
        fflush(stdout);
        fprintf(stderr, "%s(%d): can't open %s for writing\n",
                __FILE__, __LINE__, ci_hmm_filename);
        fflush(stderr);
        return S3_ERROR;
    }

    status = (write_sdm(tmat, fp) == S3_SUCCESS) ? S3_SUCCESS : S3_ERROR;

    /* Always close; a failed close on a write stream means data may be lost. */
    if (fclose(fp) != 0 && status == S3_SUCCESS) {
        fflush(stdout);
        fprintf(stderr, "%s(%d): can't close %s after writing\n",
                __FILE__, __LINE__, ci_hmm_filename);
        fflush(stderr);
        status = S3_ERROR;
    }

    return status;
}
/*
 * Apply automatic gain control to n_frame cepstral vectors stored
 * contiguously in mfcc, using the method named by the -agc option
 * ("noise", "max", "emax" or "none").  The vector length is taken from the
 * file-level variable veclen.
 *
 * If -agc has no value a warning is logged and the data is left untouched;
 * any other unrecognised value is reported with E_FATAL.
 */
void
agc(float32 *mfcc, uint32 n_frame)
{
    const char *agc_type = cmd_ln_access("-agc");
    uint32 i;

    /* Must be tested before any strcmp(): comparing a NULL string is undefined. */
    if (agc_type == NULL) {
        E_WARN("no agc set\n");
        return ;
    }

    if (strcmp(agc_type, "noise") == 0) {
        real_agc_noise(mfcc, n_frame, veclen);
    }
    else if (strcmp(agc_type, "max") == 0) {
        agc_max(mfcc, n_frame, veclen);
    }
    else if (strcmp(agc_type, "emax") == 0) {
        /* Processed frame by frame, in place (input and output are the same vector). */
        for (i = 0; i < n_frame; i++) {
            agc_emax_proc(&mfcc[i*veclen], &mfcc[i*veclen], veclen);
        }
    }
    else if (strcmp(agc_type, "none") == 0) {
        /* do nothing */
    }
    else {
        E_FATAL("unsupported agc type %s\n", agc_type);
    }
}
/*
 * Subtract the per-component mean from a block of vectors, in place.  When
 * the -varnorm option is "yes", each component is additionally divided by
 * its standard deviation over the block.
 *
 *   vec    - nvec vectors of veclen components each, stored contiguously
 *   nvec   - number of vectors (frames)
 *   veclen - number of components per vector
 *
 * Nothing is done for an empty block.  A component whose standard deviation
 * is zero (constant over the block) is only mean-subtracted; dividing it
 * would produce 0/0 = NaN.
 *
 * NOTE(review): the scratch buffers are allocated once, sized by the veclen
 * of the first call, and reused afterwards.  Calling this function later
 * with a larger veclen would overrun them -- confirm that all callers use a
 * single vector length.
 */
void
norm_mean (float32 *vec,	/* the data */
	   uint32 nvec,		/* number of vectors (frames) */
	   uint32 veclen)	/* number of components per vector */
{
    static double *mean = 0, *var = 0;
    const char *normvar = cmd_ln_access("-varnorm");
    int do_varnorm;
    double diff;
    float32 *data;
    float sdev;
    uint32 i, f;

    /* Nothing to normalise, and avoids dividing by nvec == 0 below. */
    if (nvec == 0 || veclen == 0)
        return;

    /* Decide once whether variance normalisation is requested. */
    do_varnorm = (normvar != NULL) && (strcmp(normvar, "yes") == 0);

    if (mean == 0)
	mean = (double *) ckd_calloc (veclen, sizeof (double));
    if (var == 0)
	var = (double *) ckd_calloc (veclen, sizeof (double));

    for (i = 0; i < veclen; i++)
	mean[i] = var[i] = 0.0;

    /* Accumulate the per-component sum, then turn it into the mean. */
    for (data = vec, f = 0; f < nvec; f++, data += veclen) {
	for (i = 0; i < veclen; i++)
	    mean[i] += (double)data[i];
    }
    for (i = 0; i < veclen; i++)
	mean[i] /= (double)nvec;

    if (do_varnorm) {
	/* Per-component standard deviation around the mean just computed. */
	for (data = vec, f = 0; f < nvec; f++, data += veclen) {
	    for (i = 0; i < veclen; i++) {
		diff = (double)data[i] - mean[i];
		var[i] += diff * diff;
	    }
	}
	for (i = 0; i < veclen; i++)
	    var[i] = sqrt(var[i] / (double)nvec);
    }

    /* Mean-subtract every vector. */
    for (data = vec, f = 0; f < nvec; f++, data += veclen) {
	for (i = 0; i < veclen; i++)
	    data[i] -= (float)mean[i];
    }

    if (do_varnorm) {
	for (data = vec, f = 0; f < nvec; f++, data += veclen) {
	    for (i = 0; i < veclen; i++) {
		sdev = (float)var[i];
		/* Skip constant components instead of dividing by zero. */
		if (sdev > 0.0f)
		    data[i] /= sdev;
	    }
	}
    }
}
int parse_cmd_ln(int argc, char *argv[]) { uint32 isHelp; uint32 isExample; #include "cmd_ln_defn.h" cmd_ln_define(defn); if (argc == 1) { cmd_ln_print_definitions(); exit(1); } cmd_ln_parse(argc, argv); isHelp = *(uint32 *) cmd_ln_access("-help"); isExample = *(uint32 *) cmd_ln_access("-example"); if(isHelp){ printf("%s\n\n",helpstr); } if(isExample){ printf("%s\n\n",examplestr); } if(isHelp || isExample){ E_INFO("User asked for help or example.\n"); exit(1); } if(!isHelp && !isExample){ if (cmd_ln_validate() == FALSE) { E_FATAL("Unable to validate command line arguments\n"); } cmd_ln_print_configuration(); } return 0; }
/*
 * Store the boolean held in a GValue into the command-line argument `key`.
 *
 * NOTE: writing through the pointer returned by cmd_ln_access() is an
 * undocumented feature of SphinxBase's cmd_ln.h.  However it will be
 * officially supported in future releases.
 */
static void
gst_pocketsphinx_set_boolean (GstPocketSphinx *sink, const gchar *key, const GValue *value)
{
    anytype_t *slot = cmd_ln_access(key);

    slot->i_32 = g_value_get_boolean(value);
}
int32 align_init ( void ) { int32 k; s3wid_t w; float64 *f64arg; mdef = mdef_getmdef (); tmat = tmat_gettmat (); dict = dict_getdict (); assert (mdef && tmat && dict); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); silwid = dict_wordid (SILENCE_WORD); if ((NOT_WID(startwid)) || (NOT_WID(finishwid))) E_FATAL("%s or %s not in dictionary\n", START_WORD, FINISH_WORD); if (NOT_WID(silwid)) E_ERROR("%s not in dictionary; no optional silence inserted between words\n", SILENCE_WORD); /* Create list of optional filler words to be inserted between transcript words */ fillwid = (s3wid_t *) ckd_calloc ((dict->filler_end - dict->filler_start + 3), sizeof(s3wid_t)); k = 0; if (IS_WID(silwid)) fillwid[k++] = silwid; for (w = dict->filler_start; w <= dict->filler_end; w++) { if ((dict_basewid (w) == w) && (w != silwid) && (w != startwid) && (w != finishwid)) fillwid[k++] = w; } fillwid[k] = BAD_WID; f64arg = (float64 *) cmd_ln_access ("-beam"); beam = logs3 (*f64arg); E_INFO ("logs3(beam)= %d\n", beam); score_scale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32)); hist_head = NULL; align_stseg = NULL; align_phseg = NULL; align_wdseg = NULL; ctr_nstate = counter_new ("NS"); return 0; }
/*
 * Program entry point.  Prints the option help when run without arguments;
 * otherwise parses the command line, requires -mdef, -dict, -fdict and
 * -ref, loads the model definition and dictionaries, and processes the
 * reference file.
 *
 * The return type is spelled out: implicit int was removed in C99.
 */
int
main (int32 argc, char *argv[])
{
    char *reffile, *mdeffile, *dictfile, *fdictfile;

    if (argc == 1) {
        cmd_ln_print_help (stderr, arglist);
        exit(0);
    }

    cmd_ln_parse (arglist, argc, argv);

    if ((mdeffile = (char *) cmd_ln_access ("-mdef")) == NULL)
        E_FATAL("-mdef argument missing\n");
    if ((dictfile = (char *) cmd_ln_access ("-dict")) == NULL)
        E_FATAL("-dict argument missing\n");
    if ((fdictfile = (char *) cmd_ln_access ("-fdict")) == NULL)
        E_FATAL("-fdict argument missing\n");
    if ((reffile = (char *) cmd_ln_access ("-ref")) == NULL)
        E_FATAL("-ref argument missing\n");

    unlimit();

    mdef = mdef_init (mdeffile);
    if (mdef->n_ciphone <= 0)
        E_FATAL("0 CIphones in %s\n", mdeffile);

    dict = dict_init (mdef, dictfile, fdictfile);

    process_reffile (reffile);

    /* Disabled debugging aid (process listing); kept for reference. */
#if (0 && (! WIN32))
    fflush (stdout);
    fflush (stderr);
    system ("ps aguxwww | grep dpalign");
#endif

    exit(0);
}
static int32 cmdline_parse (int argc, char *argv[]) { int32 i; char *logfile; E_INFO("Parsing command line:\n"); for (i = 0; i < argc; i++) { if (argv[i][0] == '-') printf ("\\\n\t"); printf ("%s ", argv[i]); } printf ("\n\n"); fflush (stdout); cmd_ln_parse (argc, argv); if (cmd_ln_validate() == FALSE) { E_FATAL("Unable to validate command line arguments\n"); } logfp = NULL; if ((logfile = (char *)cmd_ln_access("-logfn")) != NULL) { if ((logfp = fopen(logfile, "w")) == NULL) { E_ERROR("fopen(%s,w) failed; logging to stdout/stderr\n"); } else { orig_stdout = *stdout; /* Hack!! To avoid hanging problem under Linux */ orig_stderr = *stderr; /* Hack!! To avoid hanging problem under Linux */ *stdout = *logfp; *stderr = *logfp; E_INFO("Command line:\n"); for (i = 0; i < argc; i++) { if (argv[i][0] == '-') printf ("\\\n\t"); printf ("%s ", argv[i]); } printf ("\n\n"); fflush (stdout); } } E_INFO("Configuration in effect:\n"); cmd_ln_print_configuration(); printf ("\n"); return 0; }
float64 cluster(uint32 ts, uint32 n_stream, uint32 n_in_frame, const uint32 *veclen, vector_t **mean, uint32 n_density, codew_t **out_label) { float64 sum_sqerr, sqerr=0; uint32 s, n_frame; const char *meth; *out_label = NULL; blksize = feat_blksize(); k_means_set_get_obs(&get_obs); for (s = 0, sum_sqerr = 0; s < n_stream; s++, sum_sqerr += sqerr) { meth = (const char *)cmd_ln_access("-method"); n_frame = setup_obs(ts, s, n_in_frame, veclen); if (strcmp(meth, "rkm") == 0) { sqerr = random_kmeans(*(uint32 *)cmd_ln_access("-ntrial"), n_frame, veclen[s], mean[s], n_density, *(float32 *)cmd_ln_access("-minratio"), *(uint32 *)cmd_ln_access("-maxiter"), out_label); if (sqerr < 0) { E_ERROR("Too few observations for kmeans\n"); return -1.0; } } else if (strcmp(meth, "fnkm") == 0) { sqerr = furthest_neighbor_kmeans(n_frame, veclen[s], mean[s], n_density, *(float32 *)cmd_ln_access("-minratio"), *(uint32 *)cmd_ln_access("-maxiter")); } else { E_ERROR("I don't know how to do method '%s'. Sorry.\n", meth); } } return sum_sqerr; }
/*
 * Store a copy of the string held in a GValue (or NULL when value is NULL)
 * into the command-line argument `key`, and record the new string in the
 * sink's arghash so that the previously recorded string can be freed on the
 * next update.
 *
 * NOTE: writing through the pointer returned by cmd_ln_access() is an
 * undocumented feature of SphinxBase's cmd_ln.h.  However it will be
 * officially supported in future releases.
 */
static void
gst_pocketsphinx_set_string (GstPocketSphinx *sink, const gchar *key, const GValue *value)
{
    anytype_t *slot = cmd_ln_access(key);
    gchar *previous;

    slot->ptr = (value == NULL) ? NULL : g_strdup(g_value_get_string(value));

    /* Release the string recorded by an earlier call for this key, if any. */
    previous = g_hash_table_lookup(sink->arghash, key);
    if (previous)
        g_free(previous);

    g_hash_table_insert(sink->arghash, (gpointer)key, slot->ptr);
}
int main(int argc, char *argv[]) { int32 i; int32 n_map=0; int32 n_class=0; int32 *mllr_map; char line[128]; parse_cmd_ln(argc, argv); if (cmd_ln_access("-nmap")) { n_map = *(int32 *)cmd_ln_access("-nmap"); } else { E_FATAL("Specify # of state -> MLLR class mappings using -nmap\n"); } if (cmd_ln_access("-nclass")) { n_class = *(int32 *)cmd_ln_access("-nclass"); } else { E_FATAL("Specify # of MLLR class mappings using -nclass\n"); } if (cmd_ln_access("-cb2mllrfn") == NULL) { E_FATAL("Specify output file using -cb2mllrfn\n"); } mllr_map = (int32 *)ckd_calloc(n_map, sizeof(int32)); for (i = 0; i < n_map; i++) { if (fgets(line, 128, stdin) == NULL) { E_FATAL("Ran out of mappings at %d, but expected %d\n", i, n_map); } mllr_map[i] = atoi(line); } if (fgets(line, 128, stdin) != NULL) { E_WARN("Expected EOF after %d mappings, but still more data\n", n_map); } if (s3cb2mllr_write((const char *)cmd_ln_access("-cb2mllrfn"), mllr_map, n_map, n_class) != S3_SUCCESS) { return 1; } return 0; }
/*
 * Program entry point.  Prints the option help when run without arguments;
 * otherwise parses the command line, requires -mdef, -dict, -fdict and
 * -ref, loads the model definition and dictionaries, resolves the sentence
 * start/end and silence word ids, optionally loads the -hom file, and
 * processes the reference file.
 *
 * The return type is spelled out: implicit int was removed in C99.
 */
int
main (int32 argc, char *argv[])
{
    char *reffile, *mdeffile, *dictfile, *fdictfile, *homfile;

    if (argc == 1) {
        cmd_ln_print_help (stderr, arglist);
        exit(0);
    }

    cmd_ln_parse (arglist, argc, argv);

    if ((mdeffile = (char *) cmd_ln_access ("-mdef")) == NULL)
        E_FATAL("-mdef argument missing\n");
    if ((dictfile = (char *) cmd_ln_access ("-dict")) == NULL)
        E_FATAL("-dict argument missing\n");
    if ((fdictfile = (char *) cmd_ln_access ("-fdict")) == NULL)
        E_FATAL("-fdict argument missing\n");
    if ((reffile = (char *) cmd_ln_access ("-ref")) == NULL)
        E_FATAL("-ref argument missing\n");

    unlimit();

    mdef = mdef_init (mdeffile);
    if (mdef->n_ciphone <= 0)
        E_FATAL("0 CIphones in %s\n", mdeffile);

    dict = dict_init (mdef, dictfile, fdictfile);

    /* Word ids at or beyond the current dictionary size are recorded as the OOV start. */
    oovbegin = dict->n_word;

    startwid = dict_wordid (dict, "<s>");
    finishwid = dict_wordid (dict, "</s>");
    silwid = dict_wordid (dict, (char *) cmd_ln_access("-sil"));
    /* NOTE(review): this check disappears when compiled with NDEBUG. */
    assert (dict_filler_word (dict, silwid));

    homlist = NULL;
    if ((homfile = (char *) cmd_ln_access ("-hom")) != NULL)
        homfile_load (homfile);

    process_reffile (reffile);

    /* Disabled debugging aid (process listing); kept for reference. */
#if (0 && (! WIN32))
    fflush (stdout);
    fflush (stderr);
    system ("ps aguxwww | grep dpalign");
#endif

    exit(0);
}
/*
 * Apply cepstral mean normalisation to n_frame vectors stored contiguously
 * in mfcc, according to the -cmn option:
 *   "current" - norm_mean() over the whole block
 *   "prior"   - mean_norm_acc_sub() on each frame, then mean_norm_update()
 *   "none"    - leave the data untouched
 * Any other value is reported with E_FATAL.  The vector length is taken
 * from the file-level variable veclen.
 */
void
cmn(float32 *mfcc, uint32 n_frame)
{
    const char *type = cmd_ln_access("-cmn");
    uint32 frm;

    if (strcmp(type, "current") == 0) {
        norm_mean(mfcc, n_frame, veclen);
        return;
    }

    if (strcmp(type, "prior") == 0) {
        for (frm = 0; frm < n_frame; frm++) {
            mean_norm_acc_sub(mfcc + frm * veclen);
        }
        mean_norm_update();
        return;
    }

    if (strcmp(type, "none") != 0) {
        E_FATAL("Unsupported CMN type %s\n", type);
    }
}
/* Process utterances in the control file (-ctlfn argument) */ static void process_ctlfile ( void ) { FILE *ctlfp, *matchfp, *matchsegfp; char *ctlfile; char *matchfile, *matchsegfile; char line[1024], ctlspec[1024], uttid[1024]; int32 ctloffset, ctlcount; int32 i, k, sf, ef; if ((ctlfile = (char *) cmd_ln_access("-ctlfn")) == NULL) E_FATAL("No -ctlfn argument\n"); E_INFO("Processing ctl file %s\n", ctlfile); if ((ctlfp = fopen (ctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", ctlfile); if ((matchfile = (char *) cmd_ln_access("-matchfn")) == NULL) { E_WARN("No -matchfn argument\n"); matchfp = NULL; } else { if ((matchfp = fopen (matchfile, "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", matchfile); } if ((matchsegfile = (char *) cmd_ln_access("-matchsegfn")) == NULL) { E_WARN("No -matchsegfn argument\n"); matchsegfp = NULL; } else { if ((matchsegfp = fopen (matchsegfile, "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", matchsegfile); } ctloffset = *((int32 *) cmd_ln_access("-ctloffset")); if (! cmd_ln_access("-ctlcount")) ctlcount = 0x7fffffff; /* All entries processed if no count specified */ else ctlcount = *((int32 *) cmd_ln_access("-ctlcount")); if (ctlcount == 0) { E_INFO("-ctlcount argument = 0!!\n"); fclose (ctlfp); return; } if (ctloffset > 0) E_INFO("Skipping %d utterances in the beginning of control file\n", ctloffset); while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { if (sscanf (line, "%s", ctlspec) > 0) --ctloffset; } while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { printf ("\n"); E_INFO("Utterance: %s", line); sf = 0; ef = (int32)0x7ffffff0; if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0) continue; /* Empty line */ if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) { E_ERROR("Error in ctlfile spec; skipped\n"); /* What happens to ctlcount??? 
*/ continue; } if (k < 4) { /* Create utt-id from mfc-filename (and sf/ef if specified) */ for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i); if (k == 3) sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef); else strcpy (uttid, ctlspec+i+1); } decode_utt (uttid, matchfp, matchsegfp); --ctlcount; } printf ("\n"); if (fscanf (ctlfp, "%s", line) == 1) E_INFO("Skipping rest of control file beginning with:\n\t%s\n", line); if (matchfp) fclose (matchfp); if (matchsegfp) fclose (matchsegfp); fclose (ctlfp); }
/*
 * Parse the command line and load everything the tool needs before
 * training: timers, feature configuration, optional LDA matrix, output and
 * dump model definitions, the tied-state to codebook mapping, and the main
 * and filler lexicons.
 *
 *   out_lex   - receives the lexicon, or NULL when -dictfn was not given
 *   out_omdef - receives the output model definition, or NULL when
 *               -omoddeffn was not given
 *   out_dmdef - receives the dump model definition, or NULL when
 *               -dmoddeffn was not given
 *
 * Returns S3_SUCCESS, or S3_ERROR when a model definition, mapping or main
 * lexicon file cannot be read.  Configuration errors are reported with
 * E_FATAL; that now includes giving -dictfn or -fdictfn without
 * -omoddeffn, which previously dereferenced a NULL model definition.
 */
int
main_initialize(int argc, char *argv[], lexicon_t **out_lex, model_def_t **out_omdef, model_def_t **out_dmdef)
{
    model_def_t *dmdef = NULL;
    model_def_t *omdef = NULL;
    lexicon_t *lex = NULL;
    const char *fn;
    uint32 n_ts;
    uint32 n_cb;
    const char *ts2cbfn;

    parse_cmd_ln(argc, argv);

    /* Named timers; looked up later with timing_get(). */
    timing_bind_name("km", timing_new());
    timing_bind_name("var", timing_new());
    timing_bind_name("em", timing_new());
    timing_bind_name("all", timing_new());

    if (cmd_ln_access("-feat") != NULL) {
        feat_set(cmd_ln_str("-feat"));
        feat_set_in_veclen(cmd_ln_int32("-ceplen"));
        feat_set_subvecs(cmd_ln_str("-svspec"));
    }
    else {
        E_FATAL("You need to set a feature extraction config using -feat\n");
    }

    if (cmd_ln_access("-ldafn") != NULL) {
        if (feat_read_lda(cmd_ln_access("-ldafn"), cmd_ln_int32("-ldadim"))) {
            E_FATAL("Failed to read LDA matrix\n");
        }
    }

    if (cmd_ln_access("-omoddeffn")) {
        E_INFO("Reading output model definitions: %s\n", cmd_ln_access("-omoddeffn"));

        /* Read in the model definitions.  Defines the set of
           CI phones and context dependent phones.  Defines the
           transition matrix tying and state level tying. */
        if (model_def_read(&omdef,
                           cmd_ln_access("-omoddeffn")) != S3_SUCCESS) {
            return S3_ERROR;
        }

        if (cmd_ln_access("-dmoddeffn")) {
            E_INFO("Reading dump model definitions: %s\n", cmd_ln_access("-dmoddeffn"));

            if (model_def_read(&dmdef,
                               cmd_ln_access("-dmoddeffn")) != S3_SUCCESS) {
                return S3_ERROR;
            }
            setup_d2o_map(dmdef, omdef);
        }
        else {
            E_INFO("Assuming dump and output model definitions are identical\n");
        }

        /* -ts2cbfn is either one of the built-in labels or a mapping file name. */
        ts2cbfn = cmd_ln_access("-ts2cbfn");
        if (ts2cbfn) {
            if (strcmp(SEMI_LABEL, ts2cbfn) == 0) {
                omdef->cb = semi_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = 1;
            }
            else if (strcmp(CONT_LABEL, ts2cbfn) == 0) {
                omdef->cb = cont_ts2cb(omdef->n_tied_state);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->n_tied_state;
            }
            else if (strcmp(PTM_LABEL, ts2cbfn) == 0) {
                omdef->cb = ptm_ts2cb(omdef);
                n_ts = omdef->n_tied_state;
                n_cb = omdef->acmod_set->n_ci;
            }
            else if (s3ts2cb_read(cmd_ln_access("-ts2cbfn"),
                                  &omdef->cb,
                                  &n_ts,
                                  &n_cb) != S3_SUCCESS) {
                return S3_ERROR;
            }

            if (omdef->n_tied_state != n_ts) {
                E_FATAL("Model definition file n_tied_state = %u, but %u mappings in ts2cb\n",
                        omdef->n_tied_state, n_ts);
            }
        }
    }
    else {
        E_INFO("No mdef files. Assuming 1-class init\n");
    }

    *out_omdef = omdef;
    *out_dmdef = dmdef;

    fn = cmd_ln_access("-dictfn");
    if (fn) {
        /* The lexicon reader is handed omdef->acmod_set, so omdef must exist. */
        if (omdef == NULL) {
            E_FATAL("-dictfn requires a model definition; specify -omoddeffn\n");
        }
        E_INFO("Reading main lexicon: %s\n", fn);

        lex = lexicon_read(NULL, fn, omdef->acmod_set);
        if (lex == NULL)
            return S3_ERROR;
    }

    fn = cmd_ln_access("-fdictfn");
    if (fn) {
        if (omdef == NULL) {
            E_FATAL("-fdictfn requires a model definition; specify -omoddeffn\n");
        }
        E_INFO("Reading filler lexicon: %s\n", fn);

        /* Result deliberately ignored, as in the original code. */
        (void)lexicon_read(lex, fn, omdef->acmod_set);
    }

    *out_lex = lex;

    stride = *(int32 *)cmd_ln_access("-stride");

    return S3_SUCCESS;
}
int main(int argc, char *argv[]) { lexicon_t *lex; model_def_t *omdef; model_def_t *dmdef; uint32 n_stream; const uint32 *veclen; uint32 ts_off; uint32 ts_cnt; FILE *fp; timing_t *all_timer= NULL; timing_t *km_timer= NULL; timing_t *var_timer= NULL; timing_t *em_timer= NULL; if (main_initialize(argc, argv, &lex, &omdef, &dmdef) != S3_SUCCESS) { return -1; } km_timer = timing_get("km"); var_timer = timing_get("var"); em_timer = timing_get("em"); all_timer = timing_get("all"); n_stream = feat_n_stream(); veclen = feat_vecsize(); if (strcmp((const char *)cmd_ln_access("-gthobj"), "state") == 0) { ts_off = *(uint32 *)cmd_ln_access("-tsoff"); if (cmd_ln_access("-tscnt") == NULL) { ts_cnt = omdef->n_tied_state - ts_off; } else { ts_cnt = *(uint32 *)cmd_ln_access("-tscnt"); } if (ts_off + ts_cnt > omdef->n_tied_state) { E_FATAL("Too many tied states specified\n"); } n_tot_frame = 0; if (all_timer) timing_reset(all_timer); if (km_timer) timing_reset(km_timer); if (var_timer) timing_reset(var_timer); if (em_timer) timing_reset(em_timer); if (all_timer) timing_start(all_timer); if (init_state((const char *)cmd_ln_access("-segdmpfn"), (const char *)cmd_ln_access("-segidxfn"), *(int32 *)cmd_ln_access("-ndensity"), n_stream, veclen, *(int32 *)cmd_ln_access("-reest"), (const char *)cmd_ln_access("-mixwfn"), (const char *)cmd_ln_access("-meanfn"), (const char *)cmd_ln_access("-varfn"), ts_off, ts_cnt, omdef->n_tied_state, (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state)) != S3_SUCCESS) { E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1); } if (all_timer) timing_stop(all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); if (km_timer) { E_INFOCONT(" km %4.3fx %4.3e", km_timer->t_cpu / (n_tot_frame * 0.01), (km_timer->t_cpu > 0 ? km_timer->t_elapsed / km_timer->t_cpu : 0.0)); } if (var_timer) { E_INFOCONT(" var %4.3fx %4.3e", var_timer->t_cpu / (n_tot_frame * 0.01), (var_timer->t_cpu > 0 ? 
var_timer->t_elapsed / var_timer->t_cpu : 0.0)); } if (em_timer) { E_INFOCONT(" em %4.3fx %4.3e", em_timer->t_cpu / (n_tot_frame * 0.01), (em_timer->t_cpu > 0 ? em_timer->t_elapsed / em_timer->t_cpu : 0.0)); } if (all_timer) { E_INFOCONT(" all %4.3fx %4.3e", all_timer->t_cpu / (n_tot_frame * 0.01), (all_timer->t_cpu > 0 ? all_timer->t_elapsed / all_timer->t_cpu : 0.0)); } E_INFOCONT("\n"); } if (cmd_ln_access("-tsrngfn") != NULL) { fp = fopen((const char *)cmd_ln_access("-tsrngfn"), "w"); if (fp == NULL) { E_FATAL_SYSTEM("Unable to open %s for reading", (const char *)cmd_ln_access("-tsrngfn")); } fprintf(fp, "%d %d\n", ts_off, ts_cnt); } else if (ts_cnt != omdef->n_tied_state) { E_WARN("Subset of tied states specified, but no -tsrngfn arg"); } } else if (strcmp((const char *)cmd_ln_access("-gthobj"), "single") == 0) { n_tot_frame = 0; if (all_timer) timing_reset(all_timer); if (km_timer) timing_reset(km_timer); if (var_timer) timing_reset(var_timer); if (em_timer) timing_reset(em_timer); if (all_timer) timing_start(all_timer); if (init_state((const char *)cmd_ln_access("-segdmpfn"), NULL, /* No index -> single class dump file */ *(int32 *)cmd_ln_access("-ndensity"), n_stream, veclen, *(int32 *)cmd_ln_access("-reest"), (const char *)cmd_ln_access("-mixwfn"), (const char *)cmd_ln_access("-meanfn"), (const char *)cmd_ln_access("-varfn"), 0, 1, 1, 1) != S3_SUCCESS) { E_ERROR("Unable to train\n"); } if (all_timer) timing_stop(all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); if (km_timer) { E_INFOCONT(" km %4.3fx %4.3e", km_timer->t_cpu / (n_tot_frame * 0.01), (km_timer->t_cpu > 0 ? km_timer->t_elapsed / km_timer->t_cpu : 0.0)); } if (var_timer) { E_INFOCONT(" var %4.3fx %4.3e", var_timer->t_cpu / (n_tot_frame * 0.01), (var_timer->t_cpu > 0 ? var_timer->t_elapsed / var_timer->t_cpu : 0.0)); } if (em_timer) { E_INFOCONT(" em %4.3fx %4.3e", em_timer->t_cpu / (n_tot_frame * 0.01), (em_timer->t_cpu > 0 ? 
em_timer->t_elapsed / em_timer->t_cpu : 0.0)); } if (all_timer) { E_INFOCONT(" all %4.3fx %4.3e", all_timer->t_cpu / (n_tot_frame * 0.01), (all_timer->t_cpu > 0 ? all_timer->t_elapsed / all_timer->t_cpu : 0.0)); } E_INFOCONT("\n"); } } return 0; }
/*
 * Initialise Gaussian parameters for ts_cnt tied states starting at ts_off.
 *
 * For every state that has observations: cluster them with cluster(),
 * compute variances (diagonal, or full when -fullvar is set), and, when a
 * mixing weight file is requested, initialise the mixing weights and
 * optionally run EM re-estimation (diagonal covariances only).  The results
 * are then written to meanfn / varfn / mixwfn, whichever are non-NULL.
 *
 *   obsdmp, obsidx - not used by the body; the dump and index file names
 *                    are re-read from -segdmpfn / -segidxfn
 *   n_density      - number of densities per stream
 *   n_stream       - number of feature streams
 *   veclen         - per-stream vector lengths
 *   reest          - TRUE to run EM re-estimation
 *   mixwfn, meanfn, varfn - output file names; NULL means "do not write"
 *   ts_off, ts_cnt - range of tied states to process
 *   n_ts           - not used by the body
 *   n_d_ts         - number of tied states expected in a multi-class dump
 *
 * Returns S3_SUCCESS, or S3_ERROR when the single-class dump cannot be
 * opened/read or an output file cannot be written.
 *
 * The codeword labels returned by cluster() are released for every state
 * (previously only when mixwfn was set), and the dump file is closed when
 * its header cannot be read.
 */
static int
init_state(const char *obsdmp, const char *obsidx, uint32 n_density, uint32 n_stream, const uint32 *veclen, int reest, const char *mixwfn, const char *meanfn, const char *varfn, uint32 ts_off, uint32 ts_cnt, uint32 n_ts, uint32 n_d_ts)
{
    uint32 blksz;
    vector_t ***mean;
    vector_t ***var = NULL;
    vector_t ****fullvar = NULL;
    float32 ***mixw = NULL;
    uint32 n_frame;
    uint32 ignore = 0;
    codew_t *label;
    uint32 n_corpus = 0;
    float64 sqerr;
    float64 tot_sqerr;
    segdmp_type_t t;
    uint32 i, j, ts, n;
    timing_t *km_timer;
    timing_t *var_timer;
    timing_t *em_timer;
    int32 full_covar;

    km_timer = timing_get("km");
    var_timer = timing_get("var");
    em_timer = timing_get("em");

    blksz = feat_blksize();

    full_covar = cmd_ln_int32("-fullvar");
    /* fully-continuous for now */
    mean = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (full_covar)
        fullvar = gauden_alloc_param_full(ts_cnt, n_stream, n_density, veclen);
    else
        var = gauden_alloc_param(ts_cnt, n_stream, n_density, veclen);
    if (mixwfn)
        mixw = (float32 ***)ckd_calloc_3d(ts_cnt,
                                          n_stream,
                                          n_density,
                                          sizeof(float32));

    if ((const char *)cmd_ln_access("-segidxfn")) {
        E_INFO("Multi-class dump\n");
        if (segdmp_open_read((const char **)cmd_ln_access("-segdmpdirs"),
                             (const char *)cmd_ln_access("-segdmpfn"),
                             (const char *)cmd_ln_access("-segidxfn"),
                             &n,
                             &t) != S3_SUCCESS) {
            E_FATAL("Unable to open dumps\n");
        }

        if (n != n_d_ts) {
            E_FATAL("Expected %u tied-states in dump, but apparently %u\n",
                    n_d_ts, n);
        }
        if (t != SEGDMP_TYPE_FEAT) {
            E_FATAL("Expected feature dump, but instead saw %u\n", t);
        }

        multiclass = TRUE;
    }
    else {
        E_INFO("1-class dump file\n");

        multiclass = FALSE;

        dmp_fp = s3open((const char *)cmd_ln_access("-segdmpfn"), "rb",
                        &dmp_swp);
        if (dmp_fp == NULL) {
            E_ERROR_SYSTEM("Unable to open dump file %s for reading\n",
                           (const char *)cmd_ln_access("-segdmpfn"));

            return S3_ERROR;
        }

        /* The dump starts with the frame count; observations follow it. */
        if (s3read(&n_frame, sizeof(uint32), 1, dmp_fp, dmp_swp, &ignore) != 1) {
            E_ERROR_SYSTEM("Unable to read frame count from dump file %s\n",
                           (const char *)cmd_ln_access("-segdmpfn"));

            /* Do not leak the stream opened above. */
            s3close(dmp_fp);

            return S3_ERROR;
        }

        data_offset = ftell(dmp_fp);
    }

    tot_sqerr = 0;
    for (i = 0; i < ts_cnt; i++) {
        ts = ts_off + i;

        /* stride not accounted for yet */
        if (o2d == NULL) {
            if (multiclass)
                n_frame = segdmp_n_seg(ts);
        }
        else {
            /* Sum the segment counts of every dump state mapped to this output state. */
            for (j = 0, n_frame = 0; j < n_o2d[ts]; j++) {
                n_frame += segdmp_n_seg(o2d[ts][j]);
            }
        }

        E_INFO("Corpus %u: sz==%u frames%s\n",
               ts, n_frame,
               (n_frame > *(uint32 *)cmd_ln_access("-vartiethr") ? "" : " tied var"));

        if (n_frame == 0) {
            continue;
        }

        E_INFO("Convergence ratios are abs(cur - prior) / abs(prior)\n");

        /* Do some variety of k-means clustering */
        if (km_timer)
            timing_start(km_timer);
        sqerr = cluster(ts, n_stream, n_frame, veclen, mean[i], n_density, &label);
        if (km_timer)
            timing_stop(km_timer);

        if (sqerr < 0) {
            E_ERROR("Unable to do k-means for state %u; skipping...\n", ts);

            continue;
        }

        /* Given the k-means and assuming equal prior liklihoods
         * compute the variances */
        if (var_timer)
            timing_start(var_timer);
        if (full_covar)
            full_variances(ts, mean[i], fullvar[i], n_density, veclen,
                           n_frame, n_stream, label);
        else
            variances(ts, mean[i], var[i], n_density, veclen, n_frame, n_stream, label);
        if (var_timer)
            timing_stop(var_timer);

        if (mixwfn) {
            /* initialize the mixing weights by counting # of occurrances
             * of the top codeword over the corpus and normalizing */
            init_mixw(mixw[i], mean[i], n_density, veclen, n_frame, n_stream, label);
        }

        /* The labels are not needed past this point, whether or not mixing
         * weights were requested. */
        ckd_free(label);

        if (mixwfn) {
            if (reest == TRUE && full_covar)
                E_ERROR("EM re-estimation is not yet supported for full covariances\n");
            else if (reest == TRUE) {
                if (em_timer)
                    timing_start(em_timer);
                /* Do iterations of EM to estimate the mixture densities */
                reest_sum(ts, mean[i], var[i], mixw[i], n_density, n_stream,
                          n_frame, veclen,
                          *(uint32 *)cmd_ln_access("-niter"),
                          FALSE,
                          *(uint32 *)cmd_ln_access("-vartiethr"));
                if (em_timer)
                    timing_stop(em_timer);
            }
        }

        ++n_corpus;
        tot_sqerr += sqerr;

        E_INFO("sqerr [%u] == %e\n", ts, sqerr);
    }

    if (n_corpus > 0) {
        E_INFO("sqerr = %e tot %e rms\n", tot_sqerr, sqrt(tot_sqerr/n_corpus));
    }

    if (!multiclass)
        s3close(dmp_fp);

    if (meanfn) {
        if (s3gau_write(meanfn,
                        (const vector_t ***)mean,
                        ts_cnt,
                        n_stream,
                        n_density,
                        veclen) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mean file given; none written\n");
    }

    if (varfn) {
        if (full_covar) {
            if (s3gau_write_full(varfn,
                                 (const vector_t ****)fullvar,
                                 ts_cnt,
                                 n_stream,
                                 n_density,
                                 veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
        else {
            if (s3gau_write(varfn,
                            (const vector_t ***)var,
                            ts_cnt,
                            n_stream,
                            n_density,
                            veclen) != S3_SUCCESS)
                return S3_ERROR;
        }
    }
    else {
        E_INFO("No variance file given; none written\n");
    }

    if (mixwfn) {
        if (s3mixw_write(mixwfn,
                         mixw,
                         ts_cnt,
                         n_stream,
                         n_density) != S3_SUCCESS) {
            return S3_ERROR;
        }
    }
    else {
        E_INFO("No mixing weight file given; none written\n");
    }

    return S3_SUCCESS;
}
/*
 * Program entry point.
 *
 * Arguments come either from the command line or from an argument file: a
 * single non-option argument names the file, and with no arguments at all
 * the default file "s3decode.arg" is tried.  "help" as the only argument
 * prints the option definitions.  After parsing, the log base is
 * initialised, the models are loaded, the search module is initialised, the
 * control file is processed and timing totals are printed.
 *
 * The return type is spelled out: implicit int was removed in C99.
 */
int
main (int32 argc, char *argv[])
{
    char *str;

#if 0
    ckd_debug(100000);
#endif

    /* Digest command line argument definitions */
    cmd_ln_define (defn);

    if ((argc == 2) && (strcmp (argv[1], "help") == 0)) {
        cmd_ln_print_definitions();
        exit(1);
    }

    /* Look for default or specified arguments file */
    str = NULL;
    if ((argc == 2) && (argv[1][0] != '-'))
        str = argv[1];
    else if (argc == 1) {
        str = "s3decode.arg";
        E_INFO("Looking for default argument file: %s\n", str);
    }
    if (str) {
        /* Build command line argument list from file */
        if ((argc = load_argfile (str, argv[0], &argv)) < 0) {
            fprintf (stderr, "Usage:\n");
            fprintf (stderr, "\t%s argument-list, or\n", argv[0]);
            fprintf (stderr, "\t%s [argument-file] (default file: s3decode.arg)\n\n",
                     argv[0]);
            cmd_ln_print_definitions();
            exit(1);
        }
    }

    cmdline_parse (argc, argv);

    /* Remove memory allocation restrictions */
    unlimit ();

#if (! WIN32)
    {
        char buf[1024];

        /* Only report the host name when the lookup succeeded; on failure
         * the buffer contents are unspecified. */
        if (gethostname (buf, 1024) == 0) {
            buf[1023] = '\0';
            E_INFO ("Executing on: %s\n", buf);
        }
    }
#endif

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if ((cmd_ln_access("-mdeffn") == NULL) ||
        (cmd_ln_access("-dictfn") == NULL) ||
        (cmd_ln_access("-lmfn") == NULL))
        E_FATAL("Missing -mdeffn, -dictfn, or -lmfn argument\n");

    /*
     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid
     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values
     * to be maintained in int32 variables without significant loss of precision.
     */
    if (cmd_ln_access("-logbase") == NULL)
        logs3_init (1.0001);
    else {
        float32 logbase;

        logbase = *((float32 *) cmd_ln_access("-logbase"));
        if (logbase <= 1.0)
            E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);
        if (logbase > 1.1)
            E_WARN("Logbase %e perhaps too large??\n", logbase);
        logs3_init ((float64) logbase);
    }

    /* Read in input databases */
    models_init ();

    /* Allocate timing object */
    tm_utt = timing_new ();
    tot_nfr = 0;

    /* Initialize forward Viterbi search module */
    dag_init ();
    printf ("\n");

    process_ctlfile ();

    printf ("\n");
    printf("TOTAL FRAMES:       %8d\n", tot_nfr);
    if (tot_nfr > 0) {
        printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_cpu, tm_utt->t_tot_cpu/(tot_nfr*0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_elapsed, tm_utt->t_tot_elapsed/(tot_nfr*0.01));
    }
    fflush (stdout);

#if (! WIN32)
    system ("ps auxwww | grep s3dag");
#endif

    /* Hack!! To avoid hanging problem under Linux */
    if (logfp) {
        fclose (logfp);
        *stdout = orig_stdout;
        *stderr = orig_stderr;
    }

    exit(0);
}
int main(int argc, char *argv[]) { model_def_t *mdef; model_def_entry_t *defn; uint32 n_defn; uint32 *cluster_offset; uint32 max_int; uint32 *state_of; uint32 max_state; uint32 sstate; int32 i; uint32 j; uint32 n_base_phone; acmod_id_t base; acmod_id_t p; float32 ***out; uint32 **smap; char comment[4192]; time_t t; parse_cmd_ln(argc, argv); printf("%s(%d): Reading model definition file %s\n", __FILE__, __LINE__, (const char *)cmd_ln_access("-moddeffn")); if (model_def_read(&mdef, cmd_ln_access("-moddeffn")) != S3_SUCCESS) { exit(1); } defn = mdef->defn; n_defn = mdef->n_defn; printf("%s(%d): %d models defined\n", __FILE__, __LINE__, n_defn); smap = ckd_calloc(n_defn, sizeof(uint32 *)); n_base_phone = acmod_set_n_ci(mdef->acmod_set); cluster_offset = ckd_calloc(n_base_phone+1, sizeof(uint32)); max_int = 0; --max_int; /* underflow offset values to max value */ for (i = 0; i < n_base_phone; i++) { cluster_offset[i] = max_int; } for (i = 0, max_state = 0; i < n_defn; i++) { for (j = 0; j < defn[i].n_state; j++) { sstate = defn[i].state[j]; if ((sstate != TYING_NON_EMITTING) && (defn[i].state[j] > max_state)) max_state = defn[i].state[j]; } } /* record the total # of senones */ cluster_offset[n_base_phone] = max_state+1; state_of = ckd_calloc(max_state+1, sizeof(uint32)); for (i = 0; i <= max_state; i++) state_of[i] = NO_STATE; for (i = 0; i < n_defn; i++) { p = defn[i].p; base = acmod_set_base_phone(mdef->acmod_set, defn[i].p); smap[i] = defn[i].state; for (j = 0; j < defn[i].n_state; j++) { sstate = defn[i].state[j]; if (sstate != TYING_NON_EMITTING) { if (state_of[sstate] == NO_STATE) state_of[sstate] = j; else if (state_of[sstate] != j) { printf("%s %d appears as %d%s and %d%s model states\n", acmod_set_id2name(mdef->acmod_set, acmod_set_base_phone(mdef->acmod_set, defn[i].p)), sstate, state_of[sstate], ord_suff(state_of[sstate]), j, ord_suff(j)); } if ((p != base) && (cluster_offset[base] > sstate)) { cluster_offset[base] = sstate; } } } } /* any untouched 
CLUSTER_OFFSET's implies a base phone without any CD states. So offset is same as next one */ for (i = (n_base_phone - 1); i >= 0 ; i--) { if (cluster_offset[i] == max_int) cluster_offset[i] = cluster_offset[i+1]; } fflush(stdout); for (i = 0; i < n_base_phone; i++) { if (cluster_offset[i] != max_int) { fprintf(stderr, "%s(%d): %s offset %d\n", __FILE__, __LINE__, acmod_set_id2name(mdef->acmod_set, i), cluster_offset[i]); } else { fprintf(stderr, "%s(%d): %s <no CD states>\n", __FILE__, __LINE__, acmod_set_id2name(mdef->acmod_set, i)); } } fflush(stderr); printf("%s(%d): Reading senone weights in %s with floor %e\n", __FILE__, __LINE__, (const char *)cmd_ln_access("-hmmdir"), *(float32 *)cmd_ln_access("-floor")); out = s2_read_seno_3(mdef->acmod_set, cluster_offset, cmd_ln_access("-hmmdir"), (*(int32 *)cmd_ln_access("-ci2cd") ? NULL : smap), *(float32 *)cmd_ln_access("-floor"), state_of); t = time(NULL); sprintf(comment, "Generated on %s\n\tmoddeffn: %s\n\tfloor: %e\n\thmmdir: %s\n\n\n\n\n\n\n\n\n", ctime(&t), (const char *)cmd_ln_access("-moddeffn"), *(float32 *)cmd_ln_access("-floor"), (const char *)cmd_ln_access("-hmmdir")); fflush(stdout); fprintf(stderr, "%s(%d): writing %s\n", __FILE__, __LINE__, (const char *)cmd_ln_access("-mixwfn")); fflush(stderr); if (s3mixw_write(cmd_ln_access("-mixwfn"), out, cluster_offset[n_base_phone], /* total # states */ S2_N_FEATURE, S2_N_CODEWORD) != S3_SUCCESS) { fflush(stdout); fprintf(stderr, "%s(%d): couldn't write mixture weight file\n", __FILE__, __LINE__); perror(cmd_ln_access("-mixwfn")); fflush(stderr); } ckd_free(state_of); ckd_free(cluster_offset); return 0; }
int32 parse_cmd_ln(int argc, char *argv[]) { uint32 isHelp; uint32 isExample; const char helpstr[]= "Description: \n\ Convert s3 model definition file and s3 mixture weight file to a s2 senddump file."; const char examplestr[]= "Example: \n\ \n\ mk_s2sendump -moddeffn s3mdef -mixwfn s3mixw -sendumpfn s2dir/sendump\n"; static arg_def_t defn[] = { { "-help", CMD_LN_BOOLEAN, CMD_LN_NO_VALIDATION, "no", "Shows the usage of the tool"}, { "-example", CMD_LN_BOOLEAN, CMD_LN_NO_VALIDATION, "no", "Shows example of how to use the tool"}, { "-moddeffn", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "The model definition file for the model inventory to train" }, { "-mixwfn", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "The mixture weight parameter file name"}, { "-sendumpfn", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "Output sendump file name"}, { "-pocketsphinx", CMD_LN_BOOLEAN, CMD_LN_NO_VALIDATION, "no", "Write a PocketSphinx-format senone dump file"}, { "-mwfloor", CMD_LN_FLOAT32, CMD_LN_NO_VALIDATION, "0.00001", "Mixing weight smoothing floor" }, { NULL, CMD_LN_UNDEF, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, NULL } }; cmd_ln_define(defn); if (argc == 1) { cmd_ln_print_definitions(); exit(1); } cmd_ln_parse(argc, argv); isHelp = *(uint32 *) cmd_ln_access("-help"); isExample = *(uint32 *) cmd_ln_access("-example"); if(isHelp){ printf("%s\n\n",helpstr); } if(isExample){ printf("%s\n\n",examplestr); } if(isHelp || isExample){ E_INFO("User asked for help or example.\n"); exit(1); } if(!isHelp && !isExample){ cmd_ln_print_configuration(); } return 0; }
int parse_cmd_ln(int argc, char *argv[]) { uint32 isHelp; uint32 isExample; const char helpstr[]= "Description : \n\ Create a model definition file with tied state from model definition file without tied states. "; const char examplestr[]= "Example: \n\ tiestate -imoddeffn imdef -omoddeffn omdef -treedir trees -psetfn questions \n\ \n\ This is an example of the input and output format, Find more details at, \n\ http://www.speech.cs.cmu.edu/sphinxman"; static arg_def_t defn[] = { { "-help", CMD_LN_BOOLEAN, CMD_LN_NO_VALIDATION, "no", "Shows the usage of the tool"}, { "-example", CMD_LN_BOOLEAN, CMD_LN_NO_VALIDATION, "no", "Shows example of how to use the tool"}, { "-imoddeffn", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "Untied-state model definition file"}, { "-omoddeffn", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "Tied-state model definition file"}, { "-treedir", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "SPHINX-III tree directory containing pruned trees"}, { "-psetfn", CMD_LN_STRING, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, "Phone set definiton file" }, { "-allphones", CMD_LN_BOOLEAN, CMD_LN_NO_VALIDATION, "no", "Use a single tree for each state of all phones"}, { NULL, CMD_LN_UNDEF, CMD_LN_NO_VALIDATION, CMD_LN_NO_DEFAULT, NULL } }; cmd_ln_define(defn); if (argc == 1) { cmd_ln_print_definitions(); exit(1); } cmd_ln_parse(argc, argv); if (cmd_ln_validate() == FALSE) { E_FATAL("Unable to validate command line arguments\n"); } isHelp = *(uint32 *) cmd_ln_access("-help"); isExample = *(uint32 *) cmd_ln_access("-example"); if(isHelp){ printf("%s\n\n",helpstr); } if(isExample){ printf("%s\n\n",examplestr); } if(isHelp || isExample){ E_INFO("User asked for help or example.\n"); exit(1); } if(!isHelp && !isExample){ cmd_ln_print_configuration(); } return 0; }
/* Process utterances in the control file (-ctlfn argument) */ static void process_ctlfile ( void ) { FILE *ctlfp, *sentfp, *mllrctlfp; char *ctlfile, *cepdir, *cepext, *sentfile, *outsentfile, *mllrctlfile; char line[1024], cepfile[1024], ctlspec[1024]; /* CHANGE BY BHIKSHA: ADDED veclen AS A VARIABLE, 6 JAN 98 */ int32 ctloffset, ctlcount, veclen, sf, ef, nfr; /* END OF CHANGES BY BHIKSHA */ char mllrfile[4096], prevmllr[4096], sent[16384]; char uttid[1024]; int32 i, k; float32 **mfc; ctlfile = (char *) cmd_ln_access("-ctlfn"); if ((ctlfp = fopen (ctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", ctlfile); if ((mllrctlfile = (char *) cmd_ln_access("-mllrctlfn")) != NULL) { if ((mllrctlfp = fopen (mllrctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", mllrctlfile); } else mllrctlfp = NULL; prevmllr[0] = '\0'; sentfile = (char *) cmd_ln_access("-insentfn"); if ((sentfp = fopen (sentfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", sentfile); if ((outsentfile = (char *) cmd_ln_access("-outsentfn")) != NULL) { if ((outsentfp = fopen (outsentfile, "w")) == NULL) E_FATAL("fopen(%s,r) failed\n", outsentfile); } E_INFO("Processing ctl file %s\n", ctlfile); cepdir = (char *) cmd_ln_access("-cepdir"); cepext = (char *) cmd_ln_access("-cepext"); assert ((cepdir != NULL) && (cepext != NULL)); /* BHIKSHA: ADDING VECLEN TO ALLOW VECTORS OF DIFFERENT SIZES */ veclen = *((int32 *) cmd_ln_access("-ceplen")); /* END CHANGES, 6 JAN 1998, BHIKSHA */ ctloffset = *((int32 *) cmd_ln_access("-ctloffset")); if (! 
cmd_ln_access("-ctlcount")) ctlcount = 0x7fffffff; /* All entries processed if no count specified */ else ctlcount = *((int32 *) cmd_ln_access("-ctlcount")); if (ctlcount == 0) { E_INFO("-ctlcount argument = 0!!\n"); fclose (ctlfp); fclose (sentfp); if (outsentfp) fclose (outsentfp); return; } /* Skipping initial offset */ if (ctloffset > 0) E_INFO("Skipping %d utterances in the beginning of control file\n", ctloffset); while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { if (sscanf (line, "%s", ctlspec) > 0) { if (fgets (sent, sizeof(sent), sentfp) == NULL) { E_ERROR("EOF(%s)\n", sentfile); ctlcount = 0; break; } if (mllrctlfp) { if (fscanf (mllrctlfp, "%s", mllrfile) != 1) E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile); } --ctloffset; } } /* Process the specified number of utterance or until end of control file */ while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { printf ("\n"); E_INFO("Utterance: %s", line); sf = 0; ef = (int32)0x7ffffff0; if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0) continue; /* Empty line */ if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) E_FATAL("Bad ctlfile line: %s\n", line); if (k < 4) { /* Create utt-id from mfc-filename (and sf/ef if specified) */ for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i); if (k == 3) sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef); else strcpy (uttid, ctlspec+i+1); } if (mllrctlfp) { if (fscanf (mllrctlfp, "%s", mllrfile) != 1) E_FATAL ("Unexpected EOF(%s)\n", mllrctlfile); if (strcmp (prevmllr, mllrfile) != 0) { float32 ***A, **B; int32 gid, sid; uint8 *mgau_xform; gauden_mean_reload (g, (char *) cmd_ln_access("-meanfn")); if (mllr_read_regmat (mllrfile, &A, &B, featlen, n_feat) < 0) E_FATAL("mllr_read_regmat failed\n"); mgau_xform = (uint8 *) ckd_calloc (g->n_mgau, sizeof(uint8)); /* Transform each non-CI mixture Gaussian */ for (sid = 0; sid < sen->n_sen; sid++) { if (mdef->cd2cisen[sid] != sid) { /* Otherwise it's a CI 
senone */ gid = sen->mgau[sid]; if (! mgau_xform[gid]) { mllr_norm_mgau (g->mean[gid], g->n_density, A, B, featlen, n_feat); mgau_xform[gid] = 1; } } } ckd_free (mgau_xform); mllr_free_regmat (A, B, featlen, n_feat); strcpy (prevmllr, mllrfile); } } if (ctlspec[0] != '/') sprintf (cepfile, "%s/%s.%s", cepdir, ctlspec, cepext); else sprintf (cepfile, "%s.%s", ctlspec, cepext); /* Read utterance transcript */ if (fgets (sent, sizeof(sent), sentfp) == NULL) { E_ERROR("EOF(%s)\n", sentfile); break; } /* Strip utterance id from the end of the transcript */ for (k = strlen(sent) - 1; (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t') || (sent[k] == ' ')); --k); if ((k > 0) && (sent[k] == ')')) { for (--k; (k >= 0) && (sent[k] != '('); --k); if ((k >= 0) && (sent[k] == '(')) { sent[k] = '\0'; /* Check that uttid in transcript and control file match */ for (i = ++k; sent[i] && (sent[i] != ')') && (sent[i] != '\n') && (sent[i] != '\t') && (sent[i] != ' '); i++); sent[i] = '\0'; if (id_cmp (sent+k, uttid) != 0) E_ERROR("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n", uttid, sent+k); } } /* Read and process mfc file */ /* CHANGE BY BHIKSHA; PASSING VECLEN TO s2mfc_read(), 6 JAN 98 */ /* Read mfc file */ /* HACK HACKA HACK BHIKSHA */ { int32 asf, aef; asf = sf; sf = asf - 4; aef = ef; ef = aef + 4; if (sf < 0 ) { E_ERROR("Utterance %s begin %d < 4; ignored\n", uttid, asf); return; } if ((nfr = s2mfc_read (cepfile, sf, ef, &mfc, veclen)) <= 0) E_ERROR("Utt %s: MFC file read (%s) failed\n", uttid, cepfile); /* END CHANGES BY BHIKSHA */ else { E_INFO ("%d mfc frames\n", nfr-8); /* -8 HACK HACKA HACK */ /* Align utterance */ align_utt (sent, mfc+4, nfr-8, ctlspec, uttid); /* +4 HACKA HACK */ } } /* END HACK HACKA HACK */ --ctlcount; } printf ("\n"); while (fgets(line, sizeof(line), ctlfp) != NULL) { if (sscanf (line, "%s", ctlspec) > 0) { E_INFO("Skipping rest of control file beginning with:\n\t%s", line); break; } } fclose (ctlfp); fclose (sentfp); if (outsentfp) fclose 
(outsentfp); if (mllrctlfp) fclose (mllrctlfp); }
/*
 * Entry point of the s3align tool.
 *
 * Arguments come either directly from the command line, from an argument
 * file named as the single non-option argument, or from the default file
 * "s3align.arg" when no argument is given.  "help" as the only argument
 * prints the argument definitions.
 *
 * After checking that the required model, control and output arguments are
 * present, this initializes the log base, feature module, models and the
 * align module, processes the control file and prints total timing.
 * The process always terminates through exit().
 */
int main (int32 argc, char *argv[])
{
    char *str;

#if 0
    ckd_debug(100000);
#endif

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    /* Digest command line argument definitions */
    cmd_ln_define (defn);

    if ((argc == 2) && (strcmp (argv[1], "help") == 0)) {
        cmd_ln_print_definitions();
        exit(1);
    }

    /* Look for default or specified arguments file */
    str = NULL;
    if ((argc == 2) && (argv[1][0] != '-'))
        str = argv[1];
    else if (argc == 1) {
        str = "s3align.arg";
        E_INFO("Looking for default argument file: %s\n", str);
    }
    if (str) {
        /* Build command line argument list from file */
        if ((argc = load_argfile (str, argv[0], &argv)) < 0) {
            fprintf (stderr, "Usage:\n");
            fprintf (stderr, "\t%s argument-list, or\n", argv[0]);
            fprintf (stderr, "\t%s [argument-file] (default file: s3align.arg)\n\n",
                     argv[0]);
            cmd_ln_print_definitions();
            exit(1);
        }
    }

    cmdline_parse (argc, argv);

    /* Required model files */
    if ((cmd_ln_access("-mdeffn") == NULL) ||
        (cmd_ln_access("-meanfn") == NULL) ||
        (cmd_ln_access("-varfn") == NULL) ||
        (cmd_ln_access("-mixwfn") == NULL) ||
        (cmd_ln_access("-tmatfn") == NULL) ||
        (cmd_ln_access("-dictfn") == NULL)) {
        E_FATAL("Missing -mdeffn, -meanfn, -varfn, -mixwfn, -tmatfn, or -dictfn argument\n");
    }

    /* Required inputs */
    if ((cmd_ln_access("-ctlfn") == NULL) || (cmd_ln_access("-insentfn") == NULL)) {
        E_FATAL("Missing -ctlfn or -insentfn argument\n");
    }

    /* At least one kind of output must be requested */
    if ((cmd_ln_access ("-s2stsegdir") == NULL) &&
        (cmd_ln_access ("-stsegdir") == NULL) &&
        (cmd_ln_access ("-phsegdir") == NULL) &&
        (cmd_ln_access ("-wdsegdir") == NULL) &&
        (cmd_ln_access ("-outsentfn") == NULL)) {
        E_FATAL("Missing output file/directory argument(s)\n");
    }

    tm_utt = timing_new ();

    /*
     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid
     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values
     * to be maintained in int32 variables without significant loss of precision.
     */
    if (cmd_ln_access("-logbase") == NULL)
        logs3_init (1.0001);
    else {
        float32 logbase;

        logbase = *((float32 *) cmd_ln_access("-logbase"));
        if (logbase <= 1.0) {
            E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);
        }
        if (logbase > 1.1) {
            E_WARN("Logbase %e perhaps too large??\n", logbase);
        }
        logs3_init ((float64) logbase);
    }

    /* Initialize feature stream type */
    feat_init ((char *) cmd_ln_access ("-feat"));

    /* Cepstral vector length from -ceplen, as adjusted by feat_cepsize() */
    cepsize = *((int32 *) cmd_ln_access("-ceplen"));
    cepsize = feat_cepsize (cepsize);

    /* Read in input databases */
    models_init ();

    senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));

    tmr_utt = cyctimer_new ("U");
    tmr_gauden = cyctimer_new ("G");
    tmr_senone = cyctimer_new ("S");
    tmr_align = cyctimer_new ("A");

    /* Initialize align module */
    align_init ();
    printf ("\n");

    tot_nfr = 0;

    process_ctlfile ();

    if (tot_nfr > 0) {
        printf ("\n");
        printf("TOTAL FRAMES:       %8d\n", tot_nfr);
        printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_cpu, tm_utt->t_tot_cpu/(tot_nfr*0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_elapsed, tm_utt->t_tot_elapsed/(tot_nfr*0.01));
    }

#if (! WIN32)
    system ("ps aguxwww | grep s3align");
#endif

    /* Hack!! To avoid hanging problem under Linux */
    if (logfp) {
        fclose (logfp);
        *stdout = orig_stdout;
        *stderr = orig_stderr;
    }

    exit(0);
}
/* * Load a DAG from a file: each unique <word-id,start-frame> is a node, i.e. with * a single start time but it can represent several end times. Links are created * whenever nodes are adjacent in time. * Return value: ptr to DAG structure if successful; NULL otherwise. */ dag_t *dag_load (char *file) { FILE *fp; dag_t *dag; int32 seqid, sf, fef, lef, ef; char line[16384], wd[4096]; int32 i, j, k; dagnode_t *d, *d2, **darray; s3wid_t w; int32 fudge, min_ef_range; E_INFO("Reading DAG file: %s\n", file); if ((fp = fopen (file, "r")) == NULL) { E_ERROR("fopen(%s,r) failed\n", file); return NULL; } dag = ckd_calloc (1, sizeof(dag_t)); dag->node_sf = (dagnode_t **) ckd_calloc (S3_MAX_FRAMES, sizeof(dagnode_t *)); dag->nnode = 0; dag->nlink = 0; dag->nfrm = 0; /* Read Frames parameter */ if ((dag->nfrm = dag_param_read (fp, "Frames")) <= 0) E_FATAL("%s: Frames parameter missing or invalid\n", file); /* Read Nodes parameter */ if ((dag->nnode = dag_param_read (fp, "Nodes")) <= 0) E_FATAL("%s: Nodes parameter missing or invalid\n", file); /* Read nodes */ darray = (dagnode_t **) ckd_calloc (dag->nnode, sizeof(dagnode_t *)); for (i = 0; i < dag->nnode; i++) { if (fgets (line, sizeof(line), fp) == NULL) E_FATAL("%s: Premature EOF\n", file); if ((k = sscanf (line, "%d %s %d %d %d", &seqid, wd, &sf, &fef, &lef)) != 5) E_FATAL("%s: Bad line: %s\n", file, line); if ((sf < 0) || (sf >= dag->nfrm) || (fef < 0) || ( fef >= dag->nfrm) || (lef < 0) || ( lef >= dag->nfrm)) E_FATAL("%s: Bad frame info: %s\n", file, line); w = dict_wordid (dict, wd); if (NOT_WID(w)) E_FATAL("%s: Unknown word: %s\n", file, line); if (seqid != i) E_FATAL("%s: Seqno error: %s\n", file, line); d = (dagnode_t *) listelem_alloc (sizeof(dagnode_t)); darray[i] = d; d->wid = w; d->seqid = seqid; d->reachable = 0; d->sf = sf; d->fef = fef; d->lef = lef; d->succlist = NULL; d->predlist = NULL; d->next = dag->node_sf[sf]; dag->node_sf[sf] = d; } /* Read initial node ID */ if (((k = dag_param_read (fp, "Initial")) < 
0) || (k >= dag->nnode)) E_FATAL("%s: Initial node parameter missing or invalid\n", file); dag->entry.src = NULL; dag->entry.dst = darray[k]; dag->entry.next = NULL; /* Read final node ID */ if (((k = dag_param_read (fp, "Final")) < 0) || (k >= dag->nnode)) E_FATAL("%s: Final node parameter missing or invalid\n", file); dag->exit.src = NULL; dag->exit.dst = darray[k]; dag->exit.next = NULL; ckd_free (darray); /* That's all I need darray for??? */ /* Read bestsegscore entries; just to make sure all nodes have been read */ if ((k = dag_param_read (fp, "BestSegAscr")) < 0) E_FATAL("%s: BestSegAscr parameter missing\n", file); fclose (fp); /* * Build edges based on time-adjacency. * min_ef_range = min. endframes that a node must persist for it to be not ignored. * fudge = #frames to be fudged around word begin times */ min_ef_range = *((int32 *) cmd_ln_access ("-min_endfr")); fudge = *((int32 *) cmd_ln_access ("-dagfudge")); if (min_ef_range <= 0) E_FATAL("Bad min_endfr argument: %d\n", min_ef_range); if ((fudge < 0) || (fudge > 2)) E_FATAL("Bad dagfudge argument: %d\n", fudge); dag->nlink = 0; for (sf = 0; sf < dag->nfrm; sf++) { for (d = dag->node_sf[sf]; d; d = d->next) { if ((d->lef - d->fef < min_ef_range - 1) && (d != dag->entry.dst)) continue; if (d->wid == finishwid) continue; for (ef = d->fef - fudge + 1; ef <= d->lef + 1; ef++) { for (d2 = dag->node_sf[ef]; d2; d2 = d2->next) { if ((d2->lef - d2->fef < min_ef_range - 1) && (d2 != dag->exit.dst)) continue; dag_link (d, d2); dag->nlink++; } } } } return dag; }
static void process_reffile (char *reffile) { FILE *rfp, *hfp; char line[16384], uttid[4096], file[4096], lc_uttid[4096]; int32 i, k; dagnode_t ref[MAX_UTT_LEN]; int32 nref, noov, nhyp; int32 tot_err, tot_ref, tot_corr, tot_oov, tot_hyp; dag_t *dag; dpnode_t retval; timing_t *tm; char *latdir, *hypfile; if ((rfp = fopen(reffile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", reffile); latdir = (char *) cmd_ln_access ("-latdir"); hypfile = (char *) cmd_ln_access ("-hyp"); if ((! latdir) && (! hypfile)) E_FATAL("Both -latdir and -hyp arguments missing\n"); if (latdir && hypfile) E_FATAL("-latdir and -hyp arguments are mutually exclusive\n"); hfp = NULL; if (hypfile) { if ((hfp = fopen(hypfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", hypfile); } tot_err = 0; tot_ref = 0; tot_hyp = 0; tot_corr = 0; tot_oov = 0; tm = timing_new ("Utt"); while (fgets(line, sizeof(line), rfp) != NULL) { timing_reset (tm); timing_start (tm); if ((nref = refline2wds (line, ref, &noov, uttid)) < 0) E_FATAL("Bad line in file %s: %s\n", reffile, line); /* Read lattice or hypfile, whichever is specified */ if (latdir) { sprintf (file, "%s/%s.lat", latdir, uttid); dag = dag_load (file); if (! 
dag) { /* Try lower casing uttid */ strcpy (lc_uttid, uttid); lcase (lc_uttid); sprintf (file, "%s/%s.lat", latdir, lc_uttid); dag = dag_load (file); } } else { if (fgets(line, sizeof(line), hfp) == NULL) E_FATAL("Premature EOF(%s) at uttid %s\n", hypfile, uttid); dag = hypline2dag (uttid, line); } if (dag) { /* Append sentinel silwid node to end of DAG */ dag_append_sentinel (dag, silwid); /* Find best path (returns #errors/#correct and updates *nhyp) */ retval = dp (uttid, dict, oovbegin, ref, nref, dag, &nhyp, 0); dag_destroy (dag); } else { retval.c = 0; retval.e = nref-1; nhyp = 0; } timing_stop (tm); tot_ref += nref-1; tot_hyp += nhyp; tot_err += retval.e; tot_corr += retval.c; tot_oov += noov; printf("(%s) << %d ref; %d %.1f%% oov; %d hyp; %d %.1f%% corr; %d %.1f%% err; %.1fs CPU >>\n", uttid, nref-1, noov, (nref > 1) ? (noov * 100.0) / (nref-1) : 0.0, nhyp, retval.c, (nref > 1) ? (retval.c * 100.0) / (nref-1) : 0.0, retval.e, (nref > 1) ? (retval.e * 100.0) / (nref-1) : 0.0, tm->t_cpu); printf("== %7d ref; %5d %5.1f%% oov; %7d hyp; %7d %5.1f%% corr; %6d %5.1f%% err; %5.1fs CPU; %s\n", tot_ref, tot_oov, (tot_ref > 0) ? (tot_oov * 100.0) / tot_ref : 0.0, tot_hyp, tot_corr, (tot_ref > 0) ? (tot_corr * 100.0) / tot_ref : 0.0, tot_err, (tot_ref > 0) ? (tot_err * 100.0) / tot_ref : 0.0, tm->t_tot_cpu, uttid); fflush (stderr); fflush (stdout); } fclose (rfp); if (hfp) fclose (hfp); printf("SUMMARY: %d ref; %d %.3f%% oov; %d hyp; %d %.3f%% corr; %d %.3f%% err; %.1fs CPU\n", tot_ref, tot_oov, (tot_ref > 0) ? (tot_oov * 100.0) / tot_ref : 0.0, tot_hyp, tot_corr, (tot_ref > 0) ? (tot_corr * 100.0) / tot_ref : 0.0, tot_err, (tot_ref > 0) ? (tot_err * 100.0) / tot_ref : 0.0, tm->t_tot_cpu); }
/* * Load and cross-check all models (acoustic/lexical/linguistic). */ static void models_init ( void ) { float32 varfloor, mixwfloor, tpfloor; int32 i, s; s3cipid_t ci; s3wid_t w; char *arg; dict_t *dict; /* HMM model definition */ mdef = mdef_init ((char *) cmd_ln_access("-mdeffn")); /* Dictionary */ dict = dict_init ((char *) cmd_ln_access("-dictfn"), (char *) cmd_ln_access("-fdictfn")); /* HACK!! Make sure SILENCE_WORD, START_WORD and FINISH_WORD are in dictionary */ silwid = dict_wordid (SILENCE_WORD); startwid = dict_wordid (START_WORD); finishwid = dict_wordid (FINISH_WORD); if (NOT_WID(silwid) || NOT_WID(startwid) || NOT_WID(finishwid)) { E_FATAL("%s, %s, or %s missing from dictionary\n", SILENCE_WORD, START_WORD, FINISH_WORD); } if ((dict->filler_start > dict->filler_end) || (! dict_filler_word (silwid))) E_FATAL("%s must occur (only) in filler dictionary\n", SILENCE_WORD); /* No check that alternative pronunciations for filler words are in filler range!! */ /* Codebooks */ varfloor = *((float32 *) cmd_ln_access("-varfloor")); g = gauden_init ((char *) cmd_ln_access("-meanfn"), (char *) cmd_ln_access("-varfn"), varfloor); /* Verify codebook feature dimensions against libfeat */ n_feat = feat_featsize (&featlen); if (n_feat != g->n_feat) E_FATAL("#feature mismatch: s2= %d, mean/var= %d\n", n_feat, g->n_feat); for (i = 0; i < n_feat; i++) if (featlen[i] != g->featlen[i]) E_FATAL("featlen[%d] mismatch: s2= %d, mean/var= %d\n", i, featlen[i], g->featlen[i]); /* Senone mixture weights */ mixwfloor = *((float32 *) cmd_ln_access("-mwfloor")); sen = senone_init ((char *) cmd_ln_access("-mixwfn"), (char *) cmd_ln_access("-senmgaufn"), mixwfloor); /* Verify senone parameters against gauden parameters */ if (sen->n_feat != g->n_feat) E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, sen->n_feat); if (sen->n_cw != g->n_density) E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", g->n_density, sen->n_cw); if (sen->n_gauden > g->n_mgau) 
E_FATAL("Senones need more codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); if (sen->n_gauden < g->n_mgau) E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", sen->n_gauden, g->n_mgau); /* Verify senone parameters against model definition parameters */ if (mdef->n_sen != sen->n_sen) E_FATAL("Model definition has %d senones; but #senone= %d\n", mdef->n_sen, sen->n_sen); /* CD/CI senone interpolation weights file, if present */ if ((arg = (char *) cmd_ln_access ("-lambdafn")) != NULL) { interp = interp_init (arg); /* Verify interpolation weights size with senones */ if (interp->n_sen != sen->n_sen) E_FATAL("Interpolation file has %d weights; but #senone= %d\n", interp->n_sen, sen->n_sen); } else interp = NULL; /* Transition matrices */ tpfloor = *((float32 *) cmd_ln_access("-tpfloor")); tmat = tmat_init ((char *) cmd_ln_access("-tmatfn"), tpfloor); /* Verify transition matrices parameters against model definition parameters */ if (mdef->n_tmat != tmat->n_tmat) E_FATAL("Model definition has %d tmat; but #tmat= %d\n", mdef->n_tmat, tmat->n_tmat); if (mdef->n_emit_state != tmat->n_state-1) E_FATAL("#Emitting states in model definition = %d, #states in tmat = %d\n", mdef->n_emit_state, tmat->n_state); arg = (char *) cmd_ln_access ("-agc"); if ((strcmp (arg, "max") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -agc argument: %s\n", arg); arg = (char *) cmd_ln_access ("-cmn"); if ((strcmp (arg, "current") != 0) && (strcmp (arg, "none") != 0)) E_FATAL("Unknown -cmn argument: %s\n", arg); }
float64 best_q(float32 ****mixw, /* ADDITION FOR CONTINUOUS_TREES 21 May 98 */ float32 ****means, float32 ****vars, uint32 *veclen, /* END ADDITION FOR CONTINUOUS_TREES */ uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, uint32 **dfeat, uint32 n_dfeat, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 *id, uint32 n_id, float32 ***dist, /* ADDITION FOR CONTINUOUS_TREES 21 May 98 */ float64 node_wt_ent, /* Weighted entropy of node */ /* END ADDITION FOR CONTINUOUS_TREES */ quest_t **out_best_q) { float32 ***yes_dist; /* ADDITION FOR CONTINUOUS_TREES */ float32 ***yes_means=0; float32 ***yes_vars=0; float32 varfloor=0; float64 y_ent; /* END ADDITION FOR CONTINUOUS_TREES */ float64 yes_dnom, yes_norm; uint32 *yes_id; float32 ***no_dist; /* ADDITION FOR CONTINUOUS_TREES */ float32 ***no_means=0; float32 ***no_vars=0; float64 n_ent; /* END ADDITION FOR CONTINUOUS_TREES */ float64 no_dnom, no_norm; uint32 *no_id; uint32 n_yes, n_b_yes = 0; uint32 n_no, n_b_no = 0; uint32 i, j, k, q, b_q=0, s; uint32 ii; float64 einc, b_einc = -1.0e+50; /* ADDITION FOR CONTINUOUS_TREES; 20 May 98 */ char* type; uint32 continuous, sumveclen=0; type = (char *)cmd_ln_access("-ts2cbfn"); if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0) E_FATAL("Type %s unsupported; trees can only be built on types .semi. 
or .cont.\n",type); if (strcmp(type,".cont.") == 0) continuous = 1; else continuous = 0; if (continuous == 1) { varfloor = *(float32 *)cmd_ln_access("-varfloor"); /* Allocating for sumveclen is overallocation, but it eases coding */ for (ii=0,sumveclen=0; ii<n_stream; ii++) sumveclen += veclen[ii]; yes_means = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); yes_vars = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); no_means = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); no_vars = (float32 ***)ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); } /* END ADDITIONS FOR CONTINUOUS_TREES */ n_yes = n_no = 0; yes_dist = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); no_dist = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); for (q = 0; q < n_all_q; q++) { memset(&yes_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); memset(&no_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); /* ADDITION FOR CONTINUOUS_TREES; If continuous hmm initialize means and vars to zero */ if (continuous == 1) { memset(&yes_means[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); memset(&yes_vars[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); memset(&no_means[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); memset(&no_vars[0][0][0], 0, sizeof(float32) * n_state * n_stream * sumveclen); } /* END ADDITION FOR CONTINUOUS_TREES */ n_yes = n_no = 0; for (ii = 0; ii < n_id; ii++) { i = id[ii]; if (eval_quest(&all_q[q], dfeat[i], n_dfeat)) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { yes_dist[s][j][k] += mixw[i][s][j][k]; } } } /* MODIFICATION FOR CONTINUOUS_TREES: ADDITIONS FOR CONTINUOUS CASE */ if (continuous == 1) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < veclen[j]; k++) { 
yes_means[s][j][k] += mixw[i][s][j][0] * means[i][s][j][k]; yes_vars[s][j][k] += mixw[i][s][j][0] * (vars[i][s][j][k] + means[i][s][j][k]*means[i][s][j][k]); } } } } /* END MODIFICATION FOR CONTINUOUS_TREES */ ++n_yes; } else { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { no_dist[s][j][k] += mixw[i][s][j][k]; } } } /* MODIFICATION FOR CONTINUOUS_TREES: ADDITIONS FOR CONTINUOUS CASE */ if (continuous == 1) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < veclen[j]; k++) { no_means[s][j][k] += mixw[i][s][j][0] * means[i][s][j][k]; no_vars[s][j][k] += mixw[i][s][j][0] * (vars[i][s][j][k] + means[i][s][j][k]*means[i][s][j][k]); } } } } /* END MODIFICATION FOR CONTINUOUS_TREES */ ++n_no; } } if ((n_yes == 0) || (n_no == 0)) { /* no split. All satisfy or all don't satisfy */ continue; } for (s = 0, einc = 0; s < n_state; s++) { for (k = 0, yes_dnom = 0; k < n_density; k++) { yes_dnom += yes_dist[s][0][k]; } if (yes_dnom == 0) break; yes_norm = 1.0 / yes_dnom; for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { yes_dist[s][j][k] *= yes_norm; } } for (k = 0, no_dnom = 0; k < n_density; k++) { no_dnom += no_dist[s][0][k]; } if (no_dnom == 0) break; no_norm = 1.0 / no_dnom; for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { no_dist[s][j][k] *= no_norm; } } /* MODIFICATION FOR CONTINUOUS_TREES: Do appropriate operations for discrete and continuous */ if (continuous == 1) { y_ent = 0; n_ent = 0; for (j = 0; j < n_stream; j++) { if (yes_dnom != 0) { for (k = 0; k < veclen[j]; k++) { yes_means[s][j][k] *= yes_norm; yes_vars[s][j][k] = yes_vars[s][j][k]*yes_norm - yes_means[s][j][k]*yes_means[s][j][k]; if (yes_vars[s][j][k] < varfloor) yes_vars[s][j][k] = varfloor; } } if (no_dnom != 0) { for (k = 0; k < veclen[j]; k++) { no_means[s][j][k] *= no_norm; no_vars[s][j][k] = no_vars[s][j][k]*no_norm - no_means[s][j][k]*no_means[s][j][k]; if (no_vars[s][j][k] < 
varfloor) no_vars[s][j][k] = varfloor; } } y_ent += yes_dnom * ent_cont(yes_means[s][j],yes_vars[s][j],veclen[j]); n_ent += no_dnom * ent_cont(no_means[s][j],no_vars[s][j],veclen[j]); } einc += (float64)stwt[s] * (y_ent + n_ent); } else { einc += (float64)stwt[s] * wt_ent_inc(yes_dist[s], yes_dnom, no_dist[s], no_dnom, dist[s], n_stream, n_density); } } /* END MODIFICATION FOR CONTINUOUS_TREES */ /* ADDITION FOR CONTINUOUS_TREES; In current code this is true only for continous HMM */ if (continuous == 1) { einc -= node_wt_ent; } /* END ADDITION FOR CONTINUOUS_TREES */ if (s < n_state) { /* Ended iteration over states prematurely; assume 'bad' question */ continue; } if (einc > b_einc) { b_einc = einc; b_q = q; n_b_yes = n_yes; n_b_no = n_no; } } if ((n_b_yes == 0) || (n_b_no == 0)) { /* No best question */ *out_best_q = NULL; return 0; } yes_id = (uint32 *)ckd_calloc(n_b_yes, sizeof(uint32)); no_id = (uint32 *)ckd_calloc(n_b_no, sizeof(uint32)); memset(&yes_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); memset(&no_dist[0][0][0], 0, sizeof(float32) * n_state * n_stream * n_density); n_yes = n_no = 0; for (ii = 0; ii < n_id; ii++) { i = id[ii]; if (eval_quest(&all_q[b_q], dfeat[i], n_dfeat)) { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { yes_dist[s][j][k] += mixw[i][s][j][k]; } } } yes_id[n_yes] = i; ++n_yes; } else { for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0; k < n_density; k++) { no_dist[s][j][k] += mixw[i][s][j][k]; } } } no_id[n_no] = i; ++n_no; } } ckd_free_3d((void ***)yes_dist); ckd_free((void *)yes_id); ckd_free_3d((void ***)no_dist); ckd_free((void *)no_id); /* ADDITION FOR CONTINUOUS_TREES */ if (continuous == 1) { ckd_free_3d((void ***)yes_means); ckd_free_3d((void ***)yes_vars); ckd_free_3d((void ***)no_means); ckd_free_3d((void ***)no_vars); } /* END ADDITION FOR CONTINUOUS_TREES */ *out_best_q = &all_q[b_q]; return b_einc; }
/*
 * Find the Viterbi alignment of one utterance and write the requested
 * segmentation outputs.
 *
 * sent     In: reference transcript
 * mfc      In: MFC cepstra for the input utterance
 * nfr      In: number of input frames
 * ctlspec  In: utterance specification from the control file
 * uttid    In: utterance id, for logging and output naming
 *
 * Scratch buffers and the output directory names are set up on the first
 * call and kept in static storage.  For every frame the active senones and
 * their codebooks are evaluated, scores are normalized by the frame's best
 * senone score (saved in senscale[]) and the alignment is advanced by one
 * frame.  tot_nfr is increased by nfr on completion.
 *
 * The mean normalization call reaches 4 frames before mfc and covers nfr+8
 * frames; the commented-out short-utterance check and edge-frame
 * replication of earlier revisions are not performed.
 */
static void align_utt (char *sent,
                       float32 **mfc,
                       int32 nfr,
                       char *ctlspec,
                       char *uttid)
{
    static float32 **feat = NULL;
    static int32 w;
    static int32 topn;
    static gauden_dist_t ***dist;
    static int32 *senscr;
    static s3senid_t *sen_active;
    static int8 *mgau_active;
    static char *s2stsegdir;
    static char *stsegdir;
    static char *phsegdir;
    static char *wdsegdir;

    int32 frm, k, senid, cb, n_active, top_scr;
    char *opt;
    align_stseg_t *stseg;
    align_phseg_t *phseg;
    align_wdseg_t *wdseg;

    if (feat == NULL) {
        /* First call: allocate everything that is reused across utterances */

        /* One feature vector: an array of n_feat streams */
        feat = (float32 **) ckd_calloc (n_feat, sizeof(float32 *));
        k = 0;
        while (k < n_feat) {
            feat[k] = (float32 *) ckd_calloc (featlen[k], sizeof(float32));
            k++;
        }

        /* #MFC vectors needed on either side of current frame for one feature vector */
        w = feat_window_size ();

        /* Top-N density values per codebook; N is capped at the codebook size */
        topn = *((int32 *) cmd_ln_access("-topn"));
        if (topn > g->n_density) {
            E_ERROR("-topn argument (%d) > #density codewords (%d); set to latter\n",
                    topn, g->n_density);
            topn = g->n_density;
        }
        dist = (gauden_dist_t ***) ckd_calloc_3d (g->n_mgau, n_feat, topn,
                                                  sizeof(gauden_dist_t));

        /* One frame of senone scores, plus per-frame active flags */
        senscr = (int32 *) ckd_calloc (sen->n_sen, sizeof(int32));
        sen_active = (s3senid_t *) ckd_calloc (sen->n_sen, sizeof(s3senid_t));
        mgau_active = (int8 *) ckd_calloc (g->n_mgau, sizeof(int8));

        /* Remember each output directory that was requested (NULL otherwise) */
        opt = (char *) cmd_ln_access ("-s2stsegdir");
        s2stsegdir = (opt != NULL) ? (char *) ckd_salloc (opt) : NULL;

        opt = (char *) cmd_ln_access ("-stsegdir");
        stsegdir = (opt != NULL) ? (char *) ckd_salloc (opt) : NULL;

        opt = (char *) cmd_ln_access ("-phsegdir");
        phsegdir = (opt != NULL) ? (char *) ckd_salloc (opt) : NULL;

        opt = (char *) cmd_ln_access ("-wdsegdir");
        wdsegdir = (opt != NULL) ? (char *) ckd_salloc (opt) : NULL;
    }

    cyctimer_reset_all ();
    counter_reset_all ();

    timing_reset (tm_utt);
    timing_start (tm_utt);

    cyctimer_resume (tmr_utt);

    /* CMN over the padded cepstra (4 frames before mfc, nfr+8 frames in all) */
    opt = (char *) cmd_ln_access ("-cmn");
    if (strcmp (opt, "current") == 0) {
        norm_mean (mfc-4, nfr+8, cepsize);
    }

    /* AGC */
    opt = (char *) cmd_ln_access ("-agc");
    if (strcmp (opt, "max") == 0) {
        agc_max (mfc, nfr);
    }

    if (align_build_sent_hmm (sent) != 0) {
        align_destroy_sent_hmm ();
        cyctimer_pause (tmr_utt);

        E_ERROR("No sentence HMM; no alignment for %s\n", uttid);

        return;
    }

    align_start_utt (uttid);

    frm = 0;
    while (frm < nfr) {
        cyctimer_resume (tmr_utt);

        /* Feature vector for this frame, computed from the cepstra at mfc+frm */
        feat_cep2feat (mfc+frm, feat);

        /*
         * Evaluate gaussian density codebooks and senone scores for input codeword.
         * Evaluate only active codebooks and senones.
         */

        /* Obtain active senone flags */
        cyctimer_resume (tmr_senone);
        align_sen_active (sen_active, sen->n_sen);

        /* When interpolating, every CI senone is needed as well */
        if (interp) {
            for (k = 0; k < mdef->n_ci_sen; k++) {
                sen_active[k] = 1;
            }
        }

        /* Compact the flags in place into a list of active senone ids */
        n_active = 0;
        for (k = 0; k < mdef->n_sen; k++) {
            if (sen_active[k]) {
                sen_active[n_active] = k;
                n_active++;
            }
        }
        cyctimer_pause (tmr_senone);

        /* Flag the codebooks used by the active senones */
        cyctimer_resume (tmr_gauden);
        for (cb = 0; cb < g->n_mgau; cb++) {
            mgau_active[cb] = 0;
        }
        for (k = 0; k < n_active; k++) {
            senid = sen_active[k];
            mgau_active[sen->mgau[senid]] = 1;
        }

        /* Compute topn gaussian density values (for active codebooks) */
        for (cb = 0; cb < g->n_mgau; cb++) {
            if (mgau_active[cb]) {
                gauden_dist (g, cb, topn, feat, dist[cb]);
            }
        }
        cyctimer_pause (tmr_gauden);

        /* Evaluate active senones, tracking the best score of the frame */
        cyctimer_resume (tmr_senone);
        top_scr = (int32) 0x80000000;
        for (k = 0; k < n_active; k++) {
            senid = sen_active[k];
            senscr[senid] = senone_eval (sen, senid, dist[sen->mgau[senid]], topn);
            if (senscr[senid] > top_scr) {
                top_scr = senscr[senid];
            }
        }

        /* Interpolate CD senones with their CI counterparts */
        if (interp) {
            for (k = 0; k < n_active; k++) {
                senid = sen_active[k];
                if (senid >= mdef->n_ci_sen) {
                    interp_cd_ci (interp, senscr, senid, mdef->cd2cisen[senid]);
                }
            }
        }

        /* Normalize senone scores (interpolation above can only lower best score) */
        for (k = 0; k < n_active; k++) {
            senid = sen_active[k];
            senscr[senid] -= top_scr;
        }
        senscale[frm] = top_scr;
        cyctimer_pause (tmr_senone);

        /* Step alignment one frame forward */
        cyctimer_resume (tmr_align);
        align_frame (senscr);
        cyctimer_pause (tmr_align);

        cyctimer_pause (tmr_utt);

        frm++;
    }
    timing_stop (tm_utt);

    printf ("\n");

    /* Wind up alignment for this utterance and write whatever was requested */
    if (align_end_utt (&stseg, &phseg, &wdseg) < 0) {
        E_ERROR("Final state not reached; no alignment for %s\n\n", uttid);
    } else {
        if (s2stsegdir) {
            write_s2stseg (s2stsegdir, stseg, uttid, ctlspec);
        }
        if (stsegdir) {
            write_stseg (stsegdir, stseg, uttid, ctlspec);
        }
        if (phsegdir) {
            write_phseg (phsegdir, phseg, uttid, ctlspec);
        }
        if (wdsegdir) {
            write_wdseg (wdsegdir, wdseg, uttid, ctlspec);
        }
        if (outsentfp) {
            write_outsent (outsentfp, wdseg, uttid);
        }
    }

    align_destroy_sent_hmm ();

    /* Per-utterance statistics; a frame is taken as 0.01 sec */
    cyctimer_print_all_norm (stdout, nfr*0.01, tmr_utt);
    counter_print_all (stdout);

    printf("EXECTIME: %5d frames, %7.2f sec CPU, %6.2f xRT; %7.2f sec elapsed, %6.2f xRT\n",
           nfr,
           tm_utt->t_cpu, tm_utt->t_cpu * 100.0 / nfr,
           tm_utt->t_elapsed, tm_utt->t_elapsed * 100.0 / nfr);

    tot_nfr += nfr;
}