/*
 * Per-utterance callback: force-align one utterance against its transcript.
 *
 * Reads the next line from the transcript file (sentfp), strips a trailing
 * "(uttid)" and compares it with the control-file uttid, obtains features
 * either from a waveform (-adcin) or from a cepstrum file, applies an
 * utterance-specific MLLR transform if one is named in ur, and finally calls
 * align_utt().
 *
 * data  : opaque pointer, cast to the cmd_ln_t configuration
 * ur    : utterance resources (uttfile, regmatname, cb2mllrname)
 * sf/ef : start/end frame handed to feat_s2mfc2feat() for cepstrum input
 * uttid : utterance id from the control file
 */
static void
utt_align(void *data, utt_res_t * ur, int32 sf, int32 ef, char *uttid)
{
    int32 nfr;
    int k, i;
    const char *cepdir;
    const char *cepext;
    char sent[16384];
    cmd_ln_t *config = (cmd_ln_t *) data;

    cepdir = cmd_ln_str_r(kbc->config, "-cepdir");
    cepext = cmd_ln_str_r(kbc->config, "-cepext");

    /* UGLY! */
    /* Read utterance transcript and match it with the control file. */
    if (fgets(sent, sizeof(sent), sentfp) == NULL) {
        E_FATAL("EOF(%s) of the transcription\n", sentfile);
    }
    /*  E_INFO("SENT %s\n",sent); */

    /* Strip utterance id from the end of the transcript */
    for (k = strlen(sent) - 1;
         (k > 0) && ((sent[k] == '\n') || (sent[k] == '\t') || (sent[k] == ' '));
         --k);
    if ((k > 0) && (sent[k] == ')')) {
        /* Walk back to the matching open paren */
        for (--k; (k >= 0) && (sent[k] != '('); --k);
        if ((k >= 0) && (sent[k] == '(')) {
            /* Cut the transcript at the open paren */
            sent[k] = '\0';

            /* Check that uttid in transcript and control file match */
            for (i = ++k;
                 sent[i] && (sent[i] != ')') && (sent[i] != '\n')
                 && (sent[i] != '\t') && (sent[i] != ' ');
                 i++);
            sent[i] = '\0';
            if (id_cmp(sent + k, uttid) != 0)
                E_ERROR("Uttid mismatch: ctlfile = \"%s\"; transcript = \"%s\"\n",
                        uttid, sent + k);
        }
    }

    /* Convert input file to cepstra if waveform input is selected */
    if (cmd_ln_boolean_r(config, "-adcin")) {
        int16 *adcdata;
        int32 nsamps = 0;
        mfcc_t **mfcc;

        if ((adcdata = bio_read_wavfile(cmd_ln_str_r(config, "-cepdir"),
                                        ur->uttfile,
                                        cmd_ln_str_r(config, "-cepext"),
                                        cmd_ln_int32_r(config, "-adchdr"),
                                        strcmp(cmd_ln_str_r(config, "-input_endian"), "big"),
                                        &nsamps)) == NULL) {
            E_FATAL("Cannot read file %s\n", ur->uttfile);
        }
        fe_start_utt(fe);
        if (fe_process_utt(fe, adcdata, nsamps, &mfcc, &nfr) < 0) {
            /* The file name was passed but never printed: give it a %s. */
            E_FATAL("MFCC calculation failed for %s\n", ur->uttfile);
        }
        ckd_free(adcdata);
        if (nfr > S3_MAX_FRAMES) {
            E_FATAL("Maximum number of frames (%d) exceeded\n", S3_MAX_FRAMES);
        }
        if ((nfr = feat_s2mfc2feat_live(kbcore_fcb(kbc), mfcc, &nfr,
                                        TRUE, TRUE, feat)) < 0) {
            E_FATAL("Feature computation failed\n");
        }
        if (mfcc)
            ckd_free_2d((void **) mfcc);
    }
    else {
        nfr = feat_s2mfc2feat(kbcore_fcb(kbc), ur->uttfile, cepdir, cepext,
                              sf, ef, feat, S3_MAX_FRAMES);
    }

    /* Utterance-specific MLLR, depending on which acoustic model is loaded */
    if (ur->regmatname) {
        if (kbc->mgau)
            adapt_set_mllr(adapt_am, kbc->mgau, ur->regmatname,
                           ur->cb2mllrname, kbc->mdef, kbc->config);
        else if (kbc->ms_mgau)
            model_set_mllr(kbc->ms_mgau, ur->regmatname, ur->cb2mllrname,
                           kbcore_fcb(kbc), kbc->mdef, kbc->config);
        else
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
    }

    if (nfr <= 0) {
        if (cepdir != NULL) {
            E_ERROR("Utt %s: Input file read (%s) with dir (%s) and extension (%s) failed \n",
                    uttid, ur->uttfile, cepdir, cepext);
        }
        else {
            E_ERROR("Utt %s: Input file read (%s) with extension (%s) failed \n",
                    uttid, ur->uttfile, cepext);
        }
    }
    else {
        E_INFO("%s: %d input frames\n", uttid, nfr);
        align_utt(sent, nfr, ur->uttfile, uttid);
    }
}
/*
 * Read all frames dumped for state src_ts and append every stride-th one
 * (stream strm, veclen floats) to obuf.
 *
 * n_sel is the number of frames selected so far; the updated count is
 * returned.  Frames beyond max_frames are counted but NOT copied, so the
 * buffer can never be overrun even when more frames are present than the
 * caller predicted.
 */
static uint32
copy_strided_obs(uint32 src_ts, uint32 strm, uint32 veclen,
                 uint32 max_frames, uint32 n_sel)
{
    vector_t **feat;
    uint32 n_i_frame;
    uint32 i;

    for (i = 0; segdmp_next_feat(src_ts, &feat, &n_i_frame); i++) {
        assert(n_i_frame == 1);

        if ((i % stride) == 0) {
            if (n_sel < max_frames) {
                memcpy(&obuf[(size_t) n_sel * veclen],
                       (void *) &feat[0][strm][0],
                       sizeof(float32) * veclen);
            }
            ++n_sel;
        }

        ckd_free((void *) &feat[0][0][0]);
        ckd_free_2d((void **) feat);
    }

    return n_sel;
}

/*
 * Fill the global observation buffer (obuf) for tied state ts, stream strm.
 *
 * ts      : output tied-state id
 * strm    : feature stream index
 * n_frame : number of frames dumped for the state
 * veclen  : vector length of the stream
 *
 * Returns n_frame / stride, the number of frames the buffer is sized for.
 * If (ts, strm) equals the previously loaded pair the buffer is reused.
 * When the o2d map exists, frames are gathered from every dump state mapped
 * to ts; otherwise they are read directly for ts.
 */
static uint32
setup_obs_multiclass(uint32 ts, uint32 strm, uint32 n_frame, uint32 veclen)
{
    uint32 k;
    uint32 n_sel = 0;
    uint32 n_sv_frame;

    n_sv_frame = n_frame / stride;

    if ((l_ts == ts) && (l_strm == strm)) {
        E_INFO("No need to read data; using existing buffered data\n");
        return n_sv_frame;
    }

    n_tot_frame += n_sv_frame;

    l_ts = ts;
    l_strm = strm;

    /* Size computed in size_t, then narrowed to match the %u conversion. */
    E_INFO("alloc'ing %uMb obs buf\n",
           (unsigned int) (((size_t) n_sv_frame * veclen * sizeof(float32))
                           / (1024 * 1024)));

    if (obuf) {
        ckd_free(obuf);
        obuf = NULL;
    }
    obuf = ckd_calloc(n_sv_frame * veclen, sizeof(float32));

    if (stride == 1) {
        E_INFO("Reading all frames\n");
    }
    else {
        E_INFO("Reading 1 out of every %u frames from obs dmp file...\n", stride);
    }

    if (o2d) {
        E_INFO("o_ts == %u ->", ts);
        for (k = 0; k < n_o2d[ts]; k++) {
            E_INFOCONT(" %u", o2d[ts][k]);
        }
        E_INFOCONT("\n");

        for (k = 0; k < n_o2d[ts]; k++) {
            n_sel = copy_strided_obs(o2d[ts][k], strm, veclen, n_sv_frame, n_sel);
        }
    }
    else {
        E_INFO("dmp mdef == output mdef\n");
        n_sel = copy_strided_obs(ts, strm, veclen, n_sv_frame, n_sel);
    }

    /* n_sel counts every selected frame, including any that did not fit. */
    if (n_sel != n_sv_frame) {
        E_WARN("Expected %u frames, but read %u\n", n_sv_frame, n_sel);
    }

    E_INFO("done reading %u frames\n", n_sv_frame);

    return n_sv_frame;
}
/*
 * Read one N-gram line from an ARPA file into raw_ngram.
 *
 * li         : line iterator; advanced by one line (set to NULL at EOF)
 * wid        : word string -> word id hash table
 * lmath      : log-math object used to convert log10 values
 * order      : order of the N-gram being read (1 = unigram, ...)
 * order_max  : highest order in the model; N-grams of that order carry no
 *              backoff weight
 * raw_ngram  : output; weights[] and words[] are allocated here
 *
 * A line with fewer than order+1 fields is skipped (with a warning unless
 * it is empty) and raw_ngram is left untouched.  Words are stored in reverse
 * order: words[order-1] is the first word on the line.
 */
static void
read_ngram_instance(lineiter_t ** li, hash_table_t * wid, logmath_t * lmath,
                    int order, int order_max, ngram_raw_t * raw_ngram)
{
    int n;
    int words_expected;
    int i;
    char *wptr[NGRAM_MAX_ORDER + 1];

    *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n", order);
        return;
    }
    string_trim((*li)->buf, STRING_BOTH);

    /* probability + <order> words; backoff weight is optional */
    words_expected = order + 1;
    if ((n = str2words((*li)->buf, wptr, NGRAM_MAX_ORDER + 1)) < words_expected) {
        if ((*li)->buf[0] != '\0') {
            E_WARN("Format error; %d-gram ignored: %s\n", order, (*li)->buf);
        }
        return;
    }

    if (order == order_max) {
        /* Highest order: probability only */
        raw_ngram->weights =
            (float *) ckd_calloc(1, sizeof(*raw_ngram->weights));
        raw_ngram->weights[0] = atof_c(wptr[0]);
        if (raw_ngram->weights[0] > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, wptr[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        raw_ngram->weights[0] =
            logmath_log10_to_log_float(lmath, raw_ngram->weights[0]);
    }
    else {
        float weight, backoff;

        /* Lower orders: probability followed by backoff weight */
        raw_ngram->weights =
            (float *) ckd_calloc(2, sizeof(*raw_ngram->weights));

        weight = atof_c(wptr[0]);
        if (weight > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, wptr[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        else {
            raw_ngram->weights[0] = logmath_log10_to_log_float(lmath, weight);
        }

        if (n == order + 1) {
            /* No backoff field on the line */
            raw_ngram->weights[1] = 0.0f;
        }
        else {
            backoff = atof_c(wptr[order + 1]);
            raw_ngram->weights[1] = logmath_log10_to_log_float(lmath, backoff);
        }
    }

    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));

    /*
     * Store the words reversed.  An index loop is used so that no pointer
     * is ever formed before the start of the array.  A failed lookup leaves
     * the zero written by ckd_calloc in place.
     */
    for (i = 1; i <= order; i++) {
        hash_table_lookup_int32(wid, wptr[i],
                                (int32 *) &raw_ngram->words[order - i]);
    }
}
/*
 * Load HMM transition matrices from a binary file.
 *
 * file_name : path of the transition-matrix file
 * tpfloor   : floor applied to non-zero transition probabilities
 *
 * Each row is normalised, floored, re-normalised and converted with logs3();
 * exact zeros become S3_LOGPROB_ZERO.  Any read or consistency failure is
 * fatal.  Returns the newly allocated tmat_t.
 */
tmat_t *
tmat_init(char *file_name, float64 tpfloor)
{
    tmat_t *tmat;
    FILE *fh;
    char **hdr_key, **hdr_val;
    int32 swap, has_chksum;
    uint32 cksum;
    int32 n_from, n_to, n_float, mat_len;
    int32 a, m, row, col;
    float32 **scratch;
    char extra;

    E_INFO("Reading HMM transition probability matrices: %s\n", file_name);

    tmat = (tmat_t *) ckd_calloc(1, sizeof(tmat_t));

    fh = fopen(file_name, "rb");
    if (fh == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: argument/value pairs plus the byte-order magic */
    if (bio_readhdr(fh, &hdr_key, &hdr_val, &swap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Walk the header arguments */
    has_chksum = 0;
    for (a = 0; hdr_key[a] != NULL; a++) {
        if (strcmp(hdr_key[a], "version") == 0) {
            if (strcmp(hdr_val[a], TMAT_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, hdr_val[a], TMAT_PARAM_VERSION);
        }
        else if (strcmp(hdr_key[a], "chksum0") == 0) {
            /* Only the presence matters; the value is ignored */
            has_chksum = 1;
        }
    }
    bio_hdrarg_free(hdr_key, hdr_val);
    hdr_key = hdr_val = NULL;

    cksum = 0;

    /* Dimensions: #tmat, #from-states, #to-states, total array size */
    if ((bio_fread(&(tmat->n_tmat), sizeof(int32), 1, fh, swap, &cksum) != 1)
        || (bio_fread(&n_from, sizeof(int32), 1, fh, swap, &cksum) != 1)
        || (bio_fread(&n_to, sizeof(int32), 1, fh, swap, &cksum) != 1)
        || (bio_fread(&n_float, sizeof(int32), 1, fh, swap, &cksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }

    if (tmat->n_tmat >= MAX_S3TMATID)
        E_FATAL("%s: #tmat (%d) exceeds limit (%d)\n",
                file_name, tmat->n_tmat, MAX_S3TMATID);
    if (n_to != n_from + 1)
        E_FATAL("%s: #from-states(%d) != #to-states(%d)-1\n",
                file_name, n_from, n_to);
    tmat->n_state = n_from;

    if (n_float != tmat->n_tmat * n_from * n_to) {
        E_FATAL("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, n_float, tmat->n_tmat, n_from, n_to);
    }

    /* Final log-domain storage */
    tmat->tp = (int32 ***) ckd_calloc_3d(tmat->n_tmat, n_from, n_to,
                                         sizeof(int32));

    /* One matrix worth of raw floats, reused for every matrix */
    scratch = (float32 **) ckd_calloc_2d(n_from, n_to, sizeof(float32));

    mat_len = n_from * n_to;
    for (m = 0; m < tmat->n_tmat; m++) {
        if (bio_fread(scratch[0], sizeof(float32), mat_len, fh, swap, &cksum)
            != mat_len) {
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        }

        for (row = 0; row < n_from; row++) {
            float32 *vec = scratch[row];

            /* Normalise, floor, then normalise again */
            if (vector_sum_norm(vec, n_to) == 0.0)
                E_ERROR("Normalization failed for tmat %d from state %d\n",
                        m, row);
            vector_nz_floor(vec, n_to, tpfloor);
            vector_sum_norm(vec, n_to);

            /* To logs3 domain; an exact zero cannot be passed to logs3() */
            for (col = 0; col < n_to; col++) {
                if (vec[col] == 0.0)
                    tmat->tp[m][row][col] = S3_LOGPROB_ZERO;
                else
                    tmat->tp[m][row][col] = logs3(vec[col]);
            }
        }
    }

    ckd_free_2d((void **) scratch);

    if (has_chksum)
        bio_verify_chksum(fh, swap, cksum);

    /* Nothing should follow the data */
    if (fread(&extra, 1, 1, fh) == 1)
        E_ERROR("Non-empty file beyond end of data\n");

    fclose(fh);

    E_INFO("Read %d transition matrices of size %dx%d\n",
           tmat->n_tmat, tmat->n_state, tmat->n_state + 1);

    if (tmat_chk_uppertri(tmat) < 0)
        E_FATAL("Tmat not upper triangular\n");

    return tmat;
}
/*
 * Build the map from output-mdef tied states to dump-mdef tied states.
 *
 * d_mdef : model definition the observation dump was produced with
 * o_mdef : model definition of the output
 *
 * Fills the globals n_o2d (count per output state), o2d (list of dump states
 * per output state) and i_o2d (scratch fill index, reset to 0 on return).
 * Models of o_mdef that are absent from d_mdef are skipped with one warning.
 * Returns S3_SUCCESS; inconsistencies are fatal.
 *
 * NOTE(review): state j of an output model is paired with state j of the
 * dump model, so both are assumed to have the same number of states.
 */
uint32
setup_d2o_map(model_def_t *d_mdef, model_def_t *o_mdef)
{
    model_def_entry_t *o_defn, *d_defn;
    uint32 d_ts;
    uint32 o_ts;
    uint32 *mapped;
    uint32 i, j, k, d;
    const char *nm;
    int did_warn = FALSE;

    if (d_mdef->n_tied_state < o_mdef->n_tied_state) {
        E_FATAL("more tied states in output than in dump mdef (%u vs %u)\n",
                o_mdef->n_tied_state, d_mdef->n_tied_state);
    }
    if (d_mdef->n_tied_ci_state != o_mdef->n_tied_ci_state) {
        E_FATAL("# tied ci state in output, %u not equal to # in dmp, %u\n",
                o_mdef->n_tied_ci_state, d_mdef->n_tied_ci_state);
    }

    n_o2d = (uint32 *) ckd_calloc(o_mdef->n_tied_state, sizeof(uint32));
    i_o2d = (uint32 *) ckd_calloc(o_mdef->n_tied_state, sizeof(uint32));
    o2d = (uint32 **) ckd_calloc(o_mdef->n_tied_state, sizeof(uint32 *));

    /* mapped[d_ts] is TRUE once dump state d_ts has been counted */
    mapped = (uint32 *) ckd_calloc(d_mdef->n_tied_state, sizeof(uint32));

    /* Pass 1: count the dump states that map to each output state */
    for (i = 0; i < o_mdef->n_defn; i++) {
        nm = acmod_set_id2name(o_mdef->acmod_set, i);
        d = acmod_set_name2id(d_mdef->acmod_set, nm);

        if (d == NO_ID) {
            if (!did_warn) {
                E_WARN("Some models in the output mdef not in the dump mdef\n");
                did_warn = TRUE;
            }
            continue;
        }

        o_defn = &o_mdef->defn[i];
        d_defn = &d_mdef->defn[d];

        for (j = 0; j < o_defn->n_state; j++) {
            o_ts = o_defn->state[j];
            d_ts = d_defn->state[j];

            /* Both ids must be valid before either is used as an index */
            if ((o_ts != TYING_NO_ID) && (d_ts != TYING_NO_ID)) {
                if (mapped[d_ts] == FALSE) {
                    ++n_o2d[o_ts];
                    mapped[d_ts] = TRUE;
                }
            }
            else {
                if (!((o_ts == TYING_NO_ID) && (d_ts == TYING_NO_ID))) {
                    E_INFO("%s state is NULL but %s isn't.\n",
                           (o_ts == TYING_NO_ID ? "output" : "dump"),
                           (o_ts == TYING_NO_ID ? "dump" : "output"));
                }
            }
        }
    }

    /* Only needed for counting */
    ckd_free(mapped);
    mapped = NULL;

    for (i = 0; i < o_mdef->n_tied_state; i++) {
        o2d[i] = (uint32 *) ckd_calloc(n_o2d[i], sizeof(uint32));
    }

    /* Pass 2: fill in the lists */
    for (i = 0; i < o_mdef->n_defn; i++) {
        /* Figure out the index in the dump mdef for the model in the output mdef */
        nm = acmod_set_id2name(o_mdef->acmod_set, i);
        d = acmod_set_name2id(d_mdef->acmod_set, nm);

        if (d == NO_ID)
            continue;

        o_defn = &o_mdef->defn[i];
        d_defn = &d_mdef->defn[d];

        for (j = 0; j < o_defn->n_state; j++) {
            o_ts = o_defn->state[j];
            d_ts = d_defn->state[j];

            if ((o_ts != TYING_NO_ID) && (d_ts != TYING_NO_ID)) {
                /* Append d_ts unless it is already listed for o_ts */
                for (k = 0; k < i_o2d[o_ts]; k++) {
                    if (o2d[o_ts][k] == d_ts)
                        break;
                }
                if (k == i_o2d[o_ts]) {
                    o2d[o_ts][i_o2d[o_ts]++] = d_ts;
                }
            }
            else {
                if (!((o_ts == TYING_NO_ID) && (d_ts == TYING_NO_ID))) {
                    E_INFO("%s state is NULL but %s isn't.\n",
                           (o_ts == TYING_NO_ID ? "output" : "dump"),
                           (o_ts == TYING_NO_ID ? "dump" : "output"));
                }
            }
        }
    }

    /* The number of entries written must equal the number counted */
    for (i = 0; i < o_mdef->n_tied_state; i++) {
        if (i_o2d[i] != n_o2d[i]) {
            E_FATAL("%u != %u for %u\n", i_o2d[i], n_o2d[i], i);
        }
    }

    for (i = 0; i < o_mdef->n_tied_state; i++) {
        i_o2d[i] = 0;
    }

    return S3_SUCCESS;
}
/*
 * Load a sub-vector quantisation (SubVQ) model from a text file.
 *
 * file     : path of the SubVQ file
 * varfloor : variance floor handed to subvq_maha_precomp()
 * max_sv   : number of subvectors to use; < 0 means all in the file, values
 *            larger than the file provides are reduced with a warning
 * g        : optional Gaussian model; if given, its dimensions must match
 *            the "VQParam" header
 *
 * Subvectors beyond the first max_sv are still read from the file (to stay
 * in sync) but not stored.  All format errors are fatal.  Returns the newly
 * allocated subvq_t.
 */
subvq_t *
subvq_init(char *file, float64 varfloor, int32 max_sv, mgau_model_t *g)
{
    FILE *fp;
    char line[16384];
    int32 n_sv;                 /* #Subvectors in file, as opposed to that used */
    int32 s, k, l, n, r, c;
    char *strp;
    subvq_t *vq;

    /* RAH, Currently only works when n_sv = 3, values computed but ignored in other cases */
    VQ_EVAL = cmd_ln_int32("-vqeval");

    E_INFO("Loading Mixture Gaussian sub-VQ file '%s' (vq_eval: %d)\n",
           file, VQ_EVAL);

    vq = (subvq_t *) ckd_calloc(1, sizeof(subvq_t));

    fp = myfopen(file, "r");

    /* Read until "Sub-vectors" */
    for (;;) {
        if (fgets(line, sizeof(line), fp) == NULL)
            E_FATAL("Failed to read VQParam header\n");
        if (sscanf(line, "VQParam %d %d -> %d %d",
                   &(vq->origsize.r), &(vq->origsize.c),
                   &(vq->n_sv), &(vq->vqsize)) == 4)
            break;
    }

    if (g) {
        if ((g->n_mgau != vq->origsize.r) || (g->max_comp != vq->origsize.c))
            E_FATAL("Model size conflict: %d x %d (SubVQ) vs %d x %d (Original)\n",
                    vq->origsize.r, vq->origsize.c, g->n_mgau, g->max_comp);
    }

    if (max_sv < 0)
        max_sv = vq->n_sv;
    if (max_sv < vq->n_sv)
        E_INFO("Using %d subvectors out of %d\n", max_sv, vq->n_sv);
    else if (max_sv > vq->n_sv) {
        E_WARN("#Subvectors specified(%d) > available(%d); using latter\n",
               max_sv, vq->n_sv);
        max_sv = vq->n_sv;
    }

    /* n_sv: what the file holds; vq->n_sv: what is kept */
    n_sv = vq->n_sv;
    vq->n_sv = max_sv;
    if (vq->n_sv < VQ_EVAL)     /* RAH, 5.9.01, sanity check to make sure VQ_EVAL isn't higher than the n_sv */
        VQ_EVAL = vq->n_sv;

    vq->featdim = (int32 **) ckd_calloc(vq->n_sv, sizeof(int32 *));
    vq->gautbl = (vector_gautbl_t *) ckd_calloc(vq->n_sv, sizeof(vector_gautbl_t));
    vq->map = (int32 ***) ckd_calloc_3d(vq->origsize.r, vq->origsize.c,
                                        vq->n_sv, sizeof(int32));

    /* Read subvector sizes and feature dimension maps */
    for (s = 0; s < n_sv; s++) {
        if ((fgets(line, sizeof(line), fp) == NULL)
            || (sscanf(line, "Subvector %d length %d%n", &k, &l, &n) != 2)
            || (k != s))
            E_FATAL("Error reading length(subvector %d)\n", s);

        if (s < vq->n_sv) {
            vq->gautbl[s].veclen = l;
            vq->featdim[s] = (int32 *) ckd_calloc(vq->gautbl[s].veclen,
                                                  sizeof(int32));

            /* The dimension list follows the header on the same line */
            for (strp = line + n, c = 0; c < vq->gautbl[s].veclen; c++) {
                if (sscanf(strp, "%d%n", &(vq->featdim[s][c]), &n) != 1)
                    E_FATAL("Error reading subvector(%d).featdim(%d)\n", s, c);
                strp += n;
            }

            vector_gautbl_alloc(&(vq->gautbl[s]), vq->vqsize,
                                vq->gautbl[s].veclen);
        }
    }

    /* Echo info for sanity check */
    E_INFO("Original #codebooks(states)/codewords: %d x %d\n",
           vq->origsize.r, vq->origsize.c);
    E_INFO("Subvectors: %d, VQsize: %d\n", vq->n_sv, vq->vqsize);
    for (s = 0; s < vq->n_sv; s++) {
        E_INFO("SV %d feature dims(%d): ", s, vq->gautbl[s].veclen);
        for (c = 0; c < vq->gautbl[s].veclen; c++)
            fprintf(stderr, " %2d", vq->featdim[s][c]);
        fprintf(stderr, "\n");
    }
    fflush(stderr);

    /* Read VQ codebooks and maps for each subvector */
    for (s = 0; s < n_sv; s++) {
        /*
         * gautbl[] has only vq->n_sv entries; for subvectors that are read
         * but not kept it must not be touched at all.
         */
        int32 used = (s < vq->n_sv);
        int32 veclen = used ? vq->gautbl[s].veclen : 0;
        /* veclen rounded up to a multiple of 4 */
        int a_veclen = (veclen % 4) ? veclen + 4 - (veclen % 4) : veclen;

        E_INFO("Reading subvq %d%s\n", s, used ? "" : " (not used)");

        E_INFO("Reading codebook\n");
        if ((fgets(line, sizeof(line), fp) == NULL)
            || (sscanf(line, "Codebook %d", &k) != 1)
            || (k != s))
            E_FATAL("Error reading codebook header(%d)\n", s);

        for (r = 0; r < vq->vqsize; r++) {
            if (fgets(line, sizeof(line), fp) == NULL)
                E_FATAL("Error reading row(%d)\n", r);

            if (!used)
                continue;

            for (strp = line, c = 0; c < veclen; c++) {
                if (sscanf(strp, "%f %f%n",
                           &(vq->gautbl[s].mean[r][c]),
                           &(vq->gautbl[s].var[r][c]), &k) != 2)
                    E_FATAL("Error reading row(%d) col(%d)\n", r, c);
                strp += k;
            }
            /*
             * Zero the padding columns.
             * NOTE(review): assumes vector_gautbl_alloc() sizes each row to
             * the padded length - confirm.
             */
            for (; c < a_veclen; c++) {
                vq->gautbl[s].mean[r][c] = 0.0;
                vq->gautbl[s].var[r][c] = 0.0;
            }
        }

        E_INFO("Reading map\n");
        if ((fgets(line, sizeof(line), fp) == NULL)
            || (sscanf(line, "Map %d", &k) != 1)
            || (k != s))
            E_FATAL("Error reading map header(%d)\n", s);

        for (r = 0; r < vq->origsize.r; r++) {
            if (fgets(line, sizeof(line), fp) == NULL)
                E_FATAL("Error reading row(%d)\n", r);

            if (!used)
                continue;

            for (strp = line, c = 0; c < vq->origsize.c; c++) {
                if (sscanf(strp, "%d%n", &(vq->map[r][c][s]), &k) != 1)
                    E_FATAL("Error reading row(%d) col(%d)\n", r, c);
                strp += k;
            }
        }

        fflush(stdout);
    }

    if ((fscanf(fp, "%s", line) != 1) || (strcmp(line, "End") != 0))
        E_FATAL("Error reading 'End' token\n");

    fclose(fp);

    subvq_maha_precomp(vq, varfloor);
    subvq_map_compact(vq, g);
    subvq_map_linearize(vq);

    /* Longest kept subvector; only the vq->n_sv stored entries are valid */
    n = 0;
    for (s = 0; s < vq->n_sv; s++) {
        if (vq->gautbl[s].veclen > n)
            n = vq->gautbl[s].veclen;
    }
    assert(n > 0);

#ifdef THRD
    for (s = 0; s < vq->n_sv; s++)
        vq->thrd_subvec[s] = (float32 *) ckd_calloc(n, sizeof(float32));
    for (s = 0; s < NUM_THREADS; s++) {
        vq->thrd_gauscore[s] = (int32 *) ckd_calloc(vq->origsize.c, sizeof(int32));
        vq->thrd_mgau_sl[s] = (int32 *) ckd_calloc(vq->origsize.c + 1, sizeof(int32));
    }
#endif

#ifdef USE_ICC
    vq->subvec = (float32 *) _mm_malloc(n * sizeof(float32), 16);
#else
    vq->subvec = (float32 *) ckd_calloc(n, sizeof(float32));
#endif
    vq->vqdist = (int32 **) ckd_calloc_2d(vq->n_sv, vq->vqsize, sizeof(int32));
    vq->gauscore = (int32 *) ckd_calloc(vq->origsize.c, sizeof(int32));
    vq->mgau_sl = (int32 *) ckd_calloc(vq->origsize.c + 1, sizeof(int32));

    return vq;
}
static int corpus_read_next_transcription_line(char **trans) { char utt_id[512]; char *s; /* look for a close paren in the line */ s = strrchr(transcription_line, ')'); if (s != NULL) { int nspace; /* found a close paren */ nspace = strspn(s + 1, " \t\r\n"); if (s[nspace + 1] == '\0') { /* it is at the end of the line */ *s = '\0'; /* terminate the string at the paren */ /* search for a matching open paren */ for (s--; (s >= transcription_line) && (*s != '('); s--); if (*s == '(') { /* found a matching open paren */ assert(strlen(s+1) < 512); strcpy(utt_id, s+1); if (strcmp_ci(utt_id, corpus_utt()) != 0) { char *uttfullname = corpus_utt_full_name(); int suffpos = strlen(uttfullname) - strlen(utt_id); if (suffpos >= 0 && strlen(utt_id) > 0 && strcmp_ci(&uttfullname[suffpos], utt_id) != 0) { E_WARN("Utterance id in transcription file, '%s', does not match filename in control path '%s'.\n", utt_id, uttfullname); } } /* look for the first non-whitespace character before the open paren */ for (--s; (s >= transcription_line) && isspace((unsigned char)*s); s--); if (s < transcription_line) { E_FATAL("Utterance transcription is empty: %s\n", transcription_line); } ++s; *s = '\0'; /* terminate the string at the first whitespace character following the first non-whitespace character found above */ } else { E_ERROR("Expected open paren after ending close paren in line: '%s'\n", transcription_line); return S3_ERROR; } } else { /* close paren not at end of line so assume it is not the close paren associated with the utt id */ } } else { /* No close paren, so no utt id */ /* This is fine, but the user gets no explicit sanity check for the ordering of the LSN file */ } *trans = strdup(transcription_line); return S3_SUCCESS; }
/**
 * Convert one input file to a feature file.
 *
 * @param wtf      Converter state; its per-file fields (infile, outfile,
 *                 audio, feat, outfh) are set here and reset to NULL before
 *                 returning.
 * @param infile   Path of the input file.
 * @param outfile  Path of the output file.
 * @return 0 on success, -1 on any failure.
 *
 * The FFT size is raised (and the front end re-created) if -nfft is too
 * small for the window, and the block size is raised if it cannot hold one
 * frame plus one shift per channel.
 *
 * NOTE(review): the cleanup code frees any non-NULL per-file field, so those
 * fields are assumed to be NULL on entry - confirm against the initialiser.
 */
int
sphinx_wave2feat_convert_file(sphinx_wave2feat_t *wtf,
                              char const *infile, char const *outfile)
{
    int nchans, minfft, nfft, nfloat, veclen;
    audio_type_t const *atype;
    int fshift, fsize;
    int rv = -1;

    if (cmd_ln_boolean_r(wtf->config, "-verbose"))
        E_INFO("Converting %s to %s\n", infile, outfile);

    wtf->infile = ckd_salloc(infile);

    /* Detect input file type. */
    if ((atype = detect_audio_type(wtf)) == NULL) {
        /* Nothing else has been set up yet; release only the name. */
        ckd_free(wtf->infile);
        wtf->infile = NULL;
        return -1;
    }

    /* Determine whether to byteswap input. */
    wtf->byteswap = strcmp(cmd_ln_str_r(wtf->config, "-mach_endian"),
                           cmd_ln_str_r(wtf->config, "-input_endian"));

    /* Make sure the FFT size is sufficiently large. */
    minfft = (int) (cmd_ln_float32_r(wtf->config, "-samprate")
                    * cmd_ln_float32_r(wtf->config, "-wlen") + 0.5);
    for (nfft = 1; nfft < minfft; nfft <<= 1)
        ;
    if (nfft > cmd_ln_int32_r(wtf->config, "-nfft")) {
        E_WARN("Value of -nfft = %d is too small, increasing to %d\n",
               cmd_ln_int32_r(wtf->config, "-nfft"), nfft);
        cmd_ln_set_int32_r(wtf->config, "-nfft", nfft);
        fe_free(wtf->fe);
        wtf->fe = fe_init_auto_r(wtf->config);
    }

    /* Get the output frame size (if not already set). */
    if (wtf->veclen == 0)
        wtf->veclen = fe_get_output_size(wtf->fe);

    /* Set up the input and output buffers. */
    fe_get_input_size(wtf->fe, &fshift, &fsize);
    /* Want to get at least a whole frame plus shift in here.  Also we
       will either pick or mix multiple channels so we need to read
       them all at once. */
    nchans = cmd_ln_int32_r(wtf->config, "-nchans");
    wtf->blocksize = cmd_ln_int32_r(wtf->config, "-blocksize") * nchans;
    if (wtf->blocksize < (fsize + fshift) * nchans) {
        E_INFO("Block size of %d too small, increasing to %d\n",
               wtf->blocksize, (fsize + fshift) * nchans);
        wtf->blocksize = (fsize + fshift) * nchans;
    }
    wtf->audio = (short *) ckd_calloc(wtf->blocksize, sizeof(*wtf->audio));
    wtf->featsize = (wtf->blocksize / nchans - fsize) / fshift;

    /* Use the maximum of the input and output frame sizes to allocate this. */
    veclen = wtf->veclen;
    if (wtf->in_veclen > veclen)
        veclen = wtf->in_veclen;
    wtf->feat = (mfcc_t **) ckd_calloc_2d(wtf->featsize, veclen,
                                          sizeof(**wtf->feat));

    /* Let's go! */
    if ((wtf->outfh = fopen(outfile, "wb")) == NULL) {
        E_ERROR_SYSTEM("Failed to open %s for writing", outfile);
        /* Buffers and the input name are already allocated: release them. */
        goto cleanup;
    }
    /* Write an empty header, which we'll fill in later. */
    if (wtf->ot->output_header &&
        (*wtf->ot->output_header)(wtf, 0) < 0) {
        E_ERROR_SYSTEM("Failed to write empty header to %s\n", outfile);
        goto cleanup;
    }
    wtf->outfile = ckd_salloc(outfile);

    if ((nfloat = (*atype->decode)(wtf)) < 0) {
        E_ERROR("Failed to convert");
        goto cleanup;
    }

    /* Go back and write the real header now that the size is known. */
    if (wtf->ot->output_header) {
        if (fseek(wtf->outfh, 0, SEEK_SET) < 0) {
            E_ERROR_SYSTEM("Failed to seek to beginning of %s\n", outfile);
            goto cleanup;
        }
        if ((*wtf->ot->output_header)(wtf, nfloat) < 0) {
            E_ERROR_SYSTEM("Failed to write header to %s\n", outfile);
            goto cleanup;
        }
    }

    rv = 0;

cleanup:
    /* Shared by the success and failure paths */
    if (wtf->audio)
        ckd_free(wtf->audio);
    if (wtf->feat)
        ckd_free_2d(wtf->feat);
    if (wtf->infile)
        ckd_free(wtf->infile);
    if (wtf->outfile)
        ckd_free(wtf->outfile);

    wtf->audio = NULL;
    wtf->infile = NULL;
    wtf->feat = NULL;
    wtf->outfile = NULL;

    if (wtf->outfh)
        if (fclose(wtf->outfh) == EOF)
            E_ERROR_SYSTEM("Failed to close output file");
    wtf->outfh = NULL;

    return rv;
}
/*
 * Build a word-level FSG from a parsed s2_fsg_t description.
 *
 * fsg          : source grammar (states, start/final state, transition list)
 * use_altpron  : if non-zero, map words to their base pronunciation and add
 *                a parallel transition for every alternative pronunciation
 * use_filler   : if non-zero, add silence/filler transitions
 * kbc          : knowledge base (dictionary, mdef, tmat, filler penalties)
 *
 * Returns the new word_fsg_t, or NULL if the grammar fails validation.
 * A transition whose word is not in the dictionary is reported and then
 * handled exactly like a transition without a word (wid < 0).
 */
word_fsg_t *
word_fsg_load(s2_fsg_t * fsg, int use_altpron, int use_filler, kbcore_t *kbc)
{
    float32 silprob = kbc->fillpen->silprob;
    float32 fillprob = kbc->fillpen->fillerprob;
    float32 lw = kbc->fillpen->lw;
    word_fsg_t *wfsg;
    s2_fsg_trans_t *arc;
    glist_t eps_links = NULL;
    int32 n_arc = 0;
    int32 n_eps = 0;
    int32 n_alt = 0;
    int32 n_filler = 0;
    int32 n_unknown = 0;
    int32 word, alt, score;
    int32 src, dst;

    assert(fsg);

    /* ---- Validation ---- */
    if (lw <= 0.0)
        E_WARN("Unusual language-weight value: %.3e\n", lw);

    if (use_filler && ((silprob < 0.0) || (fillprob < 0.0))) {
        E_ERROR("silprob/fillprob must be >= 0\n");
        return NULL;
    }

    if ((fsg->n_state <= 0)
        || (fsg->start_state < 0) || (fsg->start_state >= fsg->n_state)
        || (fsg->final_state < 0) || (fsg->final_state >= fsg->n_state)) {
        E_ERROR("Bad #states/start_state/final_state values: %d/%d/%d\n",
                fsg->n_state, fsg->start_state, fsg->final_state);
        return NULL;
    }

    arc = fsg->trans_list;
    while (arc != NULL) {
        int bad_src = (arc->from_state < 0) || (arc->from_state >= fsg->n_state);
        int bad_dst = (arc->to_state < 0) || (arc->to_state >= fsg->n_state);
        int bad_prob = (arc->prob <= 0) || (arc->prob > 1.0);

        if (bad_src || bad_dst || bad_prob) {
            E_ERROR("Bad transition: P(%d -> %d) = %e\n",
                    arc->from_state, arc->to_state, arc->prob);
            return NULL;
        }
        arc = arc->next;
    }

    /* ---- Construction ---- */
    wfsg = (word_fsg_t *) ckd_calloc(1, sizeof(word_fsg_t));
    wfsg->name = ckd_salloc(fsg->name ? fsg->name : "");
    wfsg->n_state = fsg->n_state;
    wfsg->start_state = fsg->start_state;
    wfsg->final_state = fsg->final_state;
    wfsg->use_altpron = use_altpron;
    wfsg->use_filler = use_filler;
    wfsg->lw = lw;
    wfsg->lc = NULL;
    wfsg->rc = NULL;
    wfsg->dict = kbc->dict;
    wfsg->mdef = kbc->mdef;
    wfsg->tmat = kbc->tmat;
    wfsg->n_ciphone = mdef_n_ciphone(kbc->mdef);

    /* n_state x n_state table of word-transition lists */
    wfsg->trans = (glist_t **) ckd_calloc_2d(wfsg->n_state, wfsg->n_state,
                                             sizeof(glist_t));
    /* n_state x n_state table of epsilon links */
    wfsg->null_trans = (word_fsglink_t ***) ckd_calloc_2d(wfsg->n_state,
                                                          wfsg->n_state,
                                                          sizeof(word_fsglink_t *));

    /* ---- Transitions ---- */
    for (arc = fsg->trans_list; arc != NULL; arc = arc->next) {
        /* Log-domain probability scaled by the language weight */
        score = (int32) (logs3(kbcore_logmath(kbc), arc->prob) * lw);

        /* Resolve the word id; -1 denotes a null transition */
        word = -1;
        if (arc->word) {
            word = dict_wordid(kbc->dict, arc->word);
            if (word < 0) {
                E_ERROR("Unknown word '%s'; ignored\n", arc->word);
                n_unknown++;
            }
            else if (use_altpron) {
                word = dict_basewid(kbc->dict, word);
                assert(word >= 0);
            }
        }

        src = arc->from_state;
        dst = arc->to_state;

        if (word < 0) {
            if (word_fsg_null_trans_add(wfsg, src, dst, score) == 1) {
                n_eps++;
                eps_links = glist_add_ptr(eps_links,
                                          (void *) wfsg->null_trans[src][dst]);
            }
        }
        else {
            word_fsg_trans_add(wfsg, src, dst, score, word);

            /* One extra transition per alternative pronunciation */
            if (use_altpron) {
                alt = dict_nextalt(kbc->dict, word);
                while (alt >= 0) {
                    word_fsg_trans_add(wfsg, src, dst, score, alt);
                    n_alt++;
                    n_arc++;
                    alt = dict_nextalt(kbc->dict, alt);
                }
            }
        }

        n_arc++;
    }

    /* Silence and noise filler transitions, if requested */
    if (use_filler) {
        n_filler = word_fsg_add_filler(wfsg, silprob, fillprob,
                                       kbcore_logmath(kbc));
        n_arc += n_filler;
    }

    E_INFO("FSG: %d states, %d transitions (%d null, %d alt, %d filler, %d unknown)\n",
           wfsg->n_state, n_arc, n_eps, n_alt, n_filler, n_unknown);

#if __FSG_DBG__
    E_INFO("FSG before NULL closure:\n");
    word_fsg_write(wfsg, stdout);
#endif

    /* Closure over the null transitions */
    eps_links = word_fsg_null_trans_closure(wfsg, eps_links);
    glist_free(eps_links);

#if __FSG_DBG__
    E_INFO("FSG after NULL closure:\n");
    word_fsg_write(wfsg, stdout);
#endif

    /* Left/right context CI-phone lists per state */
    word_fsg_lc_rc(wfsg);

#if __FSG_DBG__
    E_INFO("FSG after lc/rc:\n");
    word_fsg_write(wfsg, stdout);
#endif

    return wfsg;
}
/*
 * Open a possibly compressed file for reading, trying the alternate
 * compressed/uncompressed name if the given one cannot be opened.
 *
 * file   : requested file name
 * ispipe : set by fopen_comp(); tells the caller how to close the stream
 *
 * If "file" has a compression suffix, the name without it is tried; if it
 * has none, "file.gz", "file.bz2" and "file.Z" are tried in that order.
 * A warning names the substitute that was actually opened.
 * Returns the open stream, or NULL if nothing could be opened.
 */
FILE *
fopen_compchk(const char *file, int32 * ispipe)
{
#ifndef HAVE_POPEN
    *ispipe = 0; /* No popen() on WinCE */
    /* And therefore the rest of this function is useless. */
    return (fopen_comp(file, "r", ispipe));
#else /* HAVE_POPEN */
    static const char *const suffixes[] = { ".gz", ".bz2", ".Z" };
    int32 isgz;
    FILE *fh;
    char *altname;
    size_t k, i;

    /* First just try to fopen_comp() it */
    if ((fh = fopen_comp(file, "r", ispipe)) != NULL)
        return fh;

    /* File doesn't exist; try other compressed/uncompressed form, as appropriate */
    guess_comptype(file, ispipe, &isgz);
    k = strlen(file);
    altname = ckd_calloc(k + 5, 1);     /* room for the longest suffix + NUL */
    strcpy(altname, file);

    switch (isgz) {
    case COMP_GZIP:
        altname[k - 3] = '\0';          /* drop ".gz" */
        break;
    case COMP_BZIP2:
        altname[k - 4] = '\0';          /* drop ".bz2" */
        break;
    case COMP_COMPRESS:
        altname[k - 2] = '\0';          /* drop ".Z" */
        break;
    case COMP_NONE:
        for (i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); i++) {
            strcpy(altname + k, suffixes[i]);
            if ((fh = fopen_comp(altname, "r", ispipe)) != NULL) {
                E_WARN("Using %s instead of %s\n", altname, file);
                ckd_free(altname);
                return fh;
            }
        }
        ckd_free(altname);
        return NULL;
    default:
        /* Unknown type: there is no alternate name to try */
        ckd_free(altname);
        return NULL;
    }

    /* Suffix stripped: open the uncompressed name and hand it back */
    fh = fopen_comp(altname, "r", ispipe);
    if (fh != NULL)
        E_WARN("Using %s instead of %s\n", altname, file);
    ckd_free(altname);
    return fh;
#endif /* HAVE_POPEN */
}
/**
 * Convert every file listed in a control file.
 *
 * @param wtf      Converter state; -nskip, -runlen, -npart and -part are
 *                 read from its configuration.
 * @param ctlfile  Path of the control file (one file ID per line; anything
 *                 after the first space is ignored).
 * @return 0 on success, -1 if the control file cannot be opened, otherwise
 *         the non-zero result of the first failed conversion.
 *
 * When -npart is non-zero the file is split into that many equal parts and
 * only part -part is processed (the last part runs to the end).  An input
 * file that appears more than once is converted only the first time.
 */
static int
run_control_file(sphinx_wave2feat_t *wtf, char const *ctlfile)
{
    hash_table_t *files;
    hash_iter_t *itor;
    lineiter_t *li;
    FILE *ctlfh;
    int nskip, runlen, npart, rv = 0;

    if ((ctlfh = fopen(ctlfile, "r")) == NULL) {
        E_ERROR_SYSTEM("Failed to open control file %s", ctlfile);
        return -1;
    }
    nskip = cmd_ln_int32_r(wtf->config, "-nskip");
    runlen = cmd_ln_int32_r(wtf->config, "-runlen");
    if ((npart = cmd_ln_int32_r(wtf->config, "-npart"))) {
        /* Count lines in the file. */
        int partlen, part, nlines = 0;
        part = cmd_ln_int32_r(wtf->config, "-part");
        for (li = lineiter_start(ctlfh); li; li = lineiter_next(li))
            ++nlines;
        fseek(ctlfh, 0, SEEK_SET);
        partlen = nlines / npart;
        nskip = partlen * (part - 1);
        if (part == npart)
            runlen = -1;        /* last part takes the remainder */
        else
            runlen = partlen;
    }
    if (runlen != -1) {
        E_INFO("Processing %d utterances at position %d\n", runlen, nskip);
        files = hash_table_new(runlen, HASH_CASE_YES);
    }
    else {
        E_INFO("Processing all remaining utterances at position %d\n", nskip);
        files = hash_table_new(1000, HASH_CASE_YES);
    }
    for (li = lineiter_start(ctlfh); li; li = lineiter_next(li)) {
        char *c, *infile, *outfile;

        if (nskip-- > 0)
            continue;
        if (runlen == 0) {
            lineiter_free(li);
            break;
        }
        --runlen;

        string_trim(li->buf, STRING_BOTH);
        /* Extract the file ID from the control line. */
        if ((c = strchr(li->buf, ' ')) != NULL)
            *c = '\0';
        if (strlen(li->buf) == 0) {
            E_WARN("Empty line %d in control file, skipping\n", li->lineno);
            continue;
        }
        build_filenames(wtf->config, li->buf, &infile, &outfile);
        if (hash_table_lookup(files, infile, NULL) == 0) {
            /* Already converted: these names are not stored in the table,
               so they must be released here. */
            ckd_free(infile);
            ckd_free(outfile);
            continue;
        }
        rv = sphinx_wave2feat_convert_file(wtf, infile, outfile);
        /* The table takes over both strings; they are freed below. */
        hash_table_enter(files, infile, outfile);
        if (rv != 0) {
            lineiter_free(li);
            break;
        }
    }
    for (itor = hash_table_iter(files); itor;
         itor = hash_table_iter_next(itor)) {
        ckd_free((void *) hash_entry_key(itor->ent));
        ckd_free(hash_entry_val(itor->ent));
    }
    hash_table_free(files);

    if (fclose(ctlfh) == EOF)
        E_ERROR_SYSTEM("Failed to close control file");
    return rv;
}
/*
 * Program entry point.
 *
 * Arguments come either from the command line, from an argument file named
 * as the only argument, or from the default file "s3decode.arg" when no
 * argument is given.  "help" as the only argument prints the argument
 * definitions.  After initialisation every utterance of the control file is
 * processed and timing totals are printed.
 */
int
main (int32 argc, char *argv[])
{
    char *str;

#if 0
    ckd_debug(100000);
#endif

    /* Digest command line argument definitions */
    cmd_ln_define (defn);

    if ((argc == 2) && (strcmp (argv[1], "help") == 0)) {
        cmd_ln_print_definitions();
        exit(1);
    }

    /* Look for default or specified arguments file */
    str = NULL;
    if ((argc == 2) && (argv[1][0] != '-'))
        str = argv[1];
    else if (argc == 1) {
        str = "s3decode.arg";
        E_INFO("Looking for default argument file: %s\n", str);
    }
    if (str) {
        /* Build command line argument list from file */
        if ((argc = load_argfile (str, argv[0], &argv)) < 0) {
            fprintf (stderr, "Usage:\n");
            fprintf (stderr, "\t%s argument-list, or\n", argv[0]);
            fprintf (stderr, "\t%s [argument-file] (default file: s3decode.arg)\n\n",
                     argv[0]);
            cmd_ln_print_definitions();
            exit(1);
        }
    }

    cmdline_parse (argc, argv);

    /* Remove memory allocation restrictions */
    unlimit ();

#if (! WIN32)
    {
        char buf[1024];

        /* On failure the buffer contents are unspecified; print nothing. */
        if (gethostname (buf, 1024) != 0)
            buf[0] = '\0';
        buf[1023] = '\0';
        E_INFO ("Executing on: %s\n", buf);
    }
#endif

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);

    if ((cmd_ln_access("-mdeffn") == NULL) ||
        (cmd_ln_access("-dictfn") == NULL) ||
        (cmd_ln_access("-lmfn") == NULL))
        E_FATAL("Missing -mdeffn, -dictfn, or -lmfn argument\n");

    /*
     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid
     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values
     * to be maintained in int32 variables without significant loss of precision.
     */
    if (cmd_ln_access("-logbase") == NULL)
        logs3_init (1.0001);
    else {
        float32 logbase;

        logbase = *((float32 *) cmd_ln_access("-logbase"));
        if (logbase <= 1.0)
            E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);
        if (logbase > 1.1)
            E_WARN("Logbase %e perhaps too large??\n", logbase);
        logs3_init ((float64) logbase);
    }

    /* Read in input databases */
    models_init ();

    /* Allocate timing object */
    tm_utt = timing_new ();
    tot_nfr = 0;

    /* Initialize forward Viterbi search module */
    dag_init ();
    printf ("\n");

    process_ctlfile ();

    printf ("\n");
    printf("TOTAL FRAMES:       %8d\n", tot_nfr);
    if (tot_nfr > 0) {
        /* xRT figures use 0.01 sec per frame */
        printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_cpu, tm_utt->t_tot_cpu/(tot_nfr*0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_elapsed, tm_utt->t_tot_elapsed/(tot_nfr*0.01));
    }
    fflush (stdout);

#if (! WIN32)
    system ("ps auxwww | grep s3dag");
#endif

    /* Hack!! To avoid hanging problem under Linux */
    if (logfp) {
        fclose (logfp);
        *stdout = orig_stdout;
        *stderr = orig_stderr;
    }

    exit(0);
}
/* Process utterances in the control file (-ctlfn argument) */ static void process_ctlfile ( void ) { FILE *ctlfp, *matchfp, *matchsegfp; char *ctlfile; char *matchfile, *matchsegfile; char line[1024], ctlspec[1024], uttid[1024]; int32 ctloffset, ctlcount; int32 i, k, sf, ef; if ((ctlfile = (char *) cmd_ln_access("-ctlfn")) == NULL) E_FATAL("No -ctlfn argument\n"); E_INFO("Processing ctl file %s\n", ctlfile); if ((ctlfp = fopen (ctlfile, "r")) == NULL) E_FATAL("fopen(%s,r) failed\n", ctlfile); if ((matchfile = (char *) cmd_ln_access("-matchfn")) == NULL) { E_WARN("No -matchfn argument\n"); matchfp = NULL; } else { if ((matchfp = fopen (matchfile, "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", matchfile); } if ((matchsegfile = (char *) cmd_ln_access("-matchsegfn")) == NULL) { E_WARN("No -matchsegfn argument\n"); matchsegfp = NULL; } else { if ((matchsegfp = fopen (matchsegfile, "w")) == NULL) E_ERROR("fopen(%s,w) failed\n", matchsegfile); } ctloffset = *((int32 *) cmd_ln_access("-ctloffset")); if (! cmd_ln_access("-ctlcount")) ctlcount = 0x7fffffff; /* All entries processed if no count specified */ else ctlcount = *((int32 *) cmd_ln_access("-ctlcount")); if (ctlcount == 0) { E_INFO("-ctlcount argument = 0!!\n"); fclose (ctlfp); return; } if (ctloffset > 0) E_INFO("Skipping %d utterances in the beginning of control file\n", ctloffset); while ((ctloffset > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { if (sscanf (line, "%s", ctlspec) > 0) --ctloffset; } while ((ctlcount > 0) && (fgets(line, sizeof(line), ctlfp) != NULL)) { printf ("\n"); E_INFO("Utterance: %s", line); sf = 0; ef = (int32)0x7ffffff0; if ((k = sscanf (line, "%s %d %d %s", ctlspec, &sf, &ef, uttid)) <= 0) continue; /* Empty line */ if ((k == 2) || ( (k >= 3) && ((sf >= ef) || (sf < 0))) ) { E_ERROR("Error in ctlfile spec; skipped\n"); /* What happens to ctlcount??? 
*/ continue; } if (k < 4) { /* Create utt-id from mfc-filename (and sf/ef if specified) */ for (i = strlen(ctlspec)-1; (i >= 0) && (ctlspec[i] != '/'); --i); if (k == 3) sprintf (uttid, "%s_%d_%d", ctlspec+i+1, sf, ef); else strcpy (uttid, ctlspec+i+1); } decode_utt (uttid, matchfp, matchsegfp); --ctlcount; } printf ("\n"); if (fscanf (ctlfp, "%s", line) == 1) E_INFO("Skipping rest of control file beginning with:\n\t%s\n", line); if (matchfp) fclose (matchfp); if (matchsegfp) fclose (matchsegfp); fclose (ctlfp); }
/*
 * Forced-alignment driver.
 *
 * Parses the command line, opens the input transcript file (-insent) and the
 * optional output transcript/control files, loads the models, and then runs
 * utt_align() on every entry of the control file (-ctl) through
 * ctl_process().  Timing totals are printed when at least one frame was
 * processed.  Returns 0; unrecoverable set-up problems abort via E_FATAL.
 */
int
main(int32 argc, char *argv[])
{
    char sent[16384];
    cmd_ln_t *config;

    print_appl_info(argv[0]);
    cmd_ln_appl_enter(argc, argv, "default.arg", defn);

    unlimit();

    config = cmd_ln_get();

    ctloffset = cmd_ln_int32_r(config, "-ctloffset");
    sentfile = cmd_ln_str_r(config, "-insent");
    if ((sentfp = fopen(sentfile, "r")) == NULL)
        E_FATAL_SYSTEM("Failed to open file %s for reading", sentfile);

    /* Note various output directories */
    if (cmd_ln_str_r(config, "-s2stsegdir") != NULL)
        s2stsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-s2stsegdir"));
    if (cmd_ln_str_r(config, "-stsegdir") != NULL)
        stsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-stsegdir"));
    if (cmd_ln_str_r(config, "-phsegdir") != NULL)
        phsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-phsegdir"));
    if (cmd_ln_str_r(config, "-phlabdir") != NULL)
        phlabdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-phlabdir"));
    if (cmd_ln_str_r(config, "-wdsegdir") != NULL)
        wdsegdir = (char *) ckd_salloc(cmd_ln_str_r(config, "-wdsegdir"));

    /*
     * HACK: pre-read the transcript file so that it starts at the same
     * offset as the control file.  This is done without checking that the
     * control file itself can be read; the utt_align() callback reports any
     * utterance-id mismatch between the two files.
     */
    while (ctloffset > 0) {
        if (fgets(sent, sizeof(sent), sentfp) == NULL) {
            E_ERROR("EOF(%s)\n", sentfile);
            break;
        }
        --ctloffset;
    }

    if ((outsentfile = cmd_ln_str_r(config, "-outsent")) != NULL) {
        if ((outsentfp = fopen(outsentfile, "w")) == NULL)
            E_FATAL_SYSTEM("Failed to open file %s for writing", outsentfile);
    }

    if ((outctlfile = cmd_ln_str_r(config, "-outctl")) != NULL) {
        if ((outctlfp = fopen(outctlfile, "w")) == NULL)
            E_FATAL_SYSTEM("Failed to open file %s for writing", outctlfile);
    }

    /* At least one kind of output must have been requested. */
    if ((cmd_ln_str_r(config, "-s2stsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-stsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-phlabdir") == NULL) &&
        (cmd_ln_str_r(config, "-phsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-wdsegdir") == NULL) &&
        (cmd_ln_str_r(config, "-outsent") == NULL))
        E_FATAL("Missing output file/directory argument(s)\n");

    /* Read in input databases */
    models_init(config);

    if (!feat)
        feat = feat_array_alloc(kbcore_fcb(kbc), S3_MAX_FRAMES);

    timers[tmr_utt].name = "U";
    timers[tmr_gauden].name = "G";
    timers[tmr_senone].name = "S";
    timers[tmr_align].name = "A";

    /* Initialize align module */
    align_init(kbc->mdef, kbc->tmat, dict, config, kbc->logmath);
    printf("\n");

    /* Global MLLR transform, if one was given */
    if (cmd_ln_str_r(config, "-mllr") != NULL) {
        if (kbc->mgau)
            adapt_set_mllr(adapt_am, kbc->mgau,
                           cmd_ln_str_r(config, "-mllr"), NULL,
                           kbc->mdef, config);
        else if (kbc->ms_mgau)
            model_set_mllr(kbc->ms_mgau,
                           cmd_ln_str_r(config, "-mllr"), NULL,
                           kbcore_fcb(kbc), kbc->mdef, config);
        else
            E_WARN("Can't use MLLR matrices with .s2semi. yet\n");
    }

    tot_nfr = 0;

    if (cmd_ln_str_r(config, "-ctl")) {
        /* -ctl_mllr supplies the per-utterance MLLR file list, if any */
        ctl_process(cmd_ln_str_r(config, "-ctl"),
                    NULL,
                    cmd_ln_str_r(config, "-ctl_mllr"),
                    cmd_ln_int32_r(config, "-ctloffset"),
                    cmd_ln_int32_r(config, "-ctlcount"),
                    utt_align, config);
    }
    else {
        E_FATAL(" -ctl are not specified.\n");
    }

    if (tot_nfr > 0) {
        printf("\n");
        printf("TOTAL FRAMES: %8d\n", tot_nfr);
        printf("TOTAL CPU TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt.t_tot_cpu, tm_utt.t_tot_cpu / (tot_nfr * 0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt.t_tot_elapsed,
               tm_utt.t_tot_elapsed / (tot_nfr * 0.01));
    }

    if (outsentfp)
        fclose(outsentfp);
    if (outctlfp)
        fclose(outctlfp);
    if (sentfp)
        fclose(sentfp);

    /* Release every directory string duplicated above. */
    ckd_free(s2stsegdir);
    ckd_free(stsegdir);
    ckd_free(phsegdir);
    ckd_free(phlabdir);
    ckd_free(wdsegdir);

    feat_array_free(feat);
    align_free();
    models_free();

#if (! WIN32)
    system("ps aguxwww | grep s3align");
#endif

    cmd_ln_free_r(config);
    return 0;
}
static int normalize() { char file_name[MAXPATHLEN+1]; float32 ***mixw_acc = NULL; float32 ***in_mixw = NULL; float64 s; uint32 n_mixw; uint32 n_stream; uint32 n_mllr_class; uint32 n_density; float32 ***tmat_acc = NULL; uint32 n_tmat; uint32 n_state_pm; uint32 i, j, k; vector_t ***in_mean = NULL; vector_t ***wt_mean = NULL; vector_t ***in_var = NULL; vector_t ***wt_var = NULL; vector_t ****in_fullvar = NULL; vector_t ****wt_fullvar = NULL; int32 pass2var = FALSE; int32 var_is_full = FALSE; float32 ***dnom = NULL; uint32 n_mgau; uint32 n_gau_stream; uint32 n_gau_density; const uint32 *veclen = NULL; const char **accum_dir; const char *oaccum_dir; const char *in_mixw_fn; const char *out_mixw_fn; const char *out_tmat_fn; const char *in_mean_fn; const char *out_mean_fn; const char *in_var_fn; const char *out_var_fn; const char *out_dcount_fn; int err; uint32 mllr_mult; uint32 mllr_add; float32 *****regl = NULL; float32 ****regr = NULL; uint32 no_retries=0; accum_dir = cmd_ln_str_list("-accumdir"); oaccum_dir = cmd_ln_str("-oaccumdir"); out_mixw_fn = cmd_ln_str("-mixwfn"); out_tmat_fn = cmd_ln_str("-tmatfn"); out_mean_fn = cmd_ln_str("-meanfn"); out_var_fn = cmd_ln_str("-varfn"); in_mixw_fn = cmd_ln_str("-inmixwfn"); in_mean_fn = cmd_ln_str("-inmeanfn"); in_var_fn = cmd_ln_str("-invarfn"); out_dcount_fn = cmd_ln_str("-dcountfn"); var_is_full = cmd_ln_int32("-fullvar"); /* must be at least one accum dir */ assert(accum_dir[0] != NULL); if (out_mixw_fn == NULL) { E_INFO("No -mixwfn specified, will skip if any\n"); } if (out_tmat_fn == NULL) { E_INFO("No -tmatfn specified, will skip if any\n"); } if (out_mean_fn == NULL) { E_INFO("No -meanfn specified, will skip if any\n"); } if (out_var_fn == NULL) { E_INFO("No -varfn specified, will skip if any\n"); } if (in_mixw_fn != NULL) { E_INFO("Selecting unseen mixing weight parameters from %s\n", in_mixw_fn); } if (in_mean_fn != NULL) { E_INFO("Selecting unseen density mean parameters from %s\n", in_mean_fn); if 
(s3gau_read(in_mean_fn, &in_mean, &n_mgau, &n_gau_stream, &n_gau_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)veclen); veclen = NULL; } if (in_var_fn != NULL) { E_INFO("Selecting unseen density variance parameters from %s\n", in_var_fn); if (var_is_full) { if (s3gau_read_full(in_var_fn, &in_fullvar, &n_mgau, &n_gau_stream, &n_gau_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } } else { if (s3gau_read(in_var_fn, &in_var, &n_mgau, &n_gau_stream, &n_gau_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } } ckd_free((void *)veclen); veclen = NULL; } n_stream = 0; for (i = 0; accum_dir[i]; i++) { E_INFO("Reading and accumulating counts from %s\n", accum_dir[i]); if (out_mixw_fn) { rdacc_mixw(accum_dir[i], &mixw_acc, &n_mixw, &n_stream, &n_density); } if (out_tmat_fn) { rdacc_tmat(accum_dir[i], &tmat_acc, &n_tmat, &n_state_pm); } if (out_mean_fn || out_var_fn) { if (var_is_full) rdacc_den_full(accum_dir[i], &wt_mean, &wt_fullvar, &pass2var, &dnom, &n_mgau, &n_gau_stream, &n_gau_density, &veclen); else rdacc_den(accum_dir[i], &wt_mean, &wt_var, &pass2var, &dnom, &n_mgau, &n_gau_stream, &n_gau_density, &veclen); if (out_mixw_fn) { if (n_stream != n_gau_stream) { E_ERROR("mixw inconsistent w/ densities WRT # " "streams (%u != %u)\n", n_stream, n_gau_stream); } if (n_density != n_gau_density) { E_ERROR("mixw inconsistent w/ densities WRT # " "den/mix (%u != %u)\n", n_density, n_gau_density); } } else { n_stream = n_gau_stream; n_density = n_gau_density; } } } if (oaccum_dir && mixw_acc) { /* write the total mixing weight reest. 
accumulators */ err = 0; sprintf(file_name, "%s/mixw_counts", oaccum_dir); if (in_mixw_fn) { if (s3mixw_read(in_mixw_fn, &in_mixw, &i, &j, &k) != S3_SUCCESS) { E_FATAL_SYSTEM("Unable to read %s", in_mixw_fn); } if (i != n_mixw) { E_FATAL("# mixw in input mixw file != # mixw in output mixw file\n"); } if (j != n_stream) { E_FATAL("# stream in input mixw file != # stream in output mixw file\n"); } if (k != n_density) { E_FATAL("# density in input mixw file != # density in output mixw file\n"); } for (i = 0; i < n_mixw; i++) { for (j = 0; j < n_stream; j++) { for (k = 0, s = 0; k < n_density; k++) { s += mixw_acc[i][j][k]; } if ((s == 0) && in_mixw) { for (k = 0, s = 0; k < n_density; k++) { mixw_acc[i][j][k] = in_mixw[i][j][k]; } E_INFO("set mixw %u stream %u to input mixw value\n", i, j); } } } } do { /* Write out the accumulated reestimation sums */ if (s3mixw_write(file_name, mixw_acc, n_mixw, n_stream, n_density) != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 30 seconds).\n "); } } } while (err > 1); } if (pass2var) E_INFO("-2passvar yes\n"); if (oaccum_dir && (wt_mean || wt_var || wt_fullvar)) { /* write the total mixing Gau. den reest. accumulators */ err = 0; sprintf(file_name, "%s/gauden_counts", oaccum_dir); do { int32 rv; if (var_is_full) rv = s3gaucnt_write_full(file_name, wt_mean, wt_fullvar, pass2var, dnom, n_mgau, n_gau_stream, n_gau_density, veclen); else rv = s3gaucnt_write(file_name, wt_mean, wt_var, pass2var, dnom, n_mgau, n_gau_stream, n_gau_density, veclen); if (rv != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 5 minutes).\n "); } } } while (err > 1); } if (oaccum_dir && tmat_acc) { /* write the total transition matrix reest. 
accumulators */ err = 0; sprintf(file_name, "%s/tmat_counts", oaccum_dir); do { if (s3tmat_write(file_name, tmat_acc, n_tmat, n_state_pm) != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 5 minutes).\n "); } } } while (err > 1); } if (oaccum_dir && regr && regl) { /* write the total MLLR regression matrix accumulators */ err = 0; sprintf(file_name, "%s/regmat_counts", oaccum_dir); do { if (s3regmatcnt_write(file_name, regr, regl, n_mllr_class, n_stream, veclen, mllr_mult, mllr_add) != S3_SUCCESS) { if (err == 0) { E_ERROR("Unable to write %s; Retrying...\n", file_name); } ++err; sleep(3); no_retries++; if(no_retries>10){ E_FATAL("Failed to get the files after 10 retries(about 5 minutes).\n "); } } } while (err > 1); } if (wt_mean || wt_var || wt_fullvar) { if (out_mean_fn) { E_INFO("Normalizing mean for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mean(in_mean, wt_mean, dnom, n_mgau, n_stream, n_density, veclen); } else { if (wt_mean) { E_INFO("Ignoring means since -meanfn not specified\n"); } } if (out_var_fn) { if (var_is_full) { if (wt_fullvar) { E_INFO("Normalizing fullvar\n"); gauden_norm_wt_fullvar(in_fullvar, wt_fullvar, pass2var, dnom, wt_mean, /* wt_mean now just mean */ n_mgau, n_stream, n_density, veclen, cmd_ln_boolean("-tiedvar")); } } else { if (wt_var) { E_INFO("Normalizing var\n"); gauden_norm_wt_var(in_var, wt_var, pass2var, dnom, wt_mean, /* wt_mean now just mean */ n_mgau, n_stream, n_density, veclen, cmd_ln_boolean("-tiedvar")); } } } else { if (wt_var || wt_fullvar) { E_INFO("Ignoring variances since -varfn not specified\n"); } } } else { E_INFO("No means or variances to normalize\n"); } /* * Write the parameters to files */ if (out_mixw_fn) { if (mixw_acc) { if (s3mixw_write(out_mixw_fn, mixw_acc, n_mixw, n_stream, n_density) != S3_SUCCESS) { return 
S3_ERROR; } } else { E_WARN("NO mixing weight accumulators seen, but -mixwfn specified.\n"); } } else { if (mixw_acc) { E_INFO("Mixing weight accumulators seen, but -mixwfn NOT specified.\n"); } } if (out_tmat_fn) { if (tmat_acc) { if (s3tmat_write(out_tmat_fn, tmat_acc, n_tmat, n_state_pm) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO transition matrix accumulators seen, but -tmatfn specified.\n"); } } else { if (tmat_acc) E_INFO("Transition matrix accumulators seen, but -tmatfn NOT specified\n"); } if (out_mean_fn) { if (wt_mean) { if (s3gau_write(out_mean_fn, (const vector_t ***)wt_mean, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) return S3_ERROR; if (out_dcount_fn) { if (s3gaudnom_write(out_dcount_fn, dnom, n_mgau, n_stream, n_density) != S3_SUCCESS) return S3_ERROR; } } else E_WARN("NO reestimated means seen, but -meanfn specified\n"); } else { if (wt_mean) { E_INFO("Reestimated means seen, but -meanfn NOT specified\n"); } } if (out_var_fn) { if (var_is_full) { if (wt_fullvar) { if (s3gau_write_full(out_var_fn, (const vector_t ****)wt_fullvar, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) return S3_ERROR; } else E_WARN("NO reestimated variances seen, but -varfn specified\n"); } else { if (wt_var) { if (s3gau_write(out_var_fn, (const vector_t ***)wt_var, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) return S3_ERROR; } else E_WARN("NO reestimated variances seen, but -varfn specified\n"); } } else { if (wt_var) { E_INFO("Reestimated variances seen, but -varfn NOT specified\n"); } } if (veclen) ckd_free((void *)veclen); return S3_SUCCESS; }
/*
 * Import one rule ("<pkg.grammar.rule>") or all public rules
 * ("<pkg.grammar.*>") from another grammar into jsgf.
 *
 * The qualified name is turned into a file path (dots become '/', ".gram"
 * is appended) and looked up on jsgf->searchpath.  The grammar is parsed
 * unless it is already present in jsgf->imports.  Every matching public
 * rule is entered into jsgf->rules under its fully qualified local name.
 *
 * Returns the imported rule for a single-rule import.  Returns NULL for a
 * wildcard import, and also when the name is unqualified, the grammar file
 * cannot be found or parsed, or no public rule matches.
 */
jsgf_rule_t *
jsgf_import_rule(jsgf_t * jsgf, char *name)
{
    char *c, *path, *newpath, *rule_name;
    size_t namelen, packlen;
    void *val;
    jsgf_t *imp;
    int import_all;
    hash_iter_t *itor;

    namelen = strlen(name);
    /* Anything shorter than "<>" cannot hold a qualified name, and an empty
     * string must not be indexed at name + 1. */
    if (namelen < 2) {
        E_ERROR("Imported rule is not qualified: %s\n", name);
        return NULL;
    }

    /* Trim the leading and trailing <> */
    path = ckd_malloc(namelen - 2 + 6);     /* room for a trailing .gram */
    strcpy(path, name + 1);
    /* Split off the first part of the name */
    c = strrchr(path, '.');
    if (c == NULL) {
        E_ERROR("Imported rule is not qualified: %s\n", name);
        ckd_free(path);
        return NULL;
    }
    packlen = c - path;
    *c = '\0';

    /* Look for import foo.* */
    import_all = (namelen > 2
                  && 0 == strcmp(name + namelen - 3, ".*>"));

    /* Construct a filename. */
    for (c = path; *c; ++c)
        if (*c == '.')
            *c = '/';
    strcat(path, ".gram");
    newpath = path_list_search(jsgf->searchpath, path);
    if (newpath == NULL) {
        E_ERROR("Failed to find grammar %s\n", path);
        ckd_free(path);
        return NULL;
    }
    ckd_free(path);

    path = newpath;
    E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name);

    /* FIXME: Also, we need to make sure that path is fully qualified
     * here, by adding any prefixes from jsgf->name to it. */
    /* See if we have parsed it already */
    if (hash_table_lookup(jsgf->imports, path, &val) == 0) {
        E_INFO("Already imported %s\n", path);
        imp = val;
        ckd_free(path);
    }
    else {
        /* If not, parse it.  path is handed to the table as the key here. */
        imp = jsgf_parse_file(path, jsgf);
        val = hash_table_enter(jsgf->imports, path, imp);
        if (val != (void *) imp) {
            E_WARN("Multiply imported file: %s\n", path);
        }
    }

    if (imp == NULL)
        return NULL;

    /* The name to match is the same for every rule: build it once. */
    rule_name = importname2rulename(name);

    /* Look for public rules matching rulename. */
    for (itor = hash_table_iter(imp->rules); itor;
         itor = hash_table_iter_next(itor)) {
        hash_entry_t *he = itor->ent;
        jsgf_rule_t *rule = hash_entry_val(he);
        int rule_matches;
        char *newname;

        if (import_all) {
            /* Match package name (symbol table is shared) */
            rule_matches = !strncmp(rule_name, rule->name, packlen + 1);
        }
        else {
            /* Exact match */
            rule_matches = !strcmp(rule_name, rule->name);
        }
        if (!(rule->is_public && rule_matches))
            continue;

        /* Link this rule into the current namespace. */
        c = strrchr(rule->name, '.');
        assert(c != NULL);
        newname = jsgf_fullname(jsgf, c);

        E_INFO("Imported %s\n", newname);
        val = hash_table_enter(jsgf->rules, newname,
                               jsgf_rule_retain(rule));
        if (val != (void *) rule) {
            E_WARN("Multiply defined symbol: %s\n", newname);
        }
        if (!import_all) {
            /* Leaving the loop early: the iterator must be released by hand. */
            hash_table_iter_free(itor);
            ckd_free(rule_name);
            return rule;
        }
    }

    ckd_free(rule_name);
    return NULL;
}
/*
 * Load a mixture-Gaussian parameter file (means or variances).
 *
 * out_param     in/out: parameter array indexed [codebook][feature][density].
 *               When *out_param is NULL on entry, the index array and one
 *               contiguous float buffer are allocated; otherwise the data is
 *               read into the buffer that starts at (*out_param)[0][0][0].
 * out_n_mgau    out: number of codebooks.
 * out_n_feat    out: number of feature streams per codebook.
 * out_n_density out: number of densities per feature stream.
 * out_veclen    out: newly allocated array with the length of each stream.
 * file_name     file to read.
 *
 * Returns 0.  Every I/O or consistency failure is fatal.
 */
static int32
gauden_param_read(vector_t **** out_param,
                  int32 * out_n_mgau,
                  int32 * out_n_feat,
                  int32 * out_n_density,
                  int32 ** out_veclen, const char *file_name)
{
    FILE *fp;
    char **argname, **argval;
    char extra;
    int32 byteswap;
    int32 has_chksum = 0;
    uint32 chksum = 0;
    int32 n_mgau, n_feat, n_density;
    int32 *veclen;
    int32 n_float, vec_total;
    int32 a, m, s, d, pos;
    vector_t ***param;
    float32 *data;

    E_INFO("Reading mixture gaussian parameter: %s\n", file_name);

    fp = fopen(file_name, "rb");
    if (fp == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Header: argument/value pairs followed by the byte-order magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Only "version" and "chksum0" are of interest in the header */
    for (a = 0; argname[a] != NULL; a++) {
        if (strcmp(argname[a], "version") == 0) {
            if (strcmp(argval[a], GAUDEN_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[a], GAUDEN_PARAM_VERSION);
            continue;
        }
        if (strcmp(argname[a], "chksum0") == 0)
            has_chksum = 1;     /* the value itself is not needed */
    }
    bio_hdrarg_free(argname, argval);

    /* Number of codebooks */
    if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#codebooks) failed\n", file_name);
    *out_n_mgau = n_mgau;

    /* Number of feature streams per codebook */
    if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#features) failed\n", file_name);
    *out_n_feat = n_feat;

    /* Number of densities per stream in each codebook */
    if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (#density/codebook) failed\n", file_name);
    *out_n_density = n_density;

    /* Dimensionality of each feature stream */
    veclen = ckd_calloc(n_feat, sizeof(uint32));
    *out_veclen = veclen;
    if (bio_fread(veclen, sizeof(int32), n_feat, fp, byteswap, &chksum) !=
        n_feat)
        E_FATAL("fread(%s) (feature-lengths) failed\n", file_name);

    /* Summed length of one vector from every stream */
    vec_total = 0;
    for (s = 0; s < n_feat; s++)
        vec_total += veclen[s];

    /* Float count announced by the file, covering all codebooks */
    if (bio_fread(&n_float, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (total #floats) failed\n", file_name);
    if (n_float != n_mgau * n_density * vec_total) {
        E_FATAL
            ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
             file_name, n_float, n_mgau, n_density, vec_total);
    }

    if (*out_param != NULL) {
        /* Caller supplied storage: reuse its index array and data buffer */
        param = *out_param;
        data = param[0][0][0];
    }
    else {
        /* Fresh storage: index array plus one contiguous data buffer */
        param = (vector_t ***) ckd_calloc_3d(n_mgau, n_feat, n_density,
                                             sizeof(vector_t));
        data = (float32 *) ckd_calloc(n_float, sizeof(float));
        pos = 0;
        for (m = 0; m < n_mgau; m++) {
            for (s = 0; s < n_feat; s++) {
                for (d = 0; d < n_density; d++) {
                    param[m][s][d] = &data[pos];
                    pos += veclen[s];
                }
            }
        }
    }

    /* Density data proper */
    if (bio_fread(data, sizeof(float32), n_float, fp, byteswap, &chksum) !=
        n_float)
        E_FATAL("fread(%s) (densitydata) failed\n", file_name);

    if (has_chksum)
        bio_verify_chksum(fp, byteswap, chksum);

    /* Nothing may follow the data */
    if (fread(&extra, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    *out_param = param;

    E_INFO("%d codebook, %d feature, size\n", n_mgau, n_feat);
    for (s = 0; s < n_feat; s++)
        printf(" %dx%d", n_density, veclen[s]);
    printf("\n");
    fflush(stdout);

    return 0;
}
/*
 * Build a front end from a command-line configuration.
 *
 * The general parameters are parsed from config (which is retained), the
 * frame shift and frame size are derived from them, and all working buffers
 * (overflow samples, Hamming window, mel filter bank, FFT scratch space and
 * twiddle factors) are allocated and filled.  The front end is returned
 * with a reference count of 1 and an utterance already started.
 *
 * Returns NULL when the general parameters cannot be parsed or when the
 * frame size exceeds the FFT size.
 */
fe_t *
fe_init_auto_r(cmd_ln_t *config)
{
    fe_t *fe = ckd_calloc(1, sizeof(*fe));

    fe->refcount = 1;

    /* Copy the general parameters from the configuration */
    if (fe_parse_general_params(cmd_ln_retain(config), fe) < 0)
        goto fail;

    /* Derived parameters.  Adding 0.5 before the cast rounds to the nearest
     * integer instead of truncating (2.3 -> 2, 3.7 -> 4). */
    fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5);
    fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5);
    fe->prior = 0;
    fe->frame_counter = 0;

    assert (fe->frame_shift > 1);

    if (fe->frame_size > (fe->fft_size)) {
        E_WARN
            ("Number of FFT points has to be a power of 2 higher than %d\n",
             (fe->frame_size));
        goto fail;
    }

    if (fe->dither)
        fe_init_dither(fe->seed);

    /* Overflow-sample buffer and Hamming window (half the frame size is
     * allocated for the window) */
    fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(int16));
    fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t));
    fe_create_hamming(fe->hamming_window, fe->frame_size);

    /* Mel filter bank: allocate, configure, then build filters and cosines */
    fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb));
    fe_parse_melfb_params(config, fe, fe->mel_fb);
    fe_build_melfilters(fe->mel_fb);
    fe_compute_melcosine(fe->mel_fb);

    /* Scratch buffers for speech, FFT input, spectrum and mel spectrum */
    fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch));
    fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame));
    fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec));
    fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec));

    /* FFT twiddle factors */
    fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc));
    fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss));
    fe_create_twiddle(fe);

    if (cmd_ln_boolean_r(config, "-verbose")) {
        fe_print_current(fe);
    }

    /* Initialize the overflow buffers */
    fe_start_utt(fe);
    return fe;

fail:
    fe_free(fe);
    return NULL;
}
int32 vithist_utt_end(vithist_t * vh, ngram_model_t *lm, s3dict_t *dict, dict2pid_t *dict2pid, fillpen_t *fp) { int32 f, i; int32 sv, nsv, scr, bestscore, bestvh, vhid; vithist_entry_t *ve, *bestve = 0; int32 endwid = NGRAM_INVALID_WID; bestscore = MAX_NEG_INT32; bestvh = -1; /* Find last frame with entries in vithist table */ /* by ARCHAN 20050525, it is possible that the last frame will not be reached in decoding */ for (f = vh->n_frm - 1; f >= 0; --f) { sv = vh->frame_start[f]; /* First vithist entry in frame f */ nsv = vh->frame_start[f + 1]; /* First vithist entry in next frame (f+1) */ if (sv < nsv) break; } if (f < 0) return -1; if (f != vh->n_frm - 1) E_WARN("No word exit in frame %d, using exits from frame %d\n", vh->n_frm - 1, f); /* Terminate in a final </s> node (make this optional?) */ endwid = ngram_wid(lm, S3_FINISH_WORD); for (i = sv; i < nsv; i++) { int n_used; ve = vithist_id2entry(vh, i); scr = ve->path.score; scr += ngram_tg_score(lm, endwid, ve->lmstate.lm3g.lwid[0], ve->lmstate.lm3g.lwid[1], &n_used); if (bestscore < scr) { bestscore = scr; bestvh = i; bestve = ve; } } assert(bestvh >= 0); if (f != vh->n_frm - 1) { E_ERROR("No word exit in frame %d, using exits from frame %d\n", vh->n_frm - 1, f); /* Add a dummy silwid covering the remainder of the utterance */ assert(vh->frame_start[vh->n_frm - 1] == vh->frame_start[vh->n_frm]); vh->n_frm -= 1; vithist_rescore(vh, lm, dict, dict2pid, fp, s3dict_silwid(dict), vh->n_frm, bestve->path.score, bestvh, -1, -1); vh->n_frm += 1; vh->frame_start[vh->n_frm] = vh->n_entry; return vithist_utt_end(vh, lm, dict, dict2pid, fp); } /* vithist_dump(vh,-1,kbc,stdout); */ /* Create an </s> entry */ ve = vithist_entry_alloc(vh); ve->wid = s3dict_finishwid(dict); ve->sf = (bestve->ef == BAD_S3FRMID) ? 
0 : bestve->ef + 1; ve->ef = vh->n_frm; ve->ascr = 0; ve->lscr = bestscore - bestve->path.score; ve->path.score = bestscore; ve->path.pred = bestvh; ve->type = 0; ve->valid = 1; ve->lmstate.lm3g.lwid[0] = endwid; ve->lmstate.lm3g.lwid[1] = ve->lmstate.lm3g.lwid[0]; vhid = vh->n_entry - 1; /* vithist_dump(vh,-1,kbc,stdout); */ return vhid; }
/*
 * Hypothesis callback of the phone loop search.
 *
 * This search produces no hypothesis: a warning is logged, the output
 * parameters are left untouched and NULL is returned.
 */
static char const *
phone_loop_search_hyp(ps_search_t *search, int32 *out_score, int32 *out_is_final)
{
    /* Newline-terminated like the other log messages so the next log line
     * is not appended to this one. */
    E_WARN("Hypotheses are not returned from phone loop search\n");
    return NULL;
}
/*
 * (Re)initialize a decoder.
 *
 * ps      decoder to initialize.
 * config  new configuration, or NULL (or ps->config itself) to keep the
 *         current one.  A new configuration is retained and the previous
 *         one released.
 *
 * All searches, the acoustic model, the dictionary and the dict2pid mapping
 * are torn down and rebuilt from ps->config.  Searches are then created for
 * whichever of -kws, -fsg, -jsgf, -lm and -lmctl are set.
 *
 * Returns 0 on success and -1 on failure; after a failure the decoder is
 * left partially initialized.
 */
int
ps_reinit(ps_decoder_t *ps, cmd_ln_t *config)
{
    const char *path;
    const char *keyword_list;
    /* -lw is read with cmd_ln_float32_r(); an integer would truncate it. */
    float lw;

    if (config && config != ps->config) {
        cmd_ln_free_r(ps->config);
        ps->config = cmd_ln_retain(config);
    }

    /* From here on only ps->config is used: config may legitimately be NULL. */
    err_set_debug_level(cmd_ln_int32_r(ps->config, "-debug"));
    ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir");
    ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir");
    ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir");

    /* Fill in some default arguments. */
    ps_init_defaults(ps);

    /* Free old searches (do this before other reinit) */
    ps_free_searches(ps);
    ps->searches = hash_table_new(3, HASH_CASE_YES);

    /* Free old acmod. */
    acmod_free(ps->acmod);
    ps->acmod = NULL;

    /* Free old dictionary (must be done after the two things above) */
    dict_free(ps->dict);
    ps->dict = NULL;

    /* Free d2p */
    dict2pid_free(ps->d2p);
    ps->d2p = NULL;

    /* Logmath computation (used in acmod and search) */
    if (ps->lmath == NULL
        || (logmath_get_base(ps->lmath) !=
            (float64)cmd_ln_float32_r(ps->config, "-logbase"))) {
        if (ps->lmath)
            logmath_free(ps->lmath);
        ps->lmath = logmath_init
            ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0,
             cmd_ln_boolean_r(ps->config, "-bestpath"));
    }

    /* Acoustic model (this is basically everything that
     * uttproc.c, senscr.c, and others used to do) */
    if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL)
        return -1;

    if ((ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window"))) {
        /* Initialize an auxiliary phone loop search, which will run in
         * "parallel" with FSG or N-Gram search. */
        if ((ps->phone_loop =
             phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL)
            return -1;
        hash_table_enter(ps->searches,
                         ckd_salloc(ps_search_name(ps->phone_loop)),
                         ps->phone_loop);
    }

    /* Dictionary and triphone mappings (depends on acmod). */
    /* FIXME: pass config, change arguments, implement LTS, etc. */
    if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL)
        return -1;
    if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL)
        return -1;

    lw = cmd_ln_float32_r(ps->config, "-lw");

    /* Determine whether we are starting out in FSG or N-Gram search mode.
     * If neither is used skip search initialization. */

    /* Load KWS if one was specified in config */
    if ((keyword_list = cmd_ln_str_r(ps->config, "-kws"))) {
        if (ps_set_kws(ps, PS_DEFAULT_SEARCH, keyword_list))
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Load an FSG if one was specified in config */
    if ((path = cmd_ln_str_r(ps->config, "-fsg"))) {
        fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw);
        int rv;

        if (!fsg)
            return -1;
        rv = ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg);
        /* Drop the local reference on success and failure alike, as the
         * JSGF and LM branches below do after registering their model. */
        fsg_model_free(fsg);
        if (rv)
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    /* Or load a JSGF grammar */
    if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) {
        fsg_model_t *fsg;
        jsgf_rule_t *rule;
        char const *toprule;
        jsgf_t *jsgf = jsgf_parse_file(path, NULL);
        int rv;

        /* NOTE(review): jsgf is not released on any path in this branch -
         * confirm who owns the parsed grammar. */
        if (!jsgf)
            return -1;

        rule = NULL;
        /* Take the -toprule if specified. */
        if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) {
            char *ruletok;
            ruletok = string_join("<", toprule, ">", NULL);
            rule = jsgf_get_rule(jsgf, ruletok);
            ckd_free(ruletok);
            if (rule == NULL) {
                E_ERROR("Start rule %s not found\n", toprule);
                return -1;
            }
        }
        else {
            /* Otherwise, take the first public rule.  rule stays NULL
             * unless a public rule is actually found. */
            jsgf_rule_iter_t *itor;

            for (itor = jsgf_rule_iter(jsgf); itor;
                 itor = jsgf_rule_iter_next(itor)) {
                jsgf_rule_t *candidate = jsgf_rule_iter_rule(itor);

                if (jsgf_rule_public(candidate)) {
                    rule = candidate;
                    jsgf_rule_iter_free(itor);
                    break;
                }
            }
            if (rule == NULL) {
                E_ERROR("No public rules found in %s\n", path);
                return -1;
            }
        }

        fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw);
        if (fsg == NULL)
            return -1;
        rv = ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg);
        fsg_model_free(fsg);
        if (rv)
            return -1;
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-lm"))) {
        ngram_model_t *lm;

        lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath);
        if (!lm)
            return -1;

        if (ps_set_lm(ps, PS_DEFAULT_SEARCH, lm)) {
            ngram_model_free(lm);
            return -1;
        }
        ngram_model_free(lm);
        ps_set_search(ps, PS_DEFAULT_SEARCH);
    }

    if ((path = cmd_ln_str_r(ps->config, "-lmctl"))) {
        const char *name;
        ngram_model_t *lmset;
        ngram_model_set_iter_t *lmset_it;

        if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) {
            E_ERROR("Failed to read language model control file: %s\n", path);
            return -1;
        }

        /* Register every model of the set as a search of its own */
        for (lmset_it = ngram_model_set_iter(lmset);
             lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) {
            ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name);

            E_INFO("adding search %s\n", name);
            if (ps_set_lm(ps, name, lm)) {
                ngram_model_free(lm);
                ngram_model_set_iter_free(lmset_it);
                return -1;
            }
            ngram_model_free(lm);
        }

        name = cmd_ln_str_r(ps->config, "-lmname");
        if (name)
            ps_set_search(ps, name);
        else
            E_WARN("No default LM name (-lmname) for `-lmctl'\n");
    }

    /* Initialize performance timer. */
    ps->perf.name = "decode";
    ptmr_init(&ps->perf);

    return 0;
}
/*
 * Segmentation-iterator callback of the phone loop search.
 *
 * This search produces no segmentation: a warning is logged, out_score is
 * left untouched and NULL is returned.
 */
static ps_seg_t *
phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
{
    /* Newline-terminated like the other log messages so the next log line
     * is not appended to this one. */
    E_WARN("Hypotheses are not returned from phone loop search\n");
    return NULL;
}
/*
 * Initialize the log-domain arithmetic for the given base.
 *
 * Sets the module-level base and its derived constants and builds the
 * add-table used for adding probabilities in the log domain.  Entry i of
 * the table is log_base(1 + base^-i) rounded to the nearest integer; the
 * table ends with its first zero entry.
 *
 * base must be greater than 1.0; anything else is fatal.  A repeat call
 * with the same base warns and keeps the existing table; a repeat call
 * with a different base is fatal.
 *
 * Returns 0 on success, -1 when the base is so small that the table values
 * would not fit in 16 bits.
 */
int32
logs3_init(float64 base)
{
    int32 i, k;
    float64 d, t, f;

    E_INFO("Initializing logbase: %e\n", base);

    if (base <= 1.0)
        E_FATAL("Illegal logbase: %e; must be > 1.0\n", base);

    if (add_tbl) {
        if (B == base) {
            /* Rebuilding would produce the same table and leak this one. */
            E_WARN("logs3_init() already done\n");
            return 0;
        }
        E_FATAL("logs3_init() already done with base %e\n", B);
    }

    B = base;
    logB = log(base);
    invlogB = 1.0 / logB;
    invlog10B = 1.0 / log10(base);

    /* Create add-table for adding probs in log domain */

    /* The largest entry is log_base(2); it must fit in a uint16. */
    k = (int32) (log(2.0) * invlogB + 0.5);
    if (k > 65535) {
        E_ERROR("Logbase too small: %e; needs int32 addtable[]\n", base);
        return -1;
    }

    d = 1.0;
    f = 1.0 / B;

    /* Figure out size of add-table required */
    for (i = 0;; i++) {
        t = log(1.0 + d) * invlogB;
        k = (int32) (t + 0.5);

        if (k == 0)
            break;

        d *= f;
    }

    /* i indexes the terminating zero entry, hence i + 1 slots. */
    add_tbl_size = i + 1;
    add_tbl = (uint16 *) ckd_calloc(i + 1, sizeof(uint16));

    /* Fill add-table */
    d = 1.0;
    for (i = 0;; i++) {
        t = log(1.0 + d) * invlogB;
        k = (int32) (t + 0.5);

        add_tbl[i] = k;

        if (k == 0)
            break;

        d *= f;
    }

    E_INFO("Log-Add table size = %d\n", add_tbl_size);

    return 0;
}
/*
 * Load a mixture-Gaussian parameter file (means or variances) in the
 * legacy format: a version string, a comment block, a byte-order check and
 * then the dimensions followed by the float data.
 *
 * out_param     out: newly allocated parameter array indexed
 *               [codebook][feature][density], each entry pointing into one
 *               contiguous float buffer.
 * out_n_mgau    out: number of codebooks.
 * out_n_feat    out: number of feature streams per codebook.
 * out_n_density out: number of densities per feature stream.
 * out_veclen    out: newly allocated array with the length of each stream.
 * file_name     file to read.
 *
 * Returns 0.  Every I/O or consistency failure is fatal; trailing data
 * after the parameters only produces a warning.
 */
static int32
gauden_param_read(float32 *****out_param,
                  int32 *out_n_mgau,
                  int32 *out_n_feat,
                  int32 *out_n_density,
                  int32 **out_veclen,
                  const char *file_name)
{
    char version[1024], tmp;
    FILE *fp;
    int32 i, j, k, l, blk, n;
    int32 n_mgau;
    int32 n_feat;
    int32 n_density;
    int32 *veclen;
    int32 needs_reorder;
    float32 ****out;
    float32 *buf;

    E_INFO("Reading mixture gaussian parameter: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Field width keeps a malformed file from overrunning version[]. */
    if (fscanf(fp, "%1023s", version) != 1)
        E_FATAL("Unable to read version id\n");

    if (strcmp(version, GAUDEN_PARAM_VERSION) != 0)
        E_FATAL("Version mismatch: %s, expecting %s\n",
                version, GAUDEN_PARAM_VERSION);

    if (bcomment_read(fp) != S3_SUCCESS)
        E_FATAL("bcomment_read() failed\n");

    if ((needs_reorder = swap_check(fp)) < 0)
        E_FATAL("swap_check() failed\n");

    /* #Codebooks */
    if (fread_retry(&n_mgau, sizeof(uint32), 1, fp) != 1)
        E_FATAL("Error reading #codebooks\n");
    if (needs_reorder) {
        SWAP_INT32(&n_mgau);
    }
    *out_n_mgau = n_mgau;

    /* #Features/codebook */
    if (fread_retry(&n_feat, sizeof(uint32), 1, fp) != 1)
        E_FATAL("Error reading #features/codebook\n");
    if (needs_reorder) {
        SWAP_INT32(&n_feat);
    }
    *out_n_feat = n_feat;

    /* #Gaussian densities/feature in each codebook */
    if (fread_retry(&n_density, sizeof(uint32), 1, fp) != 1)
        E_FATAL("Error reading #densities/codebook-feature\n");
    if (needs_reorder) {
        SWAP_INT32(&n_density);
    }
    *out_n_density = n_density;

    /* #Dimensions in each feature stream */
    veclen = ckd_calloc(n_feat, sizeof(uint32));
    *out_veclen = veclen;
    if (fread_retry(veclen, sizeof(uint32), n_feat, fp) != n_feat)
        E_FATAL("Error reading feature vector lengths\n");
    if (needs_reorder) {
        for (i = 0; i < n_feat; i++)
            SWAP_INT32(&veclen[i]);
    }

    /* blk = total vector length of all feature streams */
    for (i = 0, blk = 0; i < n_feat; i++)
        blk += veclen[i];

    /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */
    if (fread_retry(&n, sizeof(uint32), 1, fp) != 1)
        E_FATAL("Error reading #floats\n");
    if (needs_reorder) {
        SWAP_INT32(&n);
    }
    /* The count comes from the file, so check it in every build rather
     * than with an assert that NDEBUG removes. */
    if (n != n_mgau * n_density * blk)
        E_FATAL("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, n, n_mgau, n_density, blk);

    /* Allocate memory for mixture gaussian densities */
    out = (float32 ****) ckd_calloc_3d (n_mgau, n_feat, n_density,
                                        sizeof(float32 *));
    buf = (float32 *) ckd_calloc (n, sizeof(float));
    /* Point every [codebook][feature][density] entry at its slice of buf */
    for (i = 0, l = 0; i < n_mgau; i++) {
        for (j = 0; j < n_feat; j++) {
            for (k = 0; k < n_density; k++) {
                out[i][j][k] = &buf[l];
                l += veclen[j];
            }
        }
    }

    /* Read mixture gaussian densities data */
    if (fread_retry (buf, sizeof(float32), n, fp) != n)
        E_FATAL("Error reading gaussian data\n");
    if (needs_reorder)
        for (i = 0; i < n; i++)
            SWAP_FLOAT32(&buf[i]);

    E_INFO("%d codebook, %d feature, size", n_mgau, n_feat);
    for (i = 0; i < n_feat; i++)
        printf (" %dx%d", n_density, veclen[i]);
    printf ("\n");

    if (fread (&tmp, 1, 1, fp) == 1)
        E_WARN("Non-empty file beyond end of data\n");

    *out_param = out;

    fclose(fp);

    return 0;
}
int main(int argc, char *argv[]) { lexicon_t *lex; model_def_t *omdef; model_def_t *dmdef; feat_t *feat; uint32 n_stream, blksize; uint32 *veclen; uint32 ts_off; uint32 ts_cnt; FILE *fp; if (main_initialize(argc, argv, &lex, &omdef, &dmdef, &feat) != S3_SUCCESS) { return -1; } n_stream = feat_dimension1(feat); veclen = feat_stream_lengths(feat); blksize = feat_dimension(feat); if (strcmp(cmd_ln_str("-gthobj"), "state") == 0) { ts_off = cmd_ln_int32("-tsoff"); if (cmd_ln_str("-tscnt") == NULL) { ts_cnt = omdef->n_tied_state - ts_off; } else { ts_cnt = cmd_ln_int32("-tscnt"); } if (ts_off + ts_cnt > omdef->n_tied_state) { E_FATAL("Too many tied states specified\n"); } n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), cmd_ln_str("-segidxfn"), cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), ts_off, ts_cnt, omdef->n_tied_state, (dmdef != NULL ? dmdef->n_tied_state : omdef->n_tied_state)) != S3_SUCCESS) { E_ERROR("Unable to train [%u %u]\n", ts_off, ts_off+ts_cnt-1); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? 
all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } if (cmd_ln_str("-tsrngfn") != NULL) { fp = fopen(cmd_ln_str("-tsrngfn"), "w"); if (fp == NULL) { E_FATAL_SYSTEM("Unable to open %s for reading", cmd_ln_str("-tsrngfn")); } fprintf(fp, "%d %d\n", ts_off, ts_cnt); } else if (ts_cnt != omdef->n_tied_state) { E_WARN("Subset of tied states specified, but no -tsrngfn arg"); } } else if (strcmp(cmd_ln_str("-gthobj"), "single") == 0) { n_tot_frame = 0; ptmr_reset(&all_timer); ptmr_reset(&km_timer); ptmr_reset(&var_timer); ptmr_reset(&em_timer); ptmr_start(&all_timer); if (init_state(cmd_ln_str("-segdmpfn"), NULL, /* No index -> single class dump file */ cmd_ln_int32("-ndensity"), n_stream, veclen, blksize, cmd_ln_int32("-reest"), cmd_ln_str("-mixwfn"), cmd_ln_str("-meanfn"), cmd_ln_str("-varfn"), 0, 1, 1, 1) != S3_SUCCESS) { E_ERROR("Unable to train\n"); } ptmr_stop(&all_timer); if (n_tot_frame > 0) { E_INFO("TOTALS:"); E_INFOCONT(" km %4.3fx %4.3e", km_timer.t_cpu / (n_tot_frame * 0.01), (km_timer.t_cpu > 0 ? km_timer.t_elapsed / km_timer.t_cpu : 0.0)); E_INFOCONT(" var %4.3fx %4.3e", var_timer.t_cpu / (n_tot_frame * 0.01), (var_timer.t_cpu > 0 ? var_timer.t_elapsed / var_timer.t_cpu : 0.0)); E_INFOCONT(" em %4.3fx %4.3e", em_timer.t_cpu / (n_tot_frame * 0.01), (em_timer.t_cpu > 0 ? em_timer.t_elapsed / em_timer.t_cpu : 0.0)); E_INFOCONT(" all %4.3fx %4.3e", all_timer.t_cpu / (n_tot_frame * 0.01), (all_timer.t_cpu > 0 ? all_timer.t_elapsed / all_timer.t_cpu : 0.0)); E_INFOCONT("\n"); } } return 0; }
/*
 * s3align entry point.
 *
 * Arguments come either from the command line or from an argument file: the
 * single non-option argument, or "s3align.arg" when no arguments are given.
 * The function checks that the required model, control and output arguments
 * are present, initializes the log base, feature module, models, timers and
 * align module, runs process_ctlfile(), and prints timing totals.
 *
 * It always terminates through exit(): status 1 for help or a bad argument
 * file, status 0 after a normal run.
 */
int
main (int32 argc, char *argv[])
{
    char *str;
    /* Saved now because load_argfile() is handed &argv and may replace it. */
    char *progname = argv[0];

#if 0
    ckd_debug(100000);
#endif

    E_INFO("%s COMPILED ON: %s, AT: %s\n\n", progname, __DATE__, __TIME__);

    /* Digest command line argument definitions */
    cmd_ln_define (defn);

    if ((argc == 2) && (strcmp (argv[1], "help") == 0)) {
        cmd_ln_print_definitions();
        exit(1);
    }

    /* Look for default or specified arguments file */
    str = NULL;
    if ((argc == 2) && (argv[1][0] != '-'))
        str = argv[1];
    else if (argc == 1) {
        str = "s3align.arg";
        E_INFO("Looking for default argument file: %s\n", str);
    }
    if (str) {
        /* Build command line argument list from file */
        if ((argc = load_argfile (str, progname, &argv)) < 0) {
            fprintf (stderr, "Usage:\n");
            fprintf (stderr, "\t%s argument-list, or\n", progname);
            fprintf (stderr, "\t%s [argument-file] (default file: s3align.arg)\n\n",
                     progname);
            cmd_ln_print_definitions();
            exit(1);
        }
    }

    cmdline_parse (argc, argv);

    if ((cmd_ln_access("-mdeffn") == NULL) ||
        (cmd_ln_access("-meanfn") == NULL) ||
        (cmd_ln_access("-varfn") == NULL) ||
        (cmd_ln_access("-mixwfn") == NULL) ||
        (cmd_ln_access("-tmatfn") == NULL) ||
        (cmd_ln_access("-dictfn") == NULL))
        E_FATAL("Missing -mdeffn, -meanfn, -varfn, -mixwfn, -tmatfn, or -dictfn argument\n");

    if ((cmd_ln_access("-ctlfn") == NULL) || (cmd_ln_access("-insentfn") == NULL))
        E_FATAL("Missing -ctlfn or -insentfn argument\n");

    /* At least one kind of output must be requested. */
    if ((cmd_ln_access ("-s2stsegdir") == NULL) &&
        (cmd_ln_access ("-stsegdir") == NULL) &&
        (cmd_ln_access ("-phsegdir") == NULL) &&
        (cmd_ln_access ("-wdsegdir") == NULL) &&
        (cmd_ln_access ("-outsentfn") == NULL))
        E_FATAL("Missing output file/directory argument(s)\n");

    tm_utt = timing_new ();

    /*
     * Initialize log(S3-base).  All scores (probs...) computed in log domain to avoid
     * underflow.  At the same time, log base = 1.0001 (1+epsilon) to allow log values
     * to be maintained in int32 variables without significant loss of precision.
     */
    if (cmd_ln_access("-logbase") == NULL)
        logs3_init (1.0001);
    else {
        float32 logbase;

        logbase = *((float32 *) cmd_ln_access("-logbase"));
        if (logbase <= 1.0)
            E_FATAL("Illegal log-base: %e; must be > 1.0\n", logbase);
        if (logbase > 1.1)
            E_WARN("Logbase %e perhaps too large??\n", logbase);
        logs3_init ((float64) logbase);
    }

    /* Initialize feature stream type */
    feat_init ((char *) cmd_ln_access ("-feat"));

    /* BHIKSHA: PASS CEPSIZE TO FEAT_CEPSIZE, 6 Jan 98 */
    cepsize = *((int32 *) cmd_ln_access("-ceplen"));
    cepsize = feat_cepsize (cepsize);
    /* END CHANGES BY BHIKSHA */

    /* Read in input databases */
    models_init ();

    senscale = (int32 *) ckd_calloc (S3_MAX_FRAMES, sizeof(int32));

    tmr_utt = cyctimer_new ("U");
    tmr_gauden = cyctimer_new ("G");
    tmr_senone = cyctimer_new ("S");
    tmr_align = cyctimer_new ("A");

    /* Initialize align module */
    align_init ();
    printf ("\n");

    tot_nfr = 0;

    process_ctlfile ();

    if (tot_nfr > 0) {
        printf ("\n");
        printf("TOTAL FRAMES:       %8d\n", tot_nfr);
        printf("TOTAL CPU TIME:     %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_cpu, tm_utt->t_tot_cpu/(tot_nfr*0.01));
        printf("TOTAL ELAPSED TIME: %11.2f sec, %7.2f xRT\n",
               tm_utt->t_tot_elapsed, tm_utt->t_tot_elapsed/(tot_nfr*0.01));
    }

#if (! WIN32)
    system ("ps aguxwww | grep s3align");
#endif

    /* Hack!! To avoid hanging problem under Linux */
    if (logfp) {
        fclose (logfp);
        *stdout = orig_stdout;
        *stderr = orig_stderr;
    }

    exit(0);
}
/*
 * Build a multi-stream mixture-Gaussian model from the files named in the
 * acoustic model's configuration.
 *
 * acmod  supplies the configuration (-mean, -var, -varfloor, -mixw, -senmgau,
 *        -mixwfloor, -aw, -topn) and the feature description that the loaded
 *        codebooks are checked against.
 * lmath  log-math object passed to gauden_init() and senone_init().
 * mdef   model definition passed to senone_init().
 *
 * Returns the new model, viewed as a ps_mgau_t with its function table set.
 * Returns NULL if the Gaussian or senone parameters cannot be loaded or do not
 * match the feature stream layout.  Mismatches between senone and Gaussian
 * parameters are reported through E_FATAL.
 */
ps_mgau_t *
ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef)
{
    static ps_mgaufuncs_t ms_mgau_funcs = {
        "ms",
        ms_cont_mgau_frame_eval, /* frame_eval */
        ms_mgau_mllr_transform,  /* transform */
        ms_mgau_free             /* free */
    };
    ms_mgau_model_t *msg;
    ps_mgau_t *mg;
    gauden_t *g;
    senone_t *s;
    cmd_ln_t *config;
    int i;

    config = acmod->config;

    msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
    msg->config = config;
    msg->g = NULL;
    msg->s = NULL;

    /* Codebooks */
    g = msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
                             cmd_ln_str_r(config, "-var"),
                             cmd_ln_float32_r(config, "-varfloor"),
                             lmath);
    /* Without this check a failed load is dereferenced just below. */
    if (g == NULL) {
        E_ERROR("Failed to read means and variances\n");
        goto error_out;
    }

    /* Verify n_feat and veclen, against acmod. */
    if (g->n_feat != feat_dimension1(acmod->fcb)) {
        E_ERROR("Number of streams does not match: %d != %d\n",
                g->n_feat, feat_dimension1(acmod->fcb));
        goto error_out;
    }
    for (i = 0; i < g->n_feat; ++i) {
        if (g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
            E_ERROR("Dimension of stream %d does not match: %d != %d\n",
                    i, g->featlen[i], feat_dimension2(acmod->fcb, i));
            goto error_out;
        }
    }

    s = msg->s = senone_init(msg->g,
                             cmd_ln_str_r(config, "-mixw"),
                             cmd_ln_str_r(config, "-senmgau"),
                             cmd_ln_float32_r(config, "-mixwfloor"),
                             lmath, mdef);
    if (s == NULL) {
        E_ERROR("Failed to read mixture weights\n");
        goto error_out;
    }

    s->aw = cmd_ln_int32_r(config, "-aw");

    /* Verify senone parameters against gauden parameters */
    if (s->n_feat != g->n_feat)
        E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
                s->n_feat);
    if (s->n_cw != g->n_density)
        E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
                g->n_density, s->n_cw);
    if ((int)s->n_gauden > g->n_mgau)
        E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);
    if ((int)s->n_gauden < g->n_mgau)
        E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);

    msg->topn = cmd_ln_int32_r(config, "-topn");
    E_INFO("The value of topn: %d\n", msg->topn);
    /* <= 0 also catches a negative -topn, which would corrupt the allocation below. */
    if (msg->topn <= 0 || msg->topn > msg->g->n_density) {
        E_WARN
            ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
             msg->topn, msg->g->n_density);
        msg->topn = msg->g->n_density;
    }

    msg->dist = (gauden_dist_t ***)
        ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
                      sizeof(gauden_dist_t));
    msg->mgau_active = (uint8*)ckd_calloc(g->n_mgau, sizeof(int8));

    mg = (ps_mgau_t *)msg;
    mg->vt = &ms_mgau_funcs;
    return mg;

error_out:
    ms_mgau_free(ps_mgau_base(msg));
    return NULL;
}
/* the following function is used for MMIE training lqin 2010-03 */ static int mmi_normalize() { uint32 i; uint32 n_mgau; uint32 n_stream; uint32 n_density; vector_t ***in_mean = NULL; vector_t ***in_var = NULL; vector_t ***wt_mean = NULL; vector_t ***wt_var = NULL; const uint32 *veclen = NULL; const char **accum_dir; const char *in_mean_fn; const char *out_mean_fn; const char *in_var_fn; const char *out_var_fn; vector_t ***wt_num_mean = NULL; vector_t ***wt_den_mean = NULL; vector_t ***wt_num_var = NULL; vector_t ***wt_den_var = NULL; float32 ***num_dnom = NULL; float32 ***den_dnom = NULL; uint32 n_num_mgau; uint32 n_den_mgau; uint32 n_num_stream; uint32 n_den_stream; uint32 n_num_density; uint32 n_den_density; float32 constE; uint32 n_temp_mgau; uint32 n_temp_stream; uint32 n_temp_density; const uint32 *temp_veclen = NULL; accum_dir = cmd_ln_str_list("-accumdir"); /* the following variables are used for mmie training */ out_mean_fn = cmd_ln_str("-meanfn"); out_var_fn = cmd_ln_str("-varfn"); in_mean_fn = cmd_ln_str("-inmeanfn"); in_var_fn = cmd_ln_str("-invarfn"); constE = cmd_ln_float32("-constE"); /* get rid of some unnecessary parameters */ if (cmd_ln_int32("-fullvar")) { E_FATAL("Current MMIE training can not be done for full variance, set -fulllvar as no\n"); } if (cmd_ln_int32("-tiedvar")) { E_FATAL("Current MMIE training can not be done for tied variance, set -tiedvar as no\n"); } if (cmd_ln_str("-mixwfn")) { E_FATAL("Current MMIE training does not support mixture weight update, remove -mixwfn \n"); } if (cmd_ln_str("-inmixwfn")) { E_FATAL("Current MMIE training does not support mixture weight update, remove -inmixwfn \n"); } if (cmd_ln_str("-tmatfn")) { E_FATAL("Current MMIE training does not support transition matrix update, remove -tmatfn \n"); } if (cmd_ln_str("-regmatfn")) { E_FATAL("Using norm for computing regression matrix is obsolete, please use mllr_transform \n"); } /* must be at least one accum dir */ if (accum_dir[0] == NULL) { E_FATAL("No 
accumulated reestimation path is specified, use -accumdir \n"); } /* at least update mean or variance parameters */ if (out_mean_fn == NULL && out_var_fn == NULL) { E_FATAL("Neither -meanfn nor -varfn is specified, at least do mean or variance update \n"); } else if (out_mean_fn == NULL) { E_INFO("No -meanfn specified, will skip if any\n"); } else if (out_var_fn == NULL) { E_INFO("No -varfn specified, will skip if any\n"); } /* read input mean */ if (in_mean_fn != NULL) { E_INFO("read original density mean parameters from %s\n", in_mean_fn); if (s3gau_read(in_mean_fn, &in_mean, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)veclen); veclen = NULL; } /* read input variance */ if (in_var_fn != NULL) { E_INFO("read original density variance parameters from %s\n", in_var_fn); if (s3gau_read(in_var_fn, &in_var, &n_mgau, &n_stream, &n_density, &veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } ckd_free((void *)veclen); veclen = NULL; } /* read accumulated numerator and denominator counts */ for (i = 0; accum_dir[i]; i++) { E_INFO("Reading and accumulating counts from %s\n", accum_dir[i]); rdacc_mmie_den(accum_dir[i], "numlat", &wt_num_mean, &wt_num_var, &num_dnom, &n_num_mgau, &n_num_stream, &n_num_density, &veclen); rdacc_mmie_den(accum_dir[i], "denlat", &wt_den_mean, &wt_den_var, &den_dnom, &n_den_mgau, &n_den_stream, &n_den_density, &veclen); if (n_num_mgau != n_den_mgau) E_FATAL("number of gaussians inconsistent between num and den lattice\n"); else if (n_num_mgau != n_mgau) E_FATAL("number of gaussians inconsistent between imput model and accumulator (%u != %u)\n", n_mgau, n_num_mgau); if (n_num_stream != n_den_stream) E_FATAL("number of gaussian streams inconsistent between num and den lattice\n"); else if (n_num_stream != n_stream) E_FATAL("number of gaussian streams inconsistent between imput model and accumulator (%u != %u)\n", n_stream, n_num_stream); if 
(n_num_density != n_den_density) E_FATAL("number of gaussian densities inconsistent between num and den lattice\n"); else if (n_num_density != n_density) E_FATAL("number of gaussian densities inconsistent between imput model and accumulator (%u != %u)\n", n_density, n_num_density); } /* initialize update parameters as the input parameters */ if (out_mean_fn) { if (s3gau_read(in_mean_fn, &wt_mean, &n_temp_mgau, &n_temp_stream, &n_temp_density, &temp_veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_mean_fn); } ckd_free((void *)temp_veclen); temp_veclen = NULL; } if (out_var_fn) { if (s3gau_read(in_var_fn, &wt_var, &n_temp_mgau, &n_temp_stream, &n_temp_density, &temp_veclen) != S3_SUCCESS) { E_FATAL_SYSTEM("Couldn't read %s", in_var_fn); } ckd_free((void *)temp_veclen); temp_veclen = NULL; } /* update mean parameters */ if (wt_mean) { if (out_mean_fn) { E_INFO("Normalizing mean for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mmie_mean(in_mean, wt_mean, wt_num_mean, wt_den_mean, in_var, wt_num_var, wt_den_var, num_dnom, den_dnom, n_mgau, n_stream, n_density, veclen, constE); } else { E_INFO("Ignoring means since -meanfn not specified\n"); } } else { E_INFO("No means to normalize\n"); } /* update variance parameters */ if (wt_var) { if (out_var_fn) { E_INFO("Normalizing variance for n_mgau= %u, n_stream= %u, n_density= %u\n", n_mgau, n_stream, n_density); gauden_norm_wt_mmie_var(in_var, wt_var, wt_num_var, wt_den_var, num_dnom, den_dnom, in_mean, wt_mean, wt_num_mean, wt_den_mean, n_mgau, n_stream, n_density, veclen, constE); } else { E_INFO("Ignoring variances since -varfn not specified\n"); } } else { E_INFO("No variances to normalize\n"); } /* write the updated mean parameters to files */ if (out_mean_fn) { if (wt_mean) { if (s3gau_write(out_mean_fn, (const vector_t ***)wt_mean, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO reestimated means seen, but -meanfn 
specified\n"); } } else { if (wt_mean) { E_INFO("Reestimated means seen, but -meanfn NOT specified\n"); } } /* write the updated variance parameters to files */ if (out_var_fn) { if (wt_var) { if (s3gau_write(out_var_fn, (const vector_t ***)wt_var, n_mgau, n_stream, n_density, veclen) != S3_SUCCESS) { return S3_ERROR; } } else { E_WARN("NO reestimated variances seen, but -varfn specified\n"); } } else { if (wt_var) { E_INFO("Reestimated variances seen, but -varfn NOT specified\n"); } } if (veclen) ckd_free((void *)veclen); if (temp_veclen) ckd_free((void *)temp_veclen); return S3_SUCCESS; }
/*
 * Parse a textual finite-state grammar from an open stream.
 *
 * The expected layout is a begin declaration with an optional grammar name,
 * the number of states, the start state, the final state, any number of
 * transition lines ("from to prob [word]"), and an end declaration.
 * Transitions without a word are null transitions; their transitive closure
 * is computed before returning.
 *
 * fp     stream to read from.
 * lmath  log-math object used to convert transition probabilities.
 * lw     language weight handed to fsg_model_init().
 *
 * Returns the new model, or NULL after logging an error if the input is
 * malformed.
 */
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg = NULL;
    hash_table_t *vocab;
    hash_iter_t *it;
    int32 next_wid;
    char **wordptr = NULL;
    char *lineptr = NULL;
    char *fsgname = NULL;
    int32 lineno = 0;
    int32 nwords, from_state, to_state;
    int n_state, n_trans, n_null_trans;
    glist_t nulls = NULL;
    float32 prob;

    vocab = hash_table_new(32, FALSE);

    /* Scan upto FSG_BEGIN header */
    do {
        nwords = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (nwords < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }
    } while (strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) != 0);

    if (nwords > 2) {
        E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", lineno);
        goto parse_error;
    }

    /*
     * Copy the grammar name now, since the next line read reuses the word
     * buffer.  Fall back to a default when the header carries no name.
     */
    if (nwords != 2) {
        E_WARN("FSG name is missing\n");
        fsgname = ckd_salloc("unknown");
    }
    else {
        fsgname = ckd_salloc(wordptr[1]);
    }

    /* Read #states */
    nwords = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if (!((nwords == 2)
          && ((strcmp(wordptr[0], FSG_MODEL_N_DECL) == 0)
              || (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) == 0))
          && (sscanf(wordptr[1], "%d", &n_state) == 1)
          && (n_state > 0))) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* The state count is known, so the model can be created. */
    fsg = fsg_model_init(fsgname, lmath, lw, n_state);
    ckd_free(fsgname);
    fsgname = NULL;

    /* Read start state */
    nwords = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if (!((nwords == 2)
          && ((strcmp(wordptr[0], FSG_MODEL_S_DECL) == 0)
              || (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) == 0))
          && (sscanf(wordptr[1], "%d", &(fsg->start_state)) == 1)
          && (fsg->start_state >= 0)
          && (fsg->start_state < fsg->n_state))) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read final state */
    nwords = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if (!((nwords == 2)
          && ((strcmp(wordptr[0], FSG_MODEL_F_DECL) == 0)
              || (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) == 0))
          && (sscanf(wordptr[1], "%d", &(fsg->final_state)) == 1)
          && (fsg->final_state >= 0)
          && (fsg->final_state < fsg->n_state))) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             lineno);
        goto parse_error;
    }

    /* Read transitions until the end declaration */
    next_wid = 0;
    n_trans = 0;
    n_null_trans = 0;
    while (1) {
        int32 wid, tprob;

        nwords = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (nwords <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if (strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)
            break;

        /* Anything other than a transition declaration is an error here. */
        if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) != 0)) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if (!(((nwords == 4) || (nwords == 5))
              && (sscanf(wordptr[1], "%d", &from_state) == 1)
              && (sscanf(wordptr[2], "%d", &to_state) == 1)
              && (from_state >= 0) && (from_state < fsg->n_state)
              && (to_state >= 0) && (to_state < fsg->n_state))) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 lineno);
            goto parse_error;
        }

        prob = atof_c(wordptr[3]);
        if ((prob <= 0.0) || (prob > 1.0)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                 lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, prob) * fsg->lw);

        if (nwords <= 4) {
            /* No word on the line: this is a null transition. */
            if (fsg_model_null_trans_add(fsg, from_state, to_state, tprob) == 1) {
                ++n_null_trans;
                nulls =
                    glist_add_ptr(nulls,
                                  fsg_model_null_trans(fsg, from_state, to_state));
            }
        }
        else {
            /* Add word to "dictionary", assigning the next ID if it is new. */
            if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
                (void) hash_table_enter_int32(vocab,
                                              ckd_salloc(wordptr[4]),
                                              next_wid);
                wid = next_wid++;
            }
            fsg_model_trans_add(fsg, from_state, to_state, tprob, wid);
            ++n_trans;
        }
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);

    /* Now create a string table from the "dictionary" */
    fsg->n_word = hash_table_inuse(vocab);
    fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    it = hash_table_iter(vocab);
    while (it) {
        char const *word = hash_entry_key(it->ent);
        int32 wid = (int32) (long) hash_entry_val(it->ent);

        /* The key strings are handed over to the model's vocabulary. */
        fsg->vocab[wid] = (char *) word;
        it = hash_table_iter_next(it);
    }
    hash_table_free(vocab);

    /* Do transitive closure on null transitions */
    nulls = fsg_model_null_trans_closure(fsg, nulls);
    glist_free(nulls);

    ckd_free(lineptr);
    ckd_free(wordptr);

    return fsg;

parse_error:
    /* The key strings were never handed over, so free them here. */
    it = hash_table_iter(vocab);
    while (it) {
        ckd_free((char *) hash_entry_key(it->ent));
        it = hash_table_iter_next(it);
    }
    glist_free(nulls);
    hash_table_free(vocab);
    ckd_free(fsgname);
    ckd_free(lineptr);
    ckd_free(wordptr);
    fsg_model_free(fsg);
    return NULL;
}
/*
 * Convert every word string of an N-gram model from one character encoding to
 * another and rebuild the word-to-ID hash table.
 *
 * model  model whose word_str[] entries and wid table are replaced.
 * from   name of the current encoding, as understood by iconv_open().
 * to     name of the target encoding.
 *
 * All words are converted into temporary storage first, and the model is
 * changed only after every conversion has succeeded.  A failure therefore
 * leaves the model exactly as it was.
 *
 * Returns 0 on success, or -1 if the converter cannot be opened or a word
 * cannot be converted.
 */
int
ngram_model_recode(ngram_model_t *model, const char *from, const char *to)
{
    iconv_t ic;
    char *outbuf;
    char **new_str;
    size_t maxlen;
    int i;
    hash_table_t *new_wid;

    /* FIXME: Need to do a special case thing for the GB-HEX encoding
     * used in Sphinx3 Mandarin models. */
    if ((ic = iconv_open(to, from)) == (iconv_t)-1) {
        E_ERROR_SYSTEM("iconv_open() failed");
        return -1;
    }

    /* iconv(3) will not size its output for us, so keep one scratch buffer
     * and grow it whenever a conversion reports E2BIG. */
    maxlen = 0;
    for (i = 0; i < model->n_words; ++i) {
        size_t len = strlen(model->word_str[i]);

        if (len > maxlen)
            maxlen = len;
    }
    /* Really should be big enough except for pathological cases. */
    maxlen = maxlen * sizeof(int) + 15;
    outbuf = ckd_calloc(maxlen, 1);

    /* Staging area for the converted strings (+1 avoids a zero-size request). */
    new_str = ckd_calloc(model->n_words + 1, sizeof(*new_str));

    /* And, don't forget, we need to rebuild the word to unigram ID
     * mapping. */
    new_wid = hash_table_new(model->n_words, FALSE);

    for (i = 0; i < model->n_words; ++i) {
        ICONV_CONST char *in;
        char *out;
        size_t inleft, outleft, result;

        for (;;) {
            int flushing = 0;

            /* Every attempt restarts from the whole word and the whole
             * buffer.  outleft in particular must be reset after the buffer
             * grows, otherwise the retry can never succeed. */
            in = (ICONV_CONST char *)model->word_str[i];
            /* Yes, this assumes that we don't have any NUL bytes. */
            inleft = strlen(in);
            out = outbuf;
            outleft = maxlen;

            result = iconv(ic, &in, &inleft, &out, &outleft);
            if (result != (size_t)-1) {
                /* Now flush a shift-out sequence, if any. */
                flushing = 1;
                result = iconv(ic, NULL, NULL, &out, &outleft);
            }
            if (result != (size_t)-1)
                break;

            if (errno != E2BIG) {
                if (flushing) {
                    E_ERROR_SYSTEM("iconv() failed (state reset sequence)");
                }
                else {
                    E_ERROR_SYSTEM("iconv() failed");
                }
                goto error_out;
            }

            /* Reset the internal state of conversion. */
            iconv(ic, NULL, NULL, NULL, NULL);
            /* Make everything bigger and try this word again. */
            maxlen *= 2;
            outbuf = ckd_realloc(outbuf, maxlen);
        }

        /* Number of bytes produced for this word. */
        result = maxlen - outleft;

        /* ckd_calloc zero-fills, so the copy is already NUL-terminated. */
        new_str[i] = ckd_calloc(result + 1, 1);
        memcpy(new_str[i], outbuf, result);

        /* Now update the hash table.  We might have terrible
         * collisions if a non-reversible conversion was requested,
         * so warn about them. */
        if (hash_table_enter_int32(new_wid, new_str[i], i) != i) {
            E_WARN("Duplicate word in dictionary after conversion: %s\n",
                   new_str[i]);
        }
    }

    /* Everything converted: commit the new strings to the model. */
    for (i = 0; i < model->n_words; ++i) {
        /* Old strings are owned by the model only if it was writable. */
        if (model->writable)
            ckd_free(model->word_str[i]);
        model->word_str[i] = new_str[i];
    }
    model->writable = TRUE;

    ckd_free(new_str);
    ckd_free(outbuf);
    iconv_close(ic);

    /* Swap out the hash table. */
    hash_table_free(model->wid);
    model->wid = new_wid;

    return 0;

error_out:
    /* Discard the partial results; the model has not been touched. */
    for (i = 0; i < model->n_words; ++i)
        ckd_free(new_str[i]);
    ckd_free(new_str);
    ckd_free(outbuf);
    hash_table_free(new_wid);
    iconv_close(ic);
    return -1;
}