/**
 * Read a Gaussian parameter file that is expected to contain full
 * covariance matrices.
 *
 * Data consumed, in order: n_mgau, n_feat and n_density (one uint32 each),
 * n_feat vector lengths, a 1-D float32 array holding the matrix values and,
 * unless the "chksum0" header attribute is missing or set to "no", a
 * trailing checksum word.
 *
 * @param fn            Name of the file to read.
 * @param out           On success, receives an array indexed as
 *                      [mgau][feat][density][row]; every entry points at one
 *                      row of veclen[feat] floats inside a single buffer.
 * @param out_n_mgau    On success, receives the number of mixtures read.
 * @param out_n_feat    On success, receives the number of feature streams.
 * @param out_n_density On success, receives the densities per mixture.
 * @param out_veclen    On success, receives the n_feat vector lengths.
 * @param need_full     When non-zero, a size mismatch is logged with
 *                      E_ERROR; when zero it is only signalled through the
 *                      return value.
 * @return S3_SUCCESS, or S3_ERROR if the file cannot be opened, a read
 *         fails or the amount of data does not match full matrices.  The
 *         output arguments are only written on success.  A version or
 *         checksum mismatch is reported with E_FATAL.
 */
int
s3gau_read_maybe_full(const char *fn,
                      vector_t *****out,
                      uint32 *out_n_mgau,
                      uint32 *out_n_feat,
                      uint32 *out_n_density,
                      uint32 **out_veclen,
                      uint32 need_full)
{
    FILE *fp;
    const char *do_chk;
    const char *ver;
    uint32 n_mgau, n_feat, n_density;
    uint32 *veclen = NULL;
    uint32 maxveclen;
    uint32 blk, i, j, k, l, r, n;
    uint32 chksum = 0;
    uint32 sv_chksum, ignore = 0;
    float32 *raw = NULL;
    vector_t ****o;
    uint32 swap;

    fp = s3open(fn, "rb", &swap);
    if (fp == NULL)
        return S3_ERROR;

    /* check version id */
    ver = s3get_gvn_fattr("version");
    if (ver) {
        if (strcmp(ver, GAU_FILE_VERSION) != 0) {
            E_FATAL("Version mismatch for %s, file ver: %s != reader ver: %s\n",
                    fn, ver, GAU_FILE_VERSION);
        }
    }
    else {
        E_FATAL("No version attribute for %s\n", fn);
    }

    /* if do_chk is non-NULL, there is a checksum after the data in the file */
    do_chk = s3get_gvn_fattr("chksum0");
    if (do_chk && !strcmp(do_chk, "no")) {
        do_chk = NULL;
    }

    if (bio_fread(&n_mgau, sizeof(uint32), 1, fp, swap, &chksum) != 1) {
        goto error;
    }

    if (bio_fread(&n_feat, sizeof(uint32), 1, fp, swap, &chksum) != 1) {
        goto error;
    }

    if (bio_fread(&n_density, sizeof(uint32), 1, fp, swap, &chksum) != 1) {
        goto error;
    }

    veclen = ckd_calloc(n_feat, sizeof(uint32));
    if (bio_fread(veclen, sizeof(uint32), n_feat, fp, swap, &chksum) != n_feat) {
        goto error;
    }

    if (bio_fread_1d((void **)&raw, sizeof(float32), &n, fp, swap, &chksum) < 0) {
        /* Do not trust whatever the failed call may have left in raw. */
        raw = NULL;
        goto error;
    }

    /* A full matrix for stream i has veclen[i] * veclen[i] entries. */
    for (i = 0, blk = 0, maxveclen = 0; i < n_feat; i++) {
        blk += veclen[i] * veclen[i];
        if (veclen[i] > maxveclen)
            maxveclen = veclen[i];
    }
    if (n != n_mgau * n_density * blk) {
        if (need_full)
            E_ERROR("Failed to read full covariance file %s (expected %u values, got %u)\n",
                    fn, n_mgau * n_density * blk, n);
        goto error;
    }

    /*
     * Verify the checksum before the pointer array is built, so that a
     * failed checksum read leaves only raw and veclen to release.
     */
    if (do_chk) {
        /* See if the checksum in the file matches that which
           was computed from the read data */
        if (bio_fread(&sv_chksum, sizeof(uint32), 1, fp, swap, &ignore) != 1) {
            goto error;
        }

        if (sv_chksum != chksum) {
            E_FATAL("Checksum error; read corrupt data.\n");
        }
    }

    o = (vector_t ****)ckd_calloc_4d(n_mgau, n_feat, n_density,
                                     maxveclen, sizeof(vector_t));

    /* Point every matrix row at its slice of the contiguous raw buffer. */
    for (i = 0, r = 0; i < n_mgau; i++) {
        for (j = 0; j < n_feat; j++) {
            for (k = 0; k < n_density; k++) {
                for (l = 0; l < veclen[j]; l++) {
                    o[i][j][k][l] = &raw[r];
                    r += veclen[j];
                }
            }
        }
    }

    *out = o;
    *out_n_mgau = n_mgau;
    *out_n_feat = n_feat;
    *out_n_density = n_density;
    *out_veclen = veclen;

    s3close(fp);

    E_INFO("Read %s [%ux%ux%u array of full matrices]\n",
           fn, n_mgau, n_feat, n_density);

    return S3_SUCCESS;

error:
    /* Nothing has been handed to the caller yet; release it all here. */
    ckd_free(raw);
    ckd_free(veclen);
    if (fp)
        s3close(fp);

    return S3_ERROR;
}
int32 feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext, int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr) { char *path; char *ps = "/"; int32 win, nfr; int32 file_length, cepext_length, path_length = 0; mfcc_t **mfc; if (fcb->cepsize <= 0) { E_ERROR("Bad cepsize: %d\n", fcb->cepsize); return -1; } if (cepext == NULL) cepext = ""; /* * Create mfc filename, combining file, dir and extension if * necessary */ /* * First we decide about the path. If dir is defined, then use * it. Otherwise assume the filename already contains the path. */ if (dir == NULL) { dir = ""; ps = ""; /* * This is not true but some 3rd party apps * may parse the output explicitly checking for this line */ E_INFO("At directory . (current directory)\n"); } else { E_INFO("At directory %s\n", dir); /* * Do not forget the path separator! */ path_length += strlen(dir) + 1; } /* * Include cepext, if it's not already part of the filename. */ file_length = strlen(file); cepext_length = strlen(cepext); if ((file_length > cepext_length) && (strcmp(file + file_length - cepext_length, cepext) == 0)) { cepext = ""; cepext_length = 0; } /* * Do not forget the '\0' */ path_length += file_length + cepext_length + 1; path = (char*) ckd_calloc(path_length, sizeof(char)); #ifdef HAVE_SNPRINTF /* * Paranoia is our best friend... */ while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) { path_length = file_length; path = (char*) ckd_realloc(path, path_length * sizeof(char)); } #else sprintf(path, "%s%s%s%s", dir, ps, file, cepext); #endif win = feat_window_size(fcb); /* Pad maxfr with win, so we read enough raw feature data to * calculate the requisite number of dynamic features. */ if (maxfr >= 0) maxfr += win * 2; if (feat != NULL) { /* Read mfc file including window or padding if necessary. 
*/ nfr = feat_s2mfc_read(path, win, sf, ef, &mfc, maxfr, fcb->cepsize); ckd_free(path); if (nfr < 0) { ckd_free_2d((void **) mfc); return -1; } /* Actually compute the features */ feat_compute_utt(fcb, mfc, nfr, win, feat); ckd_free_2d((void **) mfc); } else { /* Just calculate the number of frames we would need. */ nfr = feat_s2mfc_read(path, win, sf, ef, NULL, maxfr, fcb->cepsize); ckd_free(path); if (nfr < 0) return nfr; } return (nfr - win * 2); }
/**
 * Allocate and configure a feature control block for a named feature type.
 *
 * Recognised type names are "s2_4x", "s3_1x39", and the prefixes
 * "1s_c_d_dd", "1s_c_d_ld_dd", "cep_dcep"/"1s_c_d", "cep"/"1s_c" and
 * "1s_3c"/"1s_4c".  Anything else is parsed as a comma separated list of
 * stream widths with an optional ":window" suffix.
 *
 * @param type     Feature type name (see above).
 * @param cmn      Cepstral mean normalisation type; cmn_init() is called
 *                 unless this is CMN_NONE.
 * @param varnorm  Stored in the block as the variance normalisation flag.
 * @param agc      Gain control type; agc_init() is called unless this is
 *                 AGC_NONE.
 * @param breport  If non-zero, the chosen configuration is logged.
 * @param cepsize  Input cepstral vector size; 0 selects 13.
 * @return The new block with refcount 1, or NULL when "s2_4x" or "s3_1x39"
 *         is requested with a cepsize other than 13.  A malformed generic
 *         type string is reported with E_FATAL.
 */
feat_t *
feat_init(char const *type, cmn_type_t cmn, int32 varnorm,
          agc_type_t agc, int32 breport, int32 cepsize)
{
    feat_t *fcb;

    if (cepsize == 0)
        cepsize = 13;
    if (breport)
        E_INFO
            ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n",
             type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]);

    fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t));
    fcb->refcount = 1;
    fcb->name = (char *) ckd_salloc(type);
    if (strcmp(type, "s2_4x") == 0) {
        /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s2_4x features require cepsize == 13\n");
            /* The name copy is owned by fcb; release it as well. */
            ckd_free(fcb->name);
            ckd_free(fcb);
            return NULL;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 4;
        fcb->stream_len = (int32 *) ckd_calloc(4, sizeof(int32));
        fcb->stream_len[0] = 12;
        fcb->stream_len[1] = 24;
        fcb->stream_len[2] = 3;
        fcb->stream_len[3] = 12;
        fcb->out_dim = 51;
        fcb->window_size = 4;
        fcb->compute_feat = feat_s2_4x_cep2feat;
    }
    else if (strcmp(type, "s3_1x39") == 0) {
        /* 1-stream cep/dcep/pow/ddcep (Hack!! hardwired constants below) */
        if (cepsize != 13) {
            E_ERROR("s3_1x39 features require cepsize == 13\n");
            ckd_free(fcb->name);
            ckd_free(fcb);
            return NULL;
        }
        fcb->cepsize = 13;
        fcb->n_stream = 1;
        fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
        fcb->stream_len[0] = 39;
        fcb->out_dim = 39;
        fcb->window_size = 3;
        fcb->compute_feat = feat_s3_1x39_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_dd", 9) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
        fcb->stream_len[0] = cepsize * 3;
        fcb->out_dim = cepsize * 3;
        fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */
        fcb->compute_feat = feat_1s_c_d_dd_cep2feat;
    }
    else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) {
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
        fcb->stream_len[0] = cepsize * 4;
        fcb->out_dim = cepsize * 4;
        fcb->window_size = FEAT_DCEP_WIN * 2;
        fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat;
    }
    else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) {
        /* 1-stream cep/dcep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
        fcb->stream_len[0] = feat_cepsize(fcb) * 2;
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 2;
        fcb->compute_feat = feat_s3_cep_dcep;
    }
    else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) {
        /* 1-stream cep */
        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
        fcb->stream_len[0] = feat_cepsize(fcb);
        fcb->out_dim = fcb->stream_len[0];
        fcb->window_size = 0;
        fcb->compute_feat = feat_s3_cep;
    }
    else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) {
        /* 1-stream cep with frames concatenated, so called cepwin features */
        if (strncmp(type, "1s_3c", 5) == 0)
            fcb->window_size = 3;
        else
            fcb->window_size = 4;

        fcb->cepsize = cepsize;
        fcb->n_stream = 1;
        fcb->stream_len = (int32 *) ckd_calloc(1, sizeof(int32));
        fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1);
        fcb->out_dim = fcb->stream_len[0];
        fcb->compute_feat = feat_s3_cepwin;
    }
    else {
        int32 i, l, k;
        char *strp;
        char *mtype = ckd_salloc(type);
        char *wd = ckd_salloc(type);
        /*
         * Generic definition: Format should be %d,%d,%d,...,%d (i.e.,
         * comma separated list of feature stream widths; #items =
         * #streams).  An optional window size (frames will be
         * concatenated) is also allowed, which can be specified with
         * a colon after the list of feature streams.
         */
        l = strlen(mtype);
        k = 0;
        for (i = 1; i < l - 1; i++) {
            if (mtype[i] == ',') {
                mtype[i] = ' ';
                k++;
            }
            else if (mtype[i] == ':') {
                mtype[i] = '\0';
                fcb->window_size = atoi(mtype + i + 1);
                break;
            }
        }
        k++;                    /* Presumably there are (#commas+1) streams */
        fcb->n_stream = k;
        fcb->stream_len = (int32 *) ckd_calloc(k, sizeof(int32));

        /* Scan individual feature stream lengths */
        strp = mtype;
        i = 0;
        fcb->out_dim = 0;
        fcb->cepsize = 0;
        while (sscanf(strp, "%s%n", wd, &l) == 1) {
            strp += l;
            if ((i >= fcb->n_stream)
                || (sscanf(wd, "%d", &(fcb->stream_len[i])) != 1)
                || (fcb->stream_len[i] <= 0))
                E_FATAL("Bad feature type argument\n");
            /* Input size before windowing */
            fcb->cepsize += fcb->stream_len[i];
            if (fcb->window_size > 0)
                fcb->stream_len[i] *= (fcb->window_size * 2 + 1);
            /* Output size after windowing */
            fcb->out_dim += fcb->stream_len[i];
            i++;
        }
        if (i != fcb->n_stream)
            E_FATAL("Bad feature type argument\n");

        /* Input is already the feature stream */
        fcb->compute_feat = feat_copy;
        ckd_free(mtype);
        ckd_free(wd);
    }

    if (cmn != CMN_NONE)
        fcb->cmn_struct = cmn_init(feat_cepsize(fcb));
    fcb->cmn = cmn;
    fcb->varnorm = varnorm;
    if (agc != AGC_NONE) {
        fcb->agc_struct = agc_init();
        /*
         * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things
         * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY
         * switches to EMAX
         */
        /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */
        agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0);
    }
    fcb->agc = agc;
    /*
     * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt()
     */
    fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE,
                                            feat_cepsize(fcb),
                                            sizeof(mfcc_t));
    /* This one is actually just an array of pointers to "flatten out"
     * wraparounds. */
    fcb->tmpcepbuf = ckd_calloc(2 * feat_window_size(fcb) + 1,
                                sizeof(*fcb->tmpcepbuf));

    return fcb;
}
/**
 * Build a finite state grammar model from its textual description.
 *
 * Lines are consumed in this order: everything up to the begin
 * declaration (optionally followed by a grammar name), the state count,
 * the start state, the final state, and then transition lines until the
 * end declaration.  A transition line carries from-state, to-state and a
 * probability in (0, 1], plus an optional word; transitions without a word
 * are added as null transitions.
 *
 * @param fp     Open stream to read from.
 * @param lmath  Log-math object used to convert the probabilities.
 * @param lw     Language weight handed to fsg_model_init().
 * @return The new model, or NULL after logging an error if the input is
 *         malformed or ends early.
 */
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *model = NULL;
    hash_table_t *word_ids;
    hash_iter_t *it;
    glist_t null_links = NULL;
    char **tok = NULL;
    char *line = NULL;
    char *name = NULL;
    int32 line_no = 0;
    int32 next_wid;
    int32 ntok, src, dst;
    int n_state, n_trans, n_null_trans;
    float32 prob;

    word_ids = hash_table_new(32, FALSE);

    /* Skip ahead to the begin declaration. */
    for (;;) {
        ntok = nextline_str2words(fp, &line_no, &line, &tok);
        if (ntok < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }

        if (strcmp(tok[0], FSG_MODEL_BEGIN_DECL) != 0)
            continue;

        if (ntok > 2) {
            E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
                    line_no);
            goto parse_error;
        }
        break;
    }

    /* Copy the grammar name now: the token array is reused by the next
     * line read.  Fall back to a placeholder when no name was given. */
    if (ntok == 2) {
        name = ckd_salloc(tok[1]);
    }
    else {
        E_WARN("FSG name is missing\n");
        name = ckd_salloc("unknown");
    }

    /* Number of states. */
    ntok = nextline_str2words(fp, &line_no, &line, &tok);
    if ((ntok != 2)
        || ((strcmp(tok[0], FSG_MODEL_N_DECL) != 0)
            && (strcmp(tok[0], FSG_MODEL_NUM_STATES_DECL) != 0))
        || (sscanf(tok[1], "%d", &n_state) != 1)
        || (n_state <= 0)) {
        E_ERROR
            ("Line[%d]: #states declaration line missing or malformed\n",
             line_no);
        goto parse_error;
    }

    /* The model can be created once the state count is known. */
    model = fsg_model_init(name, lmath, lw, n_state);
    ckd_free(name);
    name = NULL;

    /* Start state. */
    ntok = nextline_str2words(fp, &line_no, &line, &tok);
    if ((ntok != 2)
        || ((strcmp(tok[0], FSG_MODEL_S_DECL) != 0)
            && (strcmp(tok[0], FSG_MODEL_START_STATE_DECL) != 0))
        || (sscanf(tok[1], "%d", &(model->start_state)) != 1)
        || (model->start_state < 0)
        || (model->start_state >= model->n_state)) {
        E_ERROR
            ("Line[%d]: start state declaration line missing or malformed\n",
             line_no);
        goto parse_error;
    }

    /* Final state. */
    ntok = nextline_str2words(fp, &line_no, &line, &tok);
    if ((ntok != 2)
        || ((strcmp(tok[0], FSG_MODEL_F_DECL) != 0)
            && (strcmp(tok[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
        || (sscanf(tok[1], "%d", &(model->final_state)) != 1)
        || (model->final_state < 0)
        || (model->final_state >= model->n_state)) {
        E_ERROR
            ("Line[%d]: final state declaration line missing or malformed\n",
             line_no);
        goto parse_error;
    }

    /* Transitions, up to the end declaration. */
    next_wid = 0;
    n_trans = 0;
    n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        ntok = nextline_str2words(fp, &line_no, &line, &tok);
        if (ntok <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    line_no);
            goto parse_error;
        }

        if (strcmp(tok[0], FSG_MODEL_END_DECL) == 0)
            break;

        if ((strcmp(tok[0], FSG_MODEL_T_DECL) != 0)
            && (strcmp(tok[0], FSG_MODEL_TRANSITION_DECL) != 0)) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    line_no);
            goto parse_error;
        }

        if (((ntok != 4) && (ntok != 5))
            || (sscanf(tok[1], "%d", &src) != 1)
            || (sscanf(tok[2], "%d", &dst) != 1)
            || (src < 0) || (src >= model->n_state)
            || (dst < 0) || (dst >= model->n_state)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                 line_no);
            goto parse_error;
        }

        prob = atof_c(tok[3]);
        if ((prob <= 0.0) || (prob > 1.0)) {
            E_ERROR
                ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                 line_no);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, prob) * model->lw);

        if (ntok <= 4) {
            /* No word on the line: this is a null transition. */
            if (fsg_model_null_trans_add(model, src, dst, tprob) == 1) {
                ++n_null_trans;
                null_links =
                    glist_add_ptr(null_links,
                                  fsg_model_null_trans(model, src, dst));
            }
            continue;
        }

        /* Look the word up, assigning the next free id to new words. */
        if (hash_table_lookup_int32(word_ids, tok[4], &wid) < 0) {
            (void) hash_table_enter_int32(word_ids, ckd_salloc(tok[4]),
                                          next_wid);
            wid = next_wid;
            ++next_wid;
        }
        fsg_model_trans_add(model, src, dst, tprob, wid);
        ++n_trans;
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           model->n_state, hash_table_inuse(word_ids), n_trans,
           n_null_trans);

    /* Turn the hash table into the model's word string table; the key
     * strings are handed over to the model rather than copied. */
    model->n_word = hash_table_inuse(word_ids);
    model->n_word_alloc = model->n_word + 10;   /* Pad it a bit. */
    model->vocab = ckd_calloc(model->n_word_alloc, sizeof(*model->vocab));
    for (it = hash_table_iter(word_ids); it; it = hash_table_iter_next(it)) {
        char const *word = hash_entry_key(it->ent);
        int32 wid = (int32) (long) hash_entry_val(it->ent);

        model->vocab[wid] = (char *) word;
    }
    hash_table_free(word_ids);

    /* Transitive closure over the null transitions. */
    null_links = fsg_model_null_trans_closure(model, null_links);
    glist_free(null_links);

    ckd_free(line);
    ckd_free(tok);

    return model;

parse_error:
    /* The word strings still belong to the hash table at this point. */
    for (it = hash_table_iter(word_ids); it; it = hash_table_iter_next(it))
        ckd_free((char *) hash_entry_key(it->ent));
    glist_free(null_links);
    hash_table_free(word_ids);
    ckd_free(name);
    ckd_free(line);
    ckd_free(tok);
    fsg_model_free(model);
    return NULL;
}
/**
 * Fetch the current hypothesis from the search and cache it in the decoder
 * as a NULL-terminated segment array plus a space separated word string.
 *
 * Filler words and the finish word are kept in the segment array but left
 * out of the string.  Any previously recorded hypothesis is released first.
 *
 * @param _decode   Decoder to update.
 * @param _end_utt  Not used by this function.
 * @return S3_DECODE_SUCCESS, S3_DECODE_ERROR_NULL_POINTER when _decode is
 *         NULL, S3_DECODE_ERROR_INTERNAL when the search yields no
 *         hypothesis, or S3_DECODE_ERROR_OUT_OF_MEMORY.
 */
int
s3_decode_record_hyps(s3_decode_t * _decode, int _end_utt)
{
    glist_t hyps;
    gnode_t *gn;
    srch_hyp_t *seg;
    srch_hyp_t **segs = 0;
    srch_t *search;
    dict_t *dict;
    char *text = 0;
    char *cursor = 0;
    int n_segs = 0;
    int text_len = 0;
    int finish_wid = 0;
    int32 idx = 0;
    int rv;

    if (_decode == NULL)
        return S3_DECODE_ERROR_NULL_POINTER;

    s3_decode_free_hyps(_decode);

    dict = kbcore_dict(_decode->kbcore);
    search = (srch_t *) _decode->kb.srch;
    hyps = srch_get_hyp(search);
    if (hyps == NULL) {
        E_WARN("Failed to retrieve viterbi history.\n");
        return S3_DECODE_ERROR_INTERNAL;
    }

    /* First pass: count the segments and size the string.  Each word that
     * is kept needs its own length plus one byte for a separator. */
    finish_wid = dict_finishwid(dict);
    gn = hyps;
    while (gn != NULL) {
        seg = (srch_hyp_t *) gnode_ptr(gn);
        n_segs++;
        if (!dict_filler_word(dict, seg->id) && seg->id != finish_wid) {
            text_len +=
                strlen(dict_wordstr(dict, dict_basewid(dict, seg->id))) + 1;
        }
        gn = gnode_next(gn);
    }

    /* Always leave room for the terminator. */
    if (text_len == 0) {
        text_len = 1;
    }

    text = (char *) ckd_calloc(text_len, sizeof(char));
    segs = (srch_hyp_t **) ckd_calloc(n_segs + 1, sizeof(srch_hyp_t *));
    if (segs == NULL || text == NULL) {
        E_WARN("Failed to allocate storage for hypothesis.\n");
        rv = S3_DECODE_ERROR_OUT_OF_MEMORY;
        goto s3_decode_record_hyps_cleanup;
    }

    /* Second pass: fill the segment array and append the words.  The
     * buffer is zeroed, so strcat() at the cursor appends in place. */
    idx = 0;
    cursor = text;
    for (gn = hyps; gn != NULL; gn = gnode_next(gn), idx++) {
        seg = (srch_hyp_t *) gnode_ptr(gn);
        segs[idx] = seg;
        seg->word = dict_wordstr(dict, dict_basewid(dict, seg->id));

        if (dict_filler_word(dict, seg->id) || seg->id == finish_wid)
            continue;

        strcat(cursor, dict_wordstr(dict, dict_basewid(dict, seg->id)));
        cursor += strlen(cursor);
        *cursor++ = ' ';
    }
    glist_free(hyps);

    /* Overwrite the trailing separator with the terminator. */
    text[text_len - 1] = '\0';
    segs[n_segs] = 0;
    _decode->hyp_frame_num = _decode->num_frames_decoded;
    _decode->hyp_segs = segs;
    _decode->hyp_str = text;

    return S3_DECODE_SUCCESS;

s3_decode_record_hyps_cleanup:
    if (segs != NULL) {
        ckd_free(segs);
    }
    if (text != NULL) {
        ckd_free(text);
    }
    if (hyps != NULL) {
        /* The segments are not handed to the decoder; free them too. */
        for (gn = hyps; gn != NULL; gn = gnode_next(gn)) {
            seg = (srch_hyp_t *) gnode_ptr(gn);
            if (seg != NULL) {
                ckd_free(seg);
            }
        }
        glist_free(hyps);
    }

    return rv;
}
void split_node_comp(dtree_t *tr, uint32 node_id, float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 n_base_phone, uint32 **dfeat, uint32 n_dfeat, uint32 split_min, uint32 split_max, float32 split_thr, float32 mwfloor) { uint32 *id, n_id; uint32 *id_yes, n_yes; uint32 *id_no, n_no; dtree_node_t *node; uint32 node_id_yes; uint32 node_id_no; uint32 ii, i; node = &tr->node[node_id]; id = node->id; n_id = node->n_id; for (ii = 0, n_yes = 0, n_no = 0; ii < n_id; ii++) { i = id[ii]; if (eval_comp_quest((comp_quest_t *)node->q, dfeat[i], n_dfeat)) { ++n_yes; } else { ++n_no; } } #if 0 fprintf(stderr, "Comp Split: "); print_comp_quest(stderr, pset, (comp_quest_t *)node->q); fprintf(stderr, " %u/%u %.3e\n", n_yes, n_no, node->wt_ent_dec); #endif id_yes = ckd_calloc(n_yes, sizeof(uint32)); id_no = ckd_calloc(n_no, sizeof(uint32)); for (ii = 0, n_yes = 0, n_no = 0; ii < n_id; ii++) { i = id[ii]; if (eval_comp_quest((comp_quest_t *)node->q, dfeat[i], n_dfeat)) { id_yes[n_yes] = i; ++n_yes; } else { id_no[n_no] = i; ++n_no; } } node_id_yes = tr->n_node++; node_id_no = tr->n_node++; node->y = &tr->node[node_id_yes]; node->n = &tr->node[node_id_no]; node->y->p = node; node->n->p = node; mk_node(node->y, node_id_yes, id_yes, n_yes, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, mwfloor); node->y->q = (void *)mk_comp_quest(&(node->y->wt_ent_dec), mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, id_yes, n_yes, all_q, n_all_q, pset, n_base_phone, dfeat, n_dfeat, split_min, split_max, split_thr, mwfloor); mk_node(node->n, node_id_no, id_no, n_no, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, mwfloor); node->n->q = (void *)mk_comp_quest(&(node->n->wt_ent_dec), mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, id_no, n_no, all_q, n_all_q, 
pset, n_base_phone, dfeat, n_dfeat, split_min, split_max, split_thr, mwfloor); }
/**
 * Label a block of classified frames as silence, possible speech or
 * speech, carrying the end-pointer state machine across calls.
 *
 * The label buffer lives inside the end-pointer and is reallocated when it
 * is smaller than the block.  For each frame the label depends on the
 * current state and on whether the frame class is CLASS_OWNER; the
 * pad_leader, pad_cancel and pad_trailer thresholds decide when the
 * counters trigger a state change.
 *
 * @param _ep          End-pointer state (must not be NULL).
 * @param _classes     Class of every frame in the block.
 * @param _num_frames  Number of frames in the block.
 * @param _endpts      Receives a pointer to the label buffer owned by _ep.
 * @return 0 on success, -1 if the label buffer cannot be allocated.
 */
int
ep_endpoint(endptr_t * _ep, int *_classes, int _num_frames, int **_endpts)
{
    int *labels;
    int frame;

    assert(_ep != NULL);
    assert(_endpts != NULL);

    /* Replace the label buffer if this block does not fit. */
    if (_ep->max_endpts < _num_frames) {
        if (_ep->endpts != NULL) {
            ckd_free(_ep->endpts);
        }
        _ep->endpts = NULL;
        _ep->max_endpts = _ep->num_endpts = 0;

        _ep->endpts = (int *) ckd_calloc(sizeof(int), _num_frames);
        if (_ep->endpts == NULL) {
            return -1;
        }
        _ep->max_endpts = _num_frames;
    }
    _ep->num_endpts = _num_frames;
    labels = _ep->endpts;

    for (frame = 0; frame < _num_frames; frame++) {
        switch (_ep->state) {
        case EP_STATE_IDLE:
            /* Waiting for the first owner frame. */
            if (_classes[frame] != CLASS_OWNER) {
                labels[frame] = EP_SILENCE;
                break;
            }
            _ep->start_counter = 1;
            _ep->state = EP_STATE_LEADER;
            labels[frame] = EP_MAYBE;
            break;

        case EP_STATE_LEADER:
            /* Counting owner frames until pad_leader is reached. */
            if (_classes[frame] != CLASS_OWNER) {
                _ep->cancel_counter = 1;
                _ep->state = EP_STATE_CANCEL;
                labels[frame] = EP_MAYBE;
                break;
            }
            if (++_ep->start_counter >= _ep->pad_leader) {
                _ep->state = EP_STATE_SPEECH;
                labels[frame] = EP_SPEECH;
            }
            else {
                labels[frame] = EP_MAYBE;
            }
            break;

        case EP_STATE_SPEECH:
            /* The first non-owner frame starts the trailer count. */
            if (_classes[frame] != CLASS_OWNER) {
                _ep->end_counter = 1;
                _ep->state = EP_STATE_TRAILER;
            }
            labels[frame] = EP_SPEECH;
            break;

        case EP_STATE_TRAILER:
            /* An owner frame resumes speech; pad_trailer ends it. */
            if (_classes[frame] == CLASS_OWNER) {
                _ep->state = EP_STATE_SPEECH;
                labels[frame] = EP_SPEECH;
                break;
            }
            if (++_ep->end_counter >= _ep->pad_trailer) {
                _ep->state = EP_STATE_IDLE;
                labels[frame] = EP_SILENCE;
            }
            else {
                labels[frame] = EP_SPEECH;
            }
            break;

        case EP_STATE_CANCEL:
            /* The start counter keeps running while a cancel is pending. */
            _ep->start_counter++;
            if (_classes[frame] == CLASS_OWNER) {
                _ep->state = EP_STATE_LEADER;
                labels[frame] = EP_MAYBE;
                break;
            }
            if (++_ep->cancel_counter >= _ep->pad_cancel) {
                _ep->state = EP_STATE_IDLE;
                labels[frame] = EP_SILENCE;
            }
            else {
                labels[frame] = EP_MAYBE;
            }
            break;
        }
    }

    *_endpts = labels;

    return 0;
}
/**
 * Load the observations of one feature stream from the dump file into the
 * file-level obuf buffer, keeping one frame out of every `stride`.
 *
 * Nothing is read when the requested stream is the one already buffered
 * (l_strm).  Otherwise obuf is reallocated for n_frame / stride frames and
 * the dump file is read from data_offset, one block of blksize floats per
 * frame.
 *
 * @param strm      Stream whose vectors are kept.
 * @param n_frame   Number of frames stored in the dump file.
 * @param n_stream  Number of streams in every frame block.
 * @param veclen    Vector length of each stream; the lengths must sum to
 *                  blksize.
 * @param blksize   Number of floats per frame block.
 * @return n_frame / stride, or 0 if seeking or reading fails.
 *
 * NOTE(review): on failure l_strm and n_tot_frame have already been
 * updated, so a retry with the same stream is treated as buffered; confirm
 * whether callers depend on that.
 */
static uint32
setup_obs_1class(uint32 strm, uint32 n_frame, uint32 n_stream, uint32 *veclen, uint32 blksize)
{
    float32 *buf = NULL;
    vector_t *frm = NULL;
    uint32 i, l, o;
    uint32 n_sv_frame;
    uint32 n_saved;
    uint32 ignore = 0;

    n_sv_frame = n_frame / stride;

    if (l_strm == strm) {
        E_INFO("No need to read data; using existing buffered data\n");

        return n_sv_frame;
    }

    n_tot_frame += n_sv_frame;

    l_strm = strm;

    /* The size expression is a size_t; cast it to match the %u format. */
    E_INFO("alloc'ing %uMb obs buf\n",
           (uint32)(n_sv_frame*veclen[strm]*sizeof(float32) / (1024 * 1024)));

    if (obuf) {
        ckd_free(obuf);
        obuf = NULL;
    }
    obuf = ckd_calloc(n_sv_frame * veclen[strm], sizeof(float32));

    /* frm[i] points at the start of stream i inside one frame block. */
    buf = (float32 *)ckd_calloc(blksize, sizeof(float32));
    frm = (vector_t *)ckd_calloc(n_stream, sizeof(float32 *));
    for (i = 0, l = 0; i < n_stream; i++) {
        frm[i] = &buf[l];
        l += veclen[i];
    }

    assert(l == blksize);

    assert(dmp_fp != NULL);

    if (fseek(dmp_fp, data_offset, SEEK_SET) < 0) {
        E_ERROR_SYSTEM("Can't seek to start of data\n");

        goto fail;
    }

    for (i = 0, o = 0, n_saved = 0; i < n_frame; i++) {
        if (s3read(buf, sizeof(float32), blksize, dmp_fp, dmp_swp, &ignore) != blksize) {
            E_ERROR_SYSTEM("Can't read dump file\n");

            goto fail;
        }

        /*
         * obuf holds n_frame / stride frames (rounded down), but frames
         * 0, stride, 2*stride, ... number one more than that whenever
         * n_frame is not a multiple of stride; never store the extra one.
         */
        if (((i % stride) == 0) && (n_saved < n_sv_frame)) {
            memcpy(&obuf[o],
                   (void *)&frm[strm][0],
                   sizeof(float32) * veclen[strm]);
            o += veclen[strm];
            ++n_saved;
        }
    }

    ckd_free(buf);
    ckd_free(frm);

    return n_sv_frame;

fail:
    /* Release the scratch block on the error paths as well. */
    ckd_free(buf);
    ckd_free(frm);

    return 0;
}
/**
 * Copy every `stride`-th feature vector of one dump tied state into obuf.
 *
 * Frames are pulled with segdmp_next_feat() until it returns zero, and
 * each returned frame is released here.  The frame counter restarts at
 * zero on every call, so the stride phase is per tied state.
 *
 * @param d_ts      Tied state to read from the dump.
 * @param strm      Stream whose vector is kept.
 * @param veclen    Length of that vector.
 * @param o         Offset (in floats) into obuf at which to continue.
 * @param capacity  Number of floats obuf can hold.
 * @return The offset after the last selected frame.  The offset advances
 *         even for frames that no longer fit, so callers can still report
 *         how many frames were seen.
 */
static uint32
setup_obs_copy_state(uint32 d_ts, uint32 strm, uint32 veclen, uint32 o, uint32 capacity)
{
    vector_t **feat;
    uint32 n_i_frame;
    uint32 i;

    for (i = 0; segdmp_next_feat(d_ts, &feat, &n_i_frame); i++) {
        assert(n_i_frame == 1);

        if ((i % stride) == 0) {
            /* Only write while a whole vector still fits into obuf. */
            if ((o < capacity) && ((capacity - o) >= veclen)) {
                memcpy(&obuf[o],
                       (void *)&feat[0][strm][0],
                       sizeof(float32) * veclen);
            }
            o += veclen;
        }
        ckd_free((void *)&feat[0][0][0]);
        ckd_free_2d((void **)feat);
    }

    return o;
}

/**
 * Load the observations of one output tied state and stream into the
 * file-level obuf buffer, keeping one frame out of every `stride`.
 *
 * Nothing is read when the requested (state, stream) pair is the one
 * already buffered.  When the o2d map exists, the frames of every dump
 * state mapped to `ts` are concatenated; otherwise `ts` is read directly.
 * A warning is logged if the number of frames found differs from
 * n_frame / stride; frames beyond the buffer size are not stored.
 *
 * @param ts       Output tied state.
 * @param strm     Stream whose vectors are kept.
 * @param n_frame  Expected number of frames for the state.
 * @param veclen   Vector length of the stream.
 * @return n_frame / stride.
 */
static uint32
setup_obs_multiclass(uint32 ts, uint32 strm, uint32 n_frame, uint32 veclen)
{
    uint32 o, k;
    uint32 n_sv_frame;
    uint32 capacity;

    n_sv_frame = n_frame / stride;

    if ((l_ts == ts) && (l_strm == strm)) {
        E_INFO("No need to read data; using existing buffered data\n");

        return n_sv_frame;
    }

    n_tot_frame += n_sv_frame;

    l_ts = ts;
    l_strm = strm;

    /* The size expression is a size_t; cast it to match the %u format. */
    E_INFO("alloc'ing %uMb obs buf\n",
           (uint32)(n_sv_frame*veclen*sizeof(float32) / (1024 * 1024)));

    if (obuf) {
        ckd_free(obuf);
        obuf = NULL;
    }
    capacity = n_sv_frame * veclen;
    obuf = ckd_calloc(capacity, sizeof(float32));

    if (stride == 1) {
        E_INFO("Reading all frames\n");
    }
    else {
        E_INFO("Reading 1 out of every %u frames from obs dmp file...\n", stride);
    }

    o = 0;
    if (o2d) {
        E_INFO("o_ts == %u ->", ts);
        for (k = 0; k < n_o2d[ts]; k++) {
            E_INFOCONT(" %d", o2d[ts][k]);
        }
        E_INFOCONT("\n");

        for (k = 0; k < n_o2d[ts]; k++) {
            o = setup_obs_copy_state(o2d[ts][k], strm, veclen, o, capacity);
        }
    }
    else {
        E_INFO("dmp mdef == output mdef\n");
        o = setup_obs_copy_state(ts, strm, veclen, o, capacity);
    }

    if ((o / veclen) != n_sv_frame) {
        E_WARN("Expected %u frames, but read %u\n",
               n_sv_frame, o / veclen);
    }

    E_INFO("done reading %u frames\n", n_sv_frame);

    return n_sv_frame;
}
/**
 * Build the file-level map from output tied states to dump tied states.
 *
 * For every model of the output definition that also exists in the dump
 * definition, the tied states are paired position by position.  After the
 * call, o2d[o] lists the n_o2d[o] distinct dump states paired with output
 * state o, and i_o2d[] is reset to zero.
 *
 * @param d_mdef  Model definition that the dump was produced with.
 * @param o_mdef  Model definition of the output.
 * @return S3_SUCCESS.  Inconsistent definitions or an inconsistent map are
 *         reported with E_FATAL.
 *
 * NOTE(review): the state loop runs to the output model's n_state and
 * indexes the dump model with the same position; this presumably relies on
 * both models having the same number of states - confirm.
 */
uint32
setup_d2o_map(model_def_t *d_mdef,
              model_def_t *o_mdef)
{
    model_def_entry_t *o_defn, *d_defn;
    uint32 d_ts;
    uint32 o_ts;
    uint32 *mapped;
    uint32 i, j, k, d;
    const char *nm;
    int did_warn = FALSE;

    if (d_mdef->n_tied_state < o_mdef->n_tied_state) {
        E_FATAL("more tied states in output than in dump mdef (%u vs %u)\n",
                o_mdef->n_tied_state, d_mdef->n_tied_state);
    }
    if (d_mdef->n_tied_ci_state != o_mdef->n_tied_ci_state) {
        E_FATAL("# tied ci state in output, %u not equal to # in dmp, %u\n",
                o_mdef->n_tied_ci_state, d_mdef->n_tied_ci_state);
    }

    n_o2d = (uint32 *)ckd_calloc(o_mdef->n_tied_state, sizeof(uint32));
    i_o2d = (uint32 *)ckd_calloc(o_mdef->n_tied_state, sizeof(uint32));
    o2d = (uint32 **)ckd_calloc(o_mdef->n_tied_state, sizeof(uint32 *));

    /* mapped[d] records whether dump state d has already been counted. */
    mapped = (uint32 *)ckd_calloc(d_mdef->n_tied_state, sizeof(uint32));

    /* Pass 1: count the distinct dump states for every output state. */
    for (i = 0; i < o_mdef->n_defn; i++) {
        nm = acmod_set_id2name(o_mdef->acmod_set, i);

        d = acmod_set_name2id(d_mdef->acmod_set, nm);

        if (d == NO_ID) {
            if (!did_warn) {
                E_WARN("Some models in the output mdef not in the dump mdef\n");
                did_warn = TRUE;
            }
            continue;
        }

        o_defn = &o_mdef->defn[i];
        d_defn = &d_mdef->defn[d];

        for (j = 0; j < o_defn->n_state; j++) {
            o_ts = o_defn->state[j];
            d_ts = d_defn->state[j];
            /* Both sides must be real states before d_ts is used as an
             * index into mapped[]. */
            if ((o_ts != TYING_NO_ID) && (d_ts != TYING_NO_ID)) {
                if (mapped[d_ts] == FALSE) {
                    ++n_o2d[o_ts];
                    mapped[d_ts] = TRUE;
                }
            }
            else {
                if (!((o_ts == TYING_NO_ID) && (d_ts == TYING_NO_ID))) {
                    E_INFO("%s state is NULL but %s isn't.\n",
                           (o_ts == TYING_NO_ID ? "output" : "dump"),
                           (o_ts == TYING_NO_ID ? "dump" : "output"));
                }
            }
        }
    }

    for (i = 0; i < o_mdef->n_tied_state; i++) {
        o2d[i] = (uint32 *)ckd_calloc(n_o2d[i], sizeof(uint32));
    }

    /* Pass 2: fill the lists, skipping dump states already present. */
    for (i = 0; i < o_mdef->n_defn; i++) {
        /* Figure out the index in the dump mdef
           for the model in the output mdef */

        nm = acmod_set_id2name(o_mdef->acmod_set, i);

        d = acmod_set_name2id(d_mdef->acmod_set, nm);

        if (d == NO_ID)
            continue;

        o_defn = &o_mdef->defn[i];
        d_defn = &d_mdef->defn[d];

        for (j = 0; j < o_defn->n_state; j++) {
            o_ts = o_defn->state[j];
            d_ts = d_defn->state[j];
            if ((o_ts != TYING_NO_ID) && (d_ts != TYING_NO_ID)) {
                for (k = 0; k < i_o2d[o_ts]; k++) {
                    if (o2d[o_ts][k] == d_ts)
                        break;
                }
                if (k == i_o2d[o_ts]) {
                    o2d[o_ts][i_o2d[o_ts]++] = d_ts;
                }
            }
            else {
                if (!((o_ts == TYING_NO_ID) && (d_ts == TYING_NO_ID))) {
                    E_INFO("%s state is NULL but %s isn't.\n",
                           (o_ts == TYING_NO_ID ? "output" : "dump"),
                           (o_ts == TYING_NO_ID ? "dump" : "output"));
                }
            }
        }
    }

    /* The two passes must agree on the length of every list. */
    for (i = 0; i < o_mdef->n_tied_state; i++) {
        if (i_o2d[i] != n_o2d[i]) {
            E_FATAL("%u != %u for %u\n",
                    i_o2d[i], n_o2d[i], i);
        }
    }

    for (i = 0; i < o_mdef->n_tied_state; i++) {
        i_o2d[i] = 0;
    }

    /* The bookkeeping array is only needed while building the map. */
    ckd_free(mapped);

    return S3_SUCCESS;
}
float64 reest_sum(uint32 ts, vector_t **mean, vector_t **var, float32 **mixw, uint32 n_density, uint32 n_stream, uint32 n_in_obs, uint32 *veclen, uint32 blksize, uint32 n_iter, uint32 twopassvar, uint32 vartiethr) { uint32 o, i, j, k, l; float32 *mixw_acc; float32 *cb_acc; vector_t **mean_acc_xx; vector_t **var_acc_xx; vector_t *mean_acc; vector_t *var_acc; float64 ol, ttt, diff, log_tot_ol = 0, p_log_tot_ol = 0; float64 **norm, *den; float64 log_a_den=0; float32 mixw_norm; vector_t obs; uint32 n_obs; vector_t ***n_mean_xx = NULL; vector_t *n_mean = NULL; float64 avg_lik=0, p_avg_lik=0; uint32 tievar = FALSE; E_INFO("EM reestimation of mixw/means/vars\n"); if (twopassvar) { n_mean_xx = gauden_alloc_param(1, 1, n_density, veclen); n_mean = n_mean_xx[0][0]; } /* allocate mixing weight accumulators */ mixw_acc = (float32 *)ckd_calloc(n_density, sizeof(float32)); cb_acc = (float32 *)ckd_calloc(n_density, sizeof(float32)); mean_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, blksize); mean_acc = mean_acc_xx[0]; var_acc_xx = (vector_t **)alloc_gau_acc(1, n_density, veclen, blksize); var_acc = var_acc_xx[0]; den = (float64 *)ckd_calloc(n_density, sizeof(float64)); norm = (float64 **)ckd_calloc_2d(n_stream, n_density, sizeof(float64)); for (j = 0; j < n_stream; j++) { n_obs = setup_obs(ts, j, n_in_obs, n_stream, veclen, blksize); if (n_obs < vartiethr) tievar = TRUE; for (i = 0; i < n_iter; i++) { p_log_tot_ol = log_tot_ol; log_tot_ol = 0; for (k = 0; k < n_density; k++) { /* floor variances */ for (l = 0; l < veclen[j]; l++) if (var[j][k][l] < 1e-4) var[j][k][l] = 1e-4; /* compute normalization factors for Gaussian densities */ norm[j][k] = diag_norm(var[j][k], veclen[j]); /* precompute 1/(2sigma^2) terms */ diag_eval_precomp(var[j][k], veclen[j]); } if (twopassvar) { /* do a pass over the corpus to compute reestimated means */ for (o = 0; o < n_obs; o++) { float64 mx; obs = get_obs(o); mx = MIN_NEG_FLOAT64; for (k = 0; k < n_density; k++) { /* really log(den) 
for the moment */ den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]); if (mx < den[k]) mx = den[k]; } for (k = 0, ol = 0; k < n_density; k++) { den[k] = exp(log_a_den - mx); ol += mixw[j][k] * den[k]; } for (k = 0; k < n_density; k++) { ttt = mixw[j][k] * den[k] / ol; cb_acc[k] += ttt; for (l = 0; l < veclen[j]; l++) { mean_acc[k][l] += obs[l] * ttt; } } } cb_acc[0] = 1.0 / cb_acc[0]; for (k = 1; k < n_density; k++) { cb_acc[k] = 1.0 / cb_acc[k]; } /* compute the reestimated mean value to be used in next pass */ for (k = 0; k < n_density; k++) { for (l = 0; l < veclen[j]; l++) { n_mean[k][l] = mean_acc[k][l] * cb_acc[k]; mean_acc[k][l] = 0; } cb_acc[k] = 0; } } else { n_mean = mean[j]; } for (o = 0; o < n_obs; o++) { float64 mx; /* Do a pass over the data to accumulate reestimation sums * for the remaining parameters (including means * if not a 2-pass config) */ /* Get the next observation */ obs = get_obs(o); mx = MIN_NEG_FLOAT64; /* Compute the mixture density value given the * observation and the model parameters */ for (k = 0; k < n_density; k++) { /* really log(den) for the moment */ den[k] = log_diag_eval(obs, norm[j][k], mean[j][k], var[j][k], veclen[j]); if (mx < den[k]) mx = den[k]; } for (k = 0, ol = 0; k < n_density; k++) { den[k] = exp(den[k] - mx); ol += mixw[j][k] * den[k]; } log_tot_ol += log(ol) + mx; /* Compute the reestimation sum terms for each * of the component densities */ for (k = 0; k < n_density; k++) { ttt = mixw[j][k] * den[k] / ol; mixw_acc[k] += ttt; cb_acc[k] += ttt; for (l = 0; l < veclen[j]; l++) { /* if not doing two-pass variance computation * n_mean <- mean above. 
*/ diff = obs[l] - n_mean[k][l]; if (!twopassvar) { mean_acc[k][l] += ttt * obs[l]; } var_acc[k][l] += ttt * diff * diff; } } } avg_lik = exp(log_tot_ol / n_obs); if (p_log_tot_ol != 0) p_avg_lik = exp(p_log_tot_ol / n_obs); else p_avg_lik = 0.5 * avg_lik; E_INFO("EM stream %u: [%u] avg_lik %e conv_ratio %e\n", j, i, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik); /* normalize after iteration */ if (tievar) { /* create a sum over all densities in entry 0 */ for (k = 1; k < n_density; k++) { for (l = 0; l < veclen[j]; l++) { var[j][0][l] += var[j][k][l]; } cb_acc[0] += cb_acc[k]; } /* copy entry 0 back to remaining entries */ for (k = 1; k < n_density; k++) { for (l = 0; l < veclen[j]; l++) { var[j][k][l] = var[j][0][l]; } cb_acc[k] = cb_acc[0]; } } for (k = 0, mixw_norm = 0; k < n_density; k++) { /* norm for per density expectations */ cb_acc[k] = 1.0 / cb_acc[k]; mixw_norm += mixw_acc[k]; } mixw_norm = 1.0 / mixw_norm; if (!twopassvar) { for (k = 0; k < n_density; k++) { mixw[j][k] = mixw_acc[k] * mixw_norm; mixw_acc[k] = 0; for (l = 0; l < veclen[j]; l++) { mean[j][k][l] = mean_acc[k][l] * cb_acc[k]; mean_acc[k][l] = 0; var[j][k][l] = var_acc[k][l] * cb_acc[k]; var_acc[k][l] = 0; } cb_acc[k] = 0; } } else { for (k = 0; k < n_density; k++) { mixw[j][k] = mixw_acc[k] * mixw_norm; mixw_acc[k] = 0; for (l = 0; l < veclen[j]; l++) { /* already computed in first pass */ mean[j][k][l] = n_mean[k][l]; var[j][k][l] = var_acc[k][l] * cb_acc[k]; var_acc[k][l] = 0; } cb_acc[k] = 0; } } } /* end of EM iteration loop */ E_INFO("EM stream %u: [final] n_obs %u avg_lik %e conv_ratio %e\n", j, n_obs, avg_lik, (avg_lik - p_avg_lik) / p_avg_lik); } /* end of feature stream loop */ ckd_free((void *)mixw_acc); ckd_free((void *)cb_acc); ckd_free((void *)&mean_acc_xx[0][0][0]); ckd_free_2d((void **)mean_acc_xx); ckd_free((void *)&var_acc_xx[0][0][0]); ckd_free_2d((void **)var_acc_xx); if (n_mean_xx) { ckd_free((void *)&n_mean_xx[0][0][0]); ckd_free_2d((void **)n_mean); } ckd_free_2d((void 
**)norm); ckd_free((void *)den); return log_tot_ol; }
/**
 * Constructor helper: allocate a zeroed Nbest wrapper and store in it the
 * result of ps_nbest() called on the given decoder.
 *
 * @param d  Decoder to obtain the N-best iterator from.
 * @return The newly allocated wrapper.
 */
SWIGINTERN Nbest *
new_Nbest(Decoder *d)
{
    Nbest *wrapper;

    wrapper = ckd_calloc(1, sizeof(*wrapper));
    wrapper->nbest = ps_nbest(d, 0, -1, NULL, NULL);

    return wrapper;
}
/*
 * Cepstral mean normalization against a running ("prior") mean.
 *
 * Each of the nfr frames in incep (ceplen coefficients per frame) is added to
 * a running sum and then has the current mean estimate subtracted from it in
 * place.  The mean estimate is recomputed from the running sum whenever the
 * accumulated frame count exceeds CMN_WIN_HWM, and again when endutt is
 * non-zero; when the count is above CMN_WIN_HWM the sum is rescaled so that it
 * represents CMN_WIN frames.
 *
 * The mean, the sum and the frame count live in function-level statics that
 * are allocated on the first call using that call's ceplen.
 *
 * varnorm must be zero: a non-zero value triggers E_FATAL.
 * Calls with nfr <= 0 return right after the one-time initialization, so
 * endutt has no effect on them.
 */
void
cmn_prior(float32 **incep, int32 varnorm, int32 nfr, int32 ceplen, int32 endutt)
{
    static float32 *cur_mean = NULL;    /* the mean subtracted from input frames */
    static float32 *sum = NULL;         /* the sum over input frames */
    static int32 nframe;                /* the total number of input frames */
    static int32 initialize = 1;
    float32 sf;
    int32 fr, c;

    if (varnorm)
        E_FATAL("Variance normalization not implemented in live mode decode\n");

    if (initialize) {
        cur_mean = (float32 *) ckd_calloc(ceplen, sizeof(float32));

        /* A front-end dependent magic number */
        cur_mean[0] = 12.0;

        sum = (float32 *) ckd_calloc(ceplen, sizeof(float32));
        nframe = 0;
        initialize = 0;
        E_INFO("mean[0]= %.2f, mean[1..%d]= 0.0\n", cur_mean[0], ceplen-1);
    }

    if (nfr <= 0)
        return;

    /* Accumulate every frame, then normalize it with the mean in force. */
    for (fr = 0; fr < nfr; fr++) {
        float32 *frame = incep[fr];

        for (c = 0; c < ceplen; c++) {
            sum[c] += frame[c];
            frame[c] -= cur_mean[c];
        }
        ++nframe;
    }

    /*
     * Past the high-water mark: refresh the mean and make the accumulation
     * decay exponentially.  (Being above the mark already implies ">=", so
     * no second test is needed before rescaling.)
     */
    if (nframe > CMN_WIN_HWM) {
        sf = (float32) (1.0/nframe);
        for (c = 0; c < ceplen; c++)
            cur_mean[c] = sum[c] * sf;

        sf = CMN_WIN * sf;
        for (c = 0; c < ceplen; c++)
            sum[c] *= sf;
        nframe = CMN_WIN;
    }

    if (!endutt)
        return;

    /* End of utterance: update the mean buffer. */
    sf = (float32) (1.0/nframe);
    for (c = 0; c < ceplen; c++)
        cur_mean[c] = sum[c] * sf;

    /* Make the accumulation decay exponentially */
    if (nframe > CMN_WIN_HWM) {
        sf = CMN_WIN * sf;
        for (c = 0; c < ceplen; c++)
            sum[c] *= sf;
        nframe = CMN_WIN;
    }
}
/*
 * Build an iterator positioned on the N-gram made of `wid` preceded by
 * `n_hist` history words.
 *
 * history[0] is the word immediately before wid and history[1] the one
 * before that (this is how the unigram/bigram/trigram tables are indexed
 * below).  Only n_hist values 0, 1 and 2 are supported.
 *
 * Returns the new iterator, or NULL (after freeing it) when the requested
 * bigram/trigram is not found or n_hist is unsupported.
 */
static ngram_iter_t *
lm3g_template_iter(ngram_model_t *base, int32 wid, int32 *history, int32 n_hist)
{
    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
    lm3g_iter_t *itor = (lm3g_iter_t *)ckd_calloc(1, sizeof(*itor));

    ngram_iter_init((ngram_iter_t *)itor, base, n_hist, FALSE);

    switch (n_hist) {
    case 0:
        /* Unigram is the easiest. */
        itor->ug = model->lm3g.unigrams + wid;
        return (ngram_iter_t *)itor;

    case 1: {
        int32 pos, n_bg, first_bg;

        /* Find the bigram, as in bg_score (duplicate code...) */
        itor->ug = model->lm3g.unigrams + history[0];
        first_bg = FIRST_BG(model, history[0]);
        n_bg = FIRST_BG(model, history[0] + 1) - first_bg;
        itor->bg = model->lm3g.bigrams + first_bg;

        /* If no such bigram exists then fail. */
        pos = find_bg(itor->bg, n_bg, wid);
        if (pos < 0) {
            ngram_iter_free((ngram_iter_t *)itor);
            return NULL;
        }
        itor->bg += pos;
        return (ngram_iter_t *)itor;
    }

    case 2: {
        int32 pos, n_tg;
        tginfo_t *tginfo, *prev_tginfo;

        /* Find the trigram, as in tg_score (duplicate code...) */
        itor->ug = model->lm3g.unigrams + history[1];

        /* Search the tginfo list of history[0] for an entry with w1 == history[1]. */
        prev_tginfo = NULL;
        tginfo = model->lm3g.tginfo[history[0]];
        while (tginfo && tginfo->w1 != history[1]) {
            prev_tginfo = tginfo;
            tginfo = tginfo->next;
        }

        if (!tginfo) {
            /* Not listed yet: load it, then take the head of the list. */
            load_tginfo(model, history[1], history[0]);
            tginfo = model->lm3g.tginfo[history[0]];
        }
        else if (prev_tginfo) {
            /* Found further down the list: move it to the front. */
            prev_tginfo->next = tginfo->next;
            tginfo->next = model->lm3g.tginfo[history[0]];
            model->lm3g.tginfo[history[0]] = tginfo;
        }
        tginfo->used = 1;

        /* Trigrams for w1,w2 now pointed to by tginfo */
        n_tg = tginfo->n_tg;
        itor->tg = tginfo->tg;
        pos = find_tg(itor->tg, n_tg, wid);
        if (pos < 0) {
            ngram_iter_free((ngram_iter_t *)itor);
            return (ngram_iter_t *)NULL;
        }
        itor->tg += pos;

        /*
         * Now advance the bigram pointer accordingly.  FIXME: Note that we
         * actually already found the relevant bigram in load_tginfo.
         */
        itor->bg = model->lm3g.bigrams;
        while (FIRST_TG(model, (itor->bg - model->lm3g.bigrams + 1))
               <= (itor->tg - model->lm3g.trigrams))
            ++itor->bg;
        return (ngram_iter_t *)itor;
    }

    default:
        /* Should not happen. */
        assert(n_hist == 0); /* Guaranteed to fail. */
        ngram_iter_free((ngram_iter_t *)itor);
        return NULL;
    }
}
/*
 * Read a decision tree in its "final" text form from fp.
 *
 * Expected layout, as parsed below:
 *   - a first line "n_node <count>";
 *   - then one line per node:
 *       <node_id> <yes_id|-> <no_id|-> <entropy> <occupancy> [<question>]
 *     where "-" means "no child".  The rest of the line is handed to
 *     s3parse_comp_quest() for internal nodes.
 *
 * A node is internal only when BOTH child ids are present.  For internal
 * nodes the entropy value is stored in wt_ent_dec, for leaves in wt_ent.
 *
 * Malformed header/node lines and out-of-range node ids abort via E_FATAL
 * (the same policy the function already applied to a bad header).
 *
 * Returns the tree, or NULL (after free_tree()) if any question failed to
 * parse.
 */
dtree_t *
read_final_tree(FILE *fp, pset_t *pset, uint32 n_pset)
{
    dtree_t *out;
    dtree_node_t *node;
    uint32 n_node = 0;
    char *s, str[128];
    lineiter_t *ln = NULL;
    int n_scan;                 /* %n requires an int, not a uint32 */
    uint32 i, node_id, node_id_y, node_id_n;
    comp_quest_t *q;
    float64 ent;
    float32 occ;
    int err;
    int is_internal;

    out = ckd_calloc(1, sizeof(dtree_t));

    ln = lineiter_start_clean(fp);
    if (ln == NULL) {
        E_FATAL("Format error; expecting n_node\n");
    }

    s = ln->buf;
    str[0] = '\0';
    n_scan = 0;
    /* Field width keeps the token inside str[128]. */
    sscanf(s, "%127s%n", str, &n_scan);
    if (strcmp(str, "n_node") == 0) {
        s += n_scan;
        if (sscanf(s, "%u", &n_node) != 1) {
            E_FATAL("Format error; expecting n_node\n");
        }
    }
    else {
        E_FATAL("Format error; expecting n_node\n");
    }

    out->n_node = n_node;
    out->node = node = ckd_calloc(n_node, sizeof(dtree_node_t));

    for (i = 0; i < n_node; i++)
        node[i].node_id = i;

    err = FALSE;

    while ((ln = lineiter_next(ln))) {
        s = ln->buf;

        if (sscanf(s, "%u%n", &node_id, &n_scan) != 1) {
            E_FATAL("Format error in tree node line: %s\n", ln->buf);
        }
        s += n_scan;

        if (sscanf(s, "%127s%n", str, &n_scan) != 1) {
            E_FATAL("Format error in tree node line: %s\n", ln->buf);
        }
        s += n_scan;
        if (strcmp(str, "-") == 0) {
            node_id_y = NO_ID;
        }
        else {
            node_id_y = atoi(str);
        }

        if (sscanf(s, "%127s%n", str, &n_scan) != 1) {
            E_FATAL("Format error in tree node line: %s\n", ln->buf);
        }
        s += n_scan;
        if (strcmp(str, "-") == 0) {
            node_id_n = NO_ID;
        }
        else {
            node_id_n = atoi(str);
        }

        if (sscanf(s, "%le%n", &ent, &n_scan) != 1) {
            E_FATAL("Format error in tree node line: %s\n", ln->buf);
        }
        s += n_scan;

        if (sscanf(s, "%e%n", &occ, &n_scan) != 1) {
            E_FATAL("Format error in tree node line: %s\n", ln->buf);
        }
        s += n_scan;

        /*
         * Internal node <=> both children given.  The "no" child must be
         * tested too; testing the "yes" child twice let a line with only
         * one child index node[NO_ID] below.
         */
        is_internal = (node_id_y != NO_ID) && (node_id_n != NO_ID);

        /* All ids index node[]; reject anything outside it. */
        if (node_id >= n_node) {
            E_FATAL("Format error; node id %u out of range (n_node = %u)\n",
                    node_id, n_node);
        }
        if (is_internal && (node_id_y >= n_node || node_id_n >= n_node)) {
            E_FATAL("Format error; child id of node %u out of range (n_node = %u)\n",
                    node_id, n_node);
        }

        if (is_internal) {
            q = (comp_quest_t *)ckd_calloc(1, sizeof(comp_quest_t));
            if (s3parse_comp_quest(pset, n_pset, q, s) != S3_SUCCESS) {
                err = TRUE;
            }

            node[node_id].q = q;
            node[node_id].wt_ent_dec = ent;
            node[node_id].y = &node[node_id_y];
            node[node_id].n = &node[node_id_n];
            node[node_id_y].p = node[node_id_n].p = &node[node_id];
        }
        else {
            node[node_id].q = NULL;
            node[node_id].wt_ent = ent;
            node[node_id].y = NULL;
            node[node_id].n = NULL;
        }

        node[node_id].occ = occ;
    }

    if (err == TRUE) {
        free_tree(out);
        out = NULL;
    }

    /* ln is NULL here (the loop ran until lineiter_next() returned NULL). */
    lineiter_free(ln);

    return out;
}
/*
 * Grow a codebook from one mean up to n_mean means by repeated splitting.
 *
 * On every pass the cluster with the most observations is selected, two
 * extreme points of that cluster are found (find_farthest_neigh) and refined
 * with a 2-means run restricted to the cluster (k_means_subset); they
 * replace the cluster's mean and add one new mean.  A full k_means() over
 * all n_mean_cur means then produces the labels used by the next pass.
 *
 * mean must have room for n_mean vectors of length veclen.  The loop body
 * always runs at least once and writes mean[1], so n_mean is expected to be
 * at least 2.
 *
 * Returns the squared error reported by the last k_means() call.
 */
static float64
furthest_neighbor_kmeans(uint32 n_obs, uint32 veclen, vector_t *mean, uint32 n_mean, float32 min_ratio, uint32 max_iter)
{
    uint32 **obs_of;
    uint32 *occ_cnt;
    codew_t *lbl;
    uint32 k_max = 0, occ_max;
    uint32 n_mean_cur;
    vector_t *extr_mean;
    uint32 k, l;
    float64 sqerr;

    /* Zeroed labels: every observation starts in cluster 0. */
    lbl = ckd_calloc(n_obs, sizeof(codew_t));
    occ_cnt = ckd_calloc(n_mean, sizeof(uint32));
    n_mean_cur = 1;
    extr_mean = (float32 **)ckd_calloc_2d(2, veclen, sizeof(float32));

    do {
        E_INFO("n_mean == %u\n", n_mean_cur);

        /*
         * NOTE(review): obs_of is obtained anew on every pass and never
         * released here.  How cw_obs() allocates it is not visible from this
         * function, so it is left untouched -- confirm and free accordingly.
         */
        obs_of = cw_obs(lbl, n_mean_cur, n_obs, occ_cnt);

        /* Pick the most populated cluster as the one to split. */
        occ_max = 0;
        for (k = 0; k < n_mean_cur; k++) {
            if (occ_cnt[k] > occ_max) {
                occ_max = occ_cnt[k];
                k_max = k;
            }
        }

        /* set the initial values of the new means by extreme means */
        E_INFO("d_max == %e\n",
               find_farthest_neigh(obs_of[k_max], occ_cnt[k_max], veclen,
                                   extr_mean[0], extr_mean[1]));

        /* Only the refined extr_mean is needed; the error value is not. */
        (void)k_means_subset(extr_mean, 2, obs_of[k_max], occ_cnt[k_max],
                             veclen, min_ratio, max_iter, NULL);

        for (l = 0; l < veclen; l++) {
            mean[k_max][l] = extr_mean[0][l];
            mean[n_mean_cur][l] = extr_mean[1][l];
        }
        ++n_mean_cur;

        /* k_means() hands back a fresh label array through &lbl. */
        ckd_free(lbl);
        sqerr = k_means(mean, n_mean_cur, n_obs, veclen, min_ratio, max_iter, &lbl);
        E_INFO("\tsquerr == %e\n", sqerr);
    } while (n_mean_cur < n_mean);

    /* Release the working storage that used to be leaked on return. */
    ckd_free(lbl);
    ckd_free(occ_cnt);
    ckd_free_2d((void **)extr_mean);

    return sqerr;
}
int mk_node(dtree_node_t *node, uint32 node_id, uint32 *id, uint32 n_id, float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, float32 mwfloor) { float32 ***mixw_occ, **dist; uint32 mm, m, s, j, k; float64 *dnom, norm, wt_ent, s_wt_ent, occ; float32 mx_wt; uint32 *l_id; float32 ***lmeans=0,***lvars=0; float32 varfloor=0; uint32 continuous, sumveclen; char* type; type = (char *)cmd_ln_str("-ts2cbfn"); if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0) E_FATAL("Type %s unsupported; trees can only be built on types .semi. or .cont.\n",type); if (strcmp(type,".cont.") == 0) continuous = 1; else continuous = 0; if (continuous == 1) { varfloor = cmd_ln_float32("-varfloor"); /* Sumveclen is overallocation, but coding is simpler */ for (j=0,sumveclen=0; j < n_stream; j++) sumveclen += veclen[j]; lmeans = (float32 ***) ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); lvars = (float32 ***) ckd_calloc_3d(n_state,n_stream,sumveclen,sizeof(float32)); } mixw_occ = (float32 ***)ckd_calloc_3d(n_state, n_stream, n_density, sizeof(float32)); dist = (float32 **)ckd_calloc_2d(n_stream, n_density, sizeof(float32)); dnom = (float64 *)ckd_calloc(n_stream, sizeof(float64)); /* Merge distributions of all the elements in a cluster for combined distribution */ for (s = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { float32 *lmeanvec=0, *lvarvec=0; if (continuous == 1) { lmeanvec = lmeans[s][j]; lvarvec = lvars[s][j]; } for (mm = 0; mm < n_id; mm++) { m = id[mm]; for (k = 0; k < n_density; k++) { mixw_occ[s][j][k] += mixw[m][s][j][k]; } /* For continuous hmms we have only one gaussian per state */ if (continuous == 1) { for (k = 0; k < veclen[j]; k++) { lmeanvec[k] += mixw[m][s][j][0] * means[m][s][j][k]; lvarvec[k] += mixw[m][s][j][0] * (vars[m][s][j][k] + means[m][s][j][k] * means[m][s][j][k]); } } } if (continuous == 1) { if (mixw_occ[s][j][0] != 0) { for (k = 0; k 
< veclen[j]; k++) { lmeanvec[k] /= mixw_occ[s][j][0]; lvarvec[k] = lvarvec[k]/mixw_occ[s][j][0] - lmeanvec[k]*lmeanvec[k]; if (lvarvec[k] < varfloor) lvarvec[k] = varfloor; } } else { for (k = 0; k < veclen[j]; k++) if (lmeanvec[k] != 0) E_FATAL("denominator = 0, but numerator = %f at k = %d\n",lmeanvec[k],k); } } } } /* Find out which state is under consideration */ for (j = 0, mx_wt = 0, s = 0; s < n_state; s++) { if (stwt[s] > mx_wt) { mx_wt = stwt[s]; j = s; } } /* occ is the same for each independent feature, so just choose 0 */ for (k = 0, occ = 0; k < n_density; k++) { occ += mixw_occ[j][0][k]; } for (s = 0, wt_ent = 0; s < n_state; s++) { for (j = 0; j < n_stream; j++) { for (k = 0, dnom[j] = 0; k < n_density; k++) { dnom[j] += mixw_occ[s][j][k]; } } for (j = 0, s_wt_ent = 0; j < n_stream; j++) { norm = 1.0 / dnom[j]; /* discrete_entropy for discrete case, continuous entropy for continuous HMMs */ if (continuous != 1) { for (k = 0; k < n_density; k++) { dist[j][k] = mixw_occ[s][j][k] * norm; if (dist[j][k] < mwfloor) dist[j][k] = mwfloor; } s_wt_ent += dnom[j] * ent_d(dist[j], n_density); } else { s_wt_ent += dnom[j] * ent_cont(lmeans[s][j], lvars[s][j], veclen[j]); } } wt_ent += stwt[s] * s_wt_ent; } node->node_id = node_id; l_id = ckd_calloc(n_id, sizeof(uint32)); for (j = 0; j < n_id; j++) { l_id[j] = id[j]; } node->id = l_id; node->n_id = n_id; node->mixw_occ = mixw_occ; if (continuous == 1) { node->means = lmeans; node->vars = lvars; } node->occ = occ; node->wt_ent = wt_ent; ckd_free_2d((void **)dist); ckd_free((void *)dnom); return S3_SUCCESS; }
/*
 * Read LM class definitions from file_name into the hash table `classes`.
 *
 * The file is scanned line by line (at most two words per line are used):
 *   LMCLASS <name>      starts a class;
 *   <word> [<prob>]     inside a class adds a word, probability 1.0 when
 *                       omitted;
 *   END <name>          closes the class; <name> must match the open class.
 * Anything outside a class that is not an LMCLASS marker is ignored.
 *
 * On END a classdef_t is built from the collected words/weights and entered
 * into `classes` under the class name.
 *
 * Returns 0 on success, -1 if the file cannot be opened, an END marker does
 * not match, or the hash table insertion does not return the new classdef.
 */
int32
read_classdef_file(hash_table_t * classes, const char *file_name)
{
    FILE *fp;
    int32 is_pipe;
    int inclass;  /**< Are we currently reading a list of class words? */
    int32 rv = -1;
    gnode_t *gn;
    glist_t classwords = NULL;
    glist_t classprobs = NULL;
    char *classname = NULL;
    char line[512];
    char *wptr[2];
    int n_words;

    fp = fopen_comp(file_name, "r", &is_pipe);
    if (fp == NULL) {
        E_ERROR("File %s not found\n", file_name);
        return -1;
    }

    inclass = FALSE;
    while (!feof(fp) && fgets(line, sizeof(line), fp) != NULL) {
        classdef_t *classdef;
        gnode_t *word, *weight;
        int32 i;

        n_words = str2words(line, wptr, 2);
        if (n_words <= 0)
            continue;

        if (!inclass) {
            /* Start a new LM class if the LMCLASS marker is seen */
            if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) {
                inclass = TRUE;
                classname = ckd_salloc(wptr[1]);
            }
            /* Otherwise, just ignore whatever junk we got */
            continue;
        }

        /* Inside a class: anything but an end marker is a class word. */
        if (!(n_words == 2 && 0 == strcmp(wptr[0], "END"))) {
            float32 fprob;

            if (n_words == 2)
                fprob = atof_c(wptr[1]);
            else
                fprob = 1.0f;

            /* Add it to the list of words for this class. */
            classwords = glist_add_ptr(classwords, ckd_salloc(wptr[0]));
            classprobs = glist_add_float32(classprobs, fprob);
            continue;
        }

        /* End of class marker: it must name the class being read. */
        if (classname == NULL || 0 != strcmp(wptr[1], classname))
            goto error_out;
        inclass = FALSE;

        /* Construct a class from the list of words collected. */
        classdef = ckd_calloc(1, sizeof(*classdef));
        classwords = glist_reverse(classwords);
        classprobs = glist_reverse(classprobs);
        classdef->n_words = glist_count(classwords);
        classdef->words = ckd_calloc(classdef->n_words,
                                     sizeof(*classdef->words));
        classdef->weights = ckd_calloc(classdef->n_words,
                                       sizeof(*classdef->weights));

        word = classwords;
        weight = classprobs;
        for (i = 0; i < classdef->n_words; ++i) {
            classdef->words[i] = gnode_ptr(word);
            classdef->weights[i] = gnode_float32(weight);
            word = gnode_next(word);
            weight = gnode_next(weight);
        }

        /* Add this class to the hash table. */
        if (hash_table_enter(classes, classname, classdef) != classdef) {
            classdef_free(classdef);
            goto error_out;
        }

        /* Reset everything. */
        glist_free(classwords);
        glist_free(classprobs);
        classwords = NULL;
        classprobs = NULL;
        classname = NULL;
    }
    rv = 0;                     /* Success. */

  error_out:
    /* Free all the stuff we might have allocated. */
    fclose_comp(fp, is_pipe);
    for (gn = classwords; gn; gn = gnode_next(gn))
        ckd_free(gnode_ptr(gn));
    glist_free(classwords);
    glist_free(classprobs);
    ckd_free(classname);

    return rv;
}
/*
 * Create a multi-stream Gaussian mixture scorer ("ms") for acmod.
 *
 * Loads the Gaussian codebooks (-mean, -var, -varfloor) and the senones
 * (-mixw, -senmgau, -mixwfloor) named in acmod->config, then checks that
 *   - the codebook stream count and per-stream dimensions match acmod->fcb;
 *   - senone and codebook stream counts and density counts agree (fatal
 *     otherwise);
 *   - the senones do not reference more codebooks than exist (fatal; fewer
 *     is only reported).
 * -topn is clamped to the number of densities when it is not positive or
 * too large.
 *
 * Returns the new scorer with its function table installed, or 0 after
 * releasing the partial object through ms_mgau_free() when loading or the
 * feature checks fail.
 */
ps_mgau_t *
ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef)
{
    /* Codebooks */
    ms_mgau_model_t *msg;
    ps_mgau_t *mg;
    gauden_t *g;
    senone_t *s;
    cmd_ln_t *config;
    int i;

    static ps_mgaufuncs_t ms_mgau_funcs = {
        "ms",
        ms_cont_mgau_frame_eval, /* frame_eval */
        ms_mgau_mllr_transform,  /* transform */
        ms_mgau_free             /* free */
    };

    config = acmod->config;

    msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t));
    msg->config = config;
    msg->g = NULL;
    msg->s = NULL;

    g = msg->g = gauden_init(cmd_ln_str_r(config, "-mean"),
                             cmd_ln_str_r(config, "-var"),
                             cmd_ln_float32_r(config, "-varfloor"),
                             lmath);
    /*
     * g is dereferenced right below.  NOTE(review): presumably gauden_init()
     * reports a load failure by returning NULL -- confirm; the check is
     * harmless if it never does.
     */
    if (g == NULL) {
        E_ERROR("Failed to read means and variances\n");
        goto error_out;
    }

    /* Verify n_feat and veclen, against acmod. */
    if (g->n_feat != feat_dimension1(acmod->fcb)) {
        E_ERROR("Number of streams does not match: %d != %d\n",
                g->n_feat, feat_dimension1(acmod->fcb));
        goto error_out;
    }
    for (i = 0; i < g->n_feat; ++i) {
        if (g->featlen[i] != feat_dimension2(acmod->fcb, i)) {
            E_ERROR("Dimension of stream %d does not match: %d != %d\n", i,
                    g->featlen[i], feat_dimension2(acmod->fcb, i));
            goto error_out;
        }
    }

    s = msg->s = senone_init(msg->g,
                             cmd_ln_str_r(config, "-mixw"),
                             cmd_ln_str_r(config, "-senmgau"),
                             cmd_ln_float32_r(config, "-mixwfloor"),
                             lmath, mdef);
    /* Same reasoning as for g: do not dereference a missing senone set. */
    if (s == NULL) {
        E_ERROR("Failed to read mixture weights\n");
        goto error_out;
    }

    s->aw = cmd_ln_int32_r(config, "-aw");

    /* Verify senone parameters against gauden parameters */
    if (s->n_feat != g->n_feat)
        E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat,
                s->n_feat);
    if (s->n_cw != g->n_density)
        E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n",
                g->n_density, s->n_cw);
    if ((int)s->n_gauden > g->n_mgau)
        E_FATAL("Senones need more codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);
    if ((int)s->n_gauden < g->n_mgau)
        E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n",
                s->n_gauden, g->n_mgau);

    msg->topn = cmd_ln_int32_r(config, "-topn");
    E_INFO("The value of topn: %d\n", msg->topn);
    /* A negative value is just as invalid as 0 and must not reach the
       allocation below. */
    if (msg->topn <= 0 || msg->topn > msg->g->n_density) {
        E_WARN
            ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n",
             msg->topn, msg->g->n_density);
        msg->topn = msg->g->n_density;
    }

    msg->dist = (gauden_dist_t ***)
        ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn,
                      sizeof(gauden_dist_t));
    msg->mgau_active = (uint8*)ckd_calloc(g->n_mgau, sizeof(int8));

    mg = (ps_mgau_t *)msg;
    mg->vt = &ms_mgau_funcs;
    return mg;

error_out:
    ms_mgau_free(ps_mgau_base(msg));
    return 0;
}
int main(int argc, char *argv[]) { model_def_t *imdef; model_def_t *omdef; pset_t *pset; uint32 n_pset; dtree_t ***tree; uint32 n_seno; uint32 n_ci; uint32 n_acmod; uint32 p; uint32 s; model_def_entry_t *idefn, *odefn; acmod_id_t b, l, r; word_posn_t wp; int allphones; parse_cmd_ln(argc, argv); if (init(&imdef, &pset, &n_pset, &tree, &n_seno) != S3_SUCCESS) return 1; omdef = (model_def_t *)ckd_calloc(1, sizeof(model_def_t)); omdef->acmod_set = imdef->acmod_set; /* same set of acoustic models */ omdef->n_total_state = imdef->n_total_state; omdef->n_tied_ci_state = imdef->n_tied_ci_state; omdef->n_tied_state = imdef->n_tied_ci_state + n_seno; omdef->n_tied_tmat = imdef->n_tied_tmat; omdef->defn = (model_def_entry_t *)ckd_calloc(imdef->n_defn, sizeof(model_def_entry_t)); /* * Define the context-independent models */ n_ci = acmod_set_n_ci(imdef->acmod_set); for (p = 0; p < n_ci; p++) { idefn = &imdef->defn[p]; odefn = &omdef->defn[p]; odefn->p = idefn->p; odefn->tmat = idefn->tmat; odefn->state = ckd_calloc(idefn->n_state, sizeof(uint32)); odefn->n_state = idefn->n_state; for (s = 0; s < idefn->n_state; s++) { if (idefn->state[s] == NO_ID) odefn->state[s] = NO_ID; else { odefn->state[s] = idefn->state[s]; } } } /* * Define the rest of the models */ allphones = cmd_ln_int32("-allphones"); n_acmod = acmod_set_n_acmod(omdef->acmod_set); for (; p < n_acmod; p++) { b = acmod_set_base_phone(omdef->acmod_set, p); assert(p != b); idefn = &imdef->defn[p]; odefn = &omdef->defn[p]; odefn->p = idefn->p; odefn->tmat = idefn->tmat; odefn->state = ckd_calloc(idefn->n_state, sizeof(uint32)); odefn->n_state = idefn->n_state; for (s = 0; s < idefn->n_state; s++) { if (idefn->state[s] == NO_ID) /* Non-emitting state */ odefn->state[s] = NO_ID; else { uint32 bb; /* emitting state: find the tied state */ acmod_set_id2tri(omdef->acmod_set, &b, &l, &r, &wp, p); #ifdef HORRIBLY_VERBOSE fprintf(stderr, "%s %u ", acmod_set_id2name(omdef->acmod_set, p), s); #endif bb = allphones ? 
0 : b; odefn->state[s] = tied_state(&tree[bb][s]->node[0], b, l, r, wp, pset); #ifdef HORRIBLY_VERBOSE fprintf(stderr, "\t-> %u\n", odefn->state[s]); fprintf(stderr, "\n"); #endif } } } if (model_def_write(omdef, cmd_ln_str("-omoddeffn")) != S3_SUCCESS) { return 1; } return 0; }
/*
 * Read one N-gram entry of the given order from the next line of an ARPA
 * file and store it in raw_ngram.
 *
 * A line is "<log10 prob> <w1> ... <w_order> [<log10 backoff>]".
 *   - For order == order_max only the probability is stored (weights[0]).
 *   - Otherwise weights[0] is the probability and weights[1] the backoff
 *     (0 when the line carries exactly order + 1 fields).
 * Positive probabilities are reported and replaced by 0.  Values are
 * converted with logmath_log10_to_log_float().
 *
 * The word ids are stored in reverse order: words[0] receives the id of the
 * last word on the line and words[order - 1] that of the first.
 *
 * *li is advanced by one line.  If the file ends, or the line has too few
 * fields, raw_ngram is left untouched (a warning is printed for non-empty
 * malformed lines).
 */
static void
read_ngram_instance(lineiter_t ** li, hash_table_t * wid,
                    logmath_t * lmath, int order, int order_max,
                    ngram_raw_t * raw_ngram)
{
    int n;
    int words_expected;
    int i;
    char *wptr[NGRAM_MAX_ORDER + 1];

    *li = lineiter_next(*li);
    if (*li == NULL) {
        E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n",
                order);
        return;
    }
    string_trim((*li)->buf, STRING_BOTH);
    words_expected = order + 1;

    n = str2words((*li)->buf, wptr, NGRAM_MAX_ORDER + 1);
    if (n < words_expected) {
        if ((*li)->buf[0] != '\0') {
            E_WARN("Format error; %d-gram ignored: %s\n", order,
                   (*li)->buf);
        }
        return;
    }

    if (order == order_max) {
        raw_ngram->weights =
            (float *) ckd_calloc(1, sizeof(*raw_ngram->weights));
        raw_ngram->weights[0] = atof_c(wptr[0]);
        if (raw_ngram->weights[0] > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, wptr[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        raw_ngram->weights[0] =
            logmath_log10_to_log_float(lmath, raw_ngram->weights[0]);
    }
    else {
        float weight, backoff;

        raw_ngram->weights =
            (float *) ckd_calloc(2, sizeof(*raw_ngram->weights));

        weight = atof_c(wptr[0]);
        if (weight > 0) {
            E_WARN("%d-gram [%s] has positive probability. Zeroize\n",
                   order, wptr[1]);
            raw_ngram->weights[0] = 0.0f;
        }
        else {
            raw_ngram->weights[0] =
                logmath_log10_to_log_float(lmath, weight);
        }

        if (n == order + 1) {
            raw_ngram->weights[1] = 0.0f;
        }
        else {
            backoff = atof_c(wptr[order + 1]);
            raw_ngram->weights[1] =
                logmath_log10_to_log_float(lmath, backoff);
        }
    }

    raw_ngram->words =
        (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words));

    /*
     * Field i of the line goes to words[order - i].  Indexing is used rather
     * than walking a pointer down to one element before the array, which is
     * undefined pointer arithmetic.
     *
     * NOTE(review): the lookup result is not examined, so a word missing
     * from `wid` presumably leaves the zero from ckd_calloc() in place --
     * confirm that this is intended.
     */
    for (i = 1; i <= order; i++) {
        hash_table_lookup_int32(wid, wptr[i],
                                (int32 *) &raw_ngram->words[order - i]);
    }
}
/*
 * Load everything needed to tie states: the input model definition
 * (-imoddeffn), the phone sets (-psetfn) and the decision trees found in
 * -treedir.
 *
 * Trees are read from "<treedir>/<phone>-<state>.dtree" for every state but
 * the last of each CI phone that does not have the "filler" attribute.
 * With -allphones a single set named "ALLPHONES" is read instead, using the
 * state count of the first model after the CI phones.  The leaves of each
 * tree are labelled with consecutive tied-state ids starting at the input
 * definition's n_tied_ci_state.
 *
 * Outputs: *out_imdef, *out_pset, *out_n_pset, *out_tree (indexed
 * [phone][state]) and *out_n_seno (total number of leaves).
 *
 * Returns S3_SUCCESS, or S3_ERROR if the model definition or one of the
 * trees cannot be read.  A missing -imoddeffn, an unopenable tree file or an
 * over-long tree path is fatal.
 */
int
init(model_def_t **out_imdef,
     pset_t **out_pset,
     uint32 *out_n_pset,
     dtree_t ****out_tree,
     uint32 *out_n_seno)
{
    model_def_t *imdef;
    uint32 p, s;
    uint32 n_ci, n_state;
    char fn[MAXPATHLEN+1];
    const char *a_fn;
    FILE *fp;
    dtree_t ***tree, *tr;
    pset_t *pset;
    uint32 n_pset;
    uint32 n_seno;
    const char *treedir;
    uint32 ts_id;
    int allphones;
    int len;

    a_fn = cmd_ln_str("-imoddeffn");
    if (a_fn == NULL)
        E_FATAL("Specify -imoddeffn\n");
    if (model_def_read(&imdef, a_fn) != S3_SUCCESS) {
        return S3_ERROR;
    }
    *out_imdef = imdef;

    a_fn = cmd_ln_str("-psetfn");
    E_INFO("Reading: %s\n", a_fn);
    *out_pset = pset = read_pset_file(a_fn, imdef->acmod_set, &n_pset);
    *out_n_pset = n_pset;

    allphones = cmd_ln_int32("-allphones");
    if (allphones)
        n_ci = 1;
    else
        n_ci = acmod_set_n_ci(imdef->acmod_set);

    treedir = cmd_ln_str("-treedir");
    tree = (dtree_t ***)ckd_calloc(n_ci, sizeof(dtree_t **));
    *out_tree = tree;

    ts_id = imdef->n_tied_ci_state;
    for (p = 0, n_seno = 0; p < n_ci; p++) {
        const char *pname;

        /* Filler phones get no trees (their tree[p] stays NULL). */
        if (!allphones && acmod_set_has_attrib(imdef->acmod_set, p, "filler"))
            continue;

        if (allphones) {
            n_state = imdef->defn[acmod_set_n_ci(imdef->acmod_set)].n_state;
            pname = "ALLPHONES";
        }
        else {
            n_state = imdef->defn[p].n_state;
            pname = acmod_set_id2name(imdef->acmod_set, p);
        }

        tree[p] = (dtree_t **)ckd_calloc(n_state, sizeof(dtree_t *));

        /* The last state has no tree. */
        for (s = 0; s < n_state-1; s++) {
            E_INFO("%s-%u: offset %u\n", pname, s, ts_id);

            /* Bounded formatting: treedir and pname are not length-limited. */
            len = snprintf(fn, sizeof(fn), "%s/%s-%u.dtree", treedir, pname, s);
            if (len < 0 || (size_t)len >= sizeof(fn)) {
                E_FATAL("Tree file name too long: %s/%s-%u.dtree\n",
                        treedir, pname, s);
            }

            fp = fopen(fn, "r");
            if (fp == NULL) {
                E_FATAL_SYSTEM("Unable to open %s for reading", fn);
            }

            tr = read_final_tree(fp, pset, n_pset);
            fclose(fp);

            /* read_final_tree() returns NULL when a question fails to parse. */
            if (tr == NULL) {
                E_ERROR("Unable to read decision tree %s\n", fn);
                return S3_ERROR;
            }
            tree[p][s] = tr;

            label_leaves(&tr->node[0], &ts_id);
            n_seno += cnt_leaf(&tr->node[0]);
        }
    }

    assert(n_seno == (ts_id - imdef->n_tied_ci_state));

    /* Reports the id following the last assigned tied state. */
    E_INFO("n_seno= %u\n", ts_id);

    *out_n_seno = n_seno;

    return S3_SUCCESS;
}
/*
 * Read a model definition file into a newly allocated model_def_t.
 *
 * File layout, as parsed here:
 *   1. a version line that must equal MODEL_DEF_VERSION;
 *   2. six "<count> <tag>" lines in any order, one for each of n_base,
 *      n_tri, n_state_map, n_tied_state, n_tied_ci_state and n_tied_tmat;
 *   3. n_base lines parsed by parse_base_line();
 *   4. n_tri lines parsed by parse_tri_line().
 *
 * The state ids of all models are stored back to back in one array of
 * n_state_map entries; each model's `state` points into it.
 *
 * *out_model_def is assigned as soon as the structure is allocated, i.e.
 * also when a later model line fails to parse; callers must rely on the
 * return value.  NOTE(review): storage allocated before such a failure is
 * not released here.
 *
 * Returns S3_SUCCESS, or S3_ERROR when the file cannot be opened, is
 * truncated, or is malformed.
 */
int32
model_def_read(model_def_t **out_model_def,
               const char *file_name)
{
    lineiter_t *li = NULL;
    uint32 n;
    char tag[32];
    acmod_set_t *acmod_set;
    uint32 i, j;
    acmod_id_t acmod_id;
    uint32 tmat;
    uint32 n_state;
    uint32 n_tri;
    uint32 n_base;
    uint32 n_total_map;
    uint32 n_tied_state;
    uint32 n_tied_ci_state;
    uint32 n_tied_tmat;
    uint32 state[MAX_N_STATE];
    uint32 n_total;
    model_def_t *omd;
    model_def_entry_t *mdef;
    uint32 *all_state;
    FILE *fp;
    int parsed;

    fp = fopen(file_name, "r");
    if (fp == NULL) {
        E_WARN_SYSTEM("Unable to open %s for reading", file_name);
        return S3_ERROR;
    }

    li = lineiter_start_clean(fp);
    if (li == NULL) {
        E_ERROR("ERROR not even a version number in %s!?\n", file_name);
        goto fail;
    }

    if (strcmp(li->buf, MODEL_DEF_VERSION) != 0) {
        E_ERROR("ERROR version(%s) == \"%s\", but expected %s at line %d.\n",
                file_name, li->buf, MODEL_DEF_VERSION, lineiter_lineno(li));
        if (strcmp(li->buf, "0.1") == 0) {
            E_ERROR("You must add an attribute field to all the model records. See SPHINX-III File Formats manual\n");
        }
        if (strcmp(li->buf, "0.2") == 0) {
            E_ERROR("You must add n_tied_state, n_tied_ci_state and n_tied_tmat definitions at the head of the file. See /net/alf19/usr2/eht/s3/cvtmdef.csh\n");
        }
        goto fail;
    }

    n_tri = n_base = n_total_map =
        n_tied_state = n_tied_ci_state = n_tied_tmat = NO_NUMBER;

    for (i = 0; i < 6; i++) {
        li = lineiter_next(li);
        if (li == NULL) {
            E_ERROR("Incomplete count information in %s!?\n", file_name);
            goto fail;
        }

        /* Width-limited so a long token cannot overrun tag[32]. */
        if (sscanf(li->buf, "%u %31s", &n, tag) != 2) {
            E_ERROR("Malformed count line in %s at line %d\n",
                    file_name, lineiter_lineno(li));
            goto fail;
        }

        if (strcmp(tag, "n_base") == 0) {
            n_base = n;
        }
        else if (strcmp(tag, "n_tri") == 0) {
            n_tri = n;
        }
        else if (strcmp(tag, "n_state_map") == 0) {
            n_total_map = n;
        }
        else if (strcmp(tag, "n_tied_state") == 0) {
            n_tied_state = n;
        }
        else if (strcmp(tag, "n_tied_ci_state") == 0) {
            n_tied_ci_state = n;
        }
        else if (strcmp(tag, "n_tied_tmat") == 0) {
            n_tied_tmat = n;
        }
        else {
            E_ERROR("Unknown tag %s in file at line %d\n",
                    tag, lineiter_lineno(li));
            goto fail;
        }
    }

    /* Six lines with a repeated tag leave one of the counts unset. */
    if (n_base == NO_NUMBER || n_tri == NO_NUMBER || n_total_map == NO_NUMBER
        || n_tied_state == NO_NUMBER || n_tied_ci_state == NO_NUMBER
        || n_tied_tmat == NO_NUMBER) {
        E_ERROR("Incomplete count information in %s!?\n", file_name);
        goto fail;
    }

    li = lineiter_next(li);

    *out_model_def = omd = ckd_calloc(1, sizeof(model_def_t));
    omd->acmod_set = acmod_set = acmod_set_new();

    /* give the acmod_set module some storage allocation requirements */
    acmod_set_set_n_ci_hint(acmod_set, n_base);
    acmod_set_set_n_tri_hint(acmod_set, n_tri);

    n_total = n_base + n_tri;

    omd->defn = mdef = ckd_calloc(n_total, sizeof(model_def_entry_t));
    omd->n_total_state = n_total_map;

    all_state = ckd_calloc(n_total_map, sizeof(uint32));

    omd->n_tied_ci_state = n_tied_ci_state;
    omd->n_tied_state = n_tied_state;
    omd->n_tied_tmat = n_tied_tmat;

    omd->max_n_state = 0;
    omd->min_n_state = MAX_N_STATE;

    /*
     * Base phones come first, then triphones; only the line parser differs.
     * j is the running offset of the current model's states in all_state.
     */
    for (i = 0, j = 0; i < n_total; i++, j += n_state) {
        if (li == NULL) {
            E_ERROR("Unexpected end of file in %s: read %u of %u model definitions\n",
                    file_name, i, n_total);
            goto fail;
        }

        n_state = MAX_N_STATE;

        if (i < n_base) {
            parsed = (parse_base_line(li->buf, lineiter_lineno(li),
                                      &acmod_id, &tmat, state, &n_state,
                                      acmod_set) == S3_SUCCESS);
        }
        else {
            parsed = (parse_tri_line(li->buf, lineiter_lineno(li),
                                     &acmod_id, &tmat, state, &n_state,
                                     acmod_set) == S3_SUCCESS);
        }
        if (!parsed) {
            goto fail;
        }

        /* Refuse to copy past the end of all_state (j <= n_total_map here). */
        if (n_state > n_total_map - j) {
            E_ERROR("More states than n_state_map (%u) in %s at line %d\n",
                    n_total_map, file_name, lineiter_lineno(li));
            goto fail;
        }

        mdef[i].p = acmod_id;
        mdef[i].tmat = tmat;
        mdef[i].n_state = n_state;
        mdef[i].state = &all_state[j];
        memcpy((char *)mdef[i].state, (const char *)state,
               n_state * sizeof(uint32));

        update_totals(omd, &mdef[i]);

        li = lineiter_next(li);
    }

    omd->n_defn = n_total;

    assert(j == n_total_map);

    E_INFO("Model definition info:\n");
    E_INFO("%u total models defined (%u base, %u tri)\n", omd->n_defn, n_base, n_tri);
    E_INFO("%u total states\n", omd->n_total_state);
    E_INFO("%u total tied states\n", omd->n_tied_state);
    E_INFO("%u total tied CI states\n", omd->n_tied_ci_state);
    E_INFO("%u total tied transition matrices\n", omd->n_tied_tmat);
    E_INFO("%u max state/model\n", omd->max_n_state);
    E_INFO("%u min state/model\n", omd->min_n_state);

    fclose(fp);
    lineiter_free(li);
    return S3_SUCCESS;

  fail:
    /* li may be NULL here, as in the original error paths. */
    fclose(fp);
    lineiter_free(li);
    return S3_ERROR;
}
dtree_t * mk_tree_comp(float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, uint32 *id, uint32 n_id, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 n_base_phone, uint32 **dfeat, uint32 n_dfeat, uint32 split_min, uint32 split_max, float32 split_thr, uint32 split_min_comp, uint32 split_max_comp, float32 split_thr_comp, float32 mwfloor) { dtree_t *comp_tree; dtree_node_t *root, *b_n; uint32 i; comp_tree = ckd_calloc(1, sizeof(dtree_t)); comp_tree->node = ckd_calloc(2*split_max_comp+1, sizeof(dtree_node_t)); comp_tree->n_node = 0; comp_tree->node[0].node_id = 0; comp_tree->n_node = 1; root = &comp_tree->node[0]; mk_node(root, 0, id, n_id, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, mwfloor); root->q = (void *)mk_comp_quest(&root->wt_ent_dec, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, id, n_id, all_q, n_all_q, pset, n_base_phone, dfeat, n_dfeat, split_min, split_max, split_thr, mwfloor); for (i = 0; i < split_max_comp; i++) { b_n = best_leaf_node(root); E_INFO("Comp split %u\n", i); if (b_n == NULL) { E_INFO("stop. leaf nodes are specific\n"); break; } if (b_n->wt_ent_dec <= 0) { E_INFO("stop. b_n->wt_ent_dec (%.3e) <= 0\n", b_n->wt_ent_dec); break; } if ((i > split_min_comp) && (b_n->wt_ent_dec < split_thr_comp * b_n->wt_ent)) { E_INFO("stop. b_n->wt_ent_dec <= split_thr_comp * b_n->wt_ent. 
%.3e <= %.3e\n", b_n->wt_ent_dec, split_thr_comp * b_n->wt_ent); break; } split_node_comp(comp_tree, b_n->node_id, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, all_q, n_all_q, pset, n_base_phone, dfeat, n_dfeat, split_min, split_max, split_thr, mwfloor); #if 0 printf("Comp Split %u:\n", i); print_tree_comp(stderr, "*", root, pset, 0); fprintf(stderr, "\n"); #endif } #if 0 E_INFO("Final Comp Tree %u:\n", i); print_tree_comp(stderr, "", root, pset, 0); fprintf(stderr, "\n"); #endif return comp_tree; }
/*
 * Regression test for the acoustic model scorer (acmod).
 *
 * The same utterance (DATADIR "/goforward.raw") is scored five ways:
 *   1. raw audio fed in 2048-sample chunks,
 *   2. raw audio fed as one full-utterance buffer,
 *   3. MFCC frames fed incrementally,
 *   4. MFCC frames fed as one full-utterance buffer,
 *   5. the last utterance replayed after acmod_rewind().
 * Pass 1 records the best score of each of the first 190 frames in
 * bestsen1[]; passes 2-5 compare their scores against that record with
 * TEST_EQUAL_LOG.  Every pass also checks that frame indices reported by
 * acmod_score() count up from zero without gaps.
 *
 * Returns 0 when the end is reached; failed checks are handled by the
 * TEST_* macros (defined outside this block).
 */
int
main(int argc, char *argv[])
{
    acmod_t *acmod;
    logmath_t *lmath;
    cmd_ln_t *config;
    FILE *rawfh;
    int16 *buf;
    int16 const *bptr;
    mfcc_t **cepbuf, **cptr;
    size_t nread, nsamps;
    int nfr;
    int frame_counter;
    /* Reference best scores from pass 1; only indices below 190 are used. */
    int bestsen1[270];

    lmath = logmath_init(1.0001, 0, 0);
    config = cmd_ln_init(NULL, ps_args(), TRUE,
                 "-featparams", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/feat.params",
                 "-mdef", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/mdef",
                 "-mean", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/means",
                 "-var", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/variances",
                 "-tmat", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/transition_matrices",
                 "-sendump", MODELDIR "/hmm/en_US/hub4wsj_sc_8k/sendump",
                 "-compallsen", "true",
                 "-cmn", "prior",
                 "-tmatfloor", "0.0001",
                 "-mixwfloor", "0.001",
                 "-varfloor", "0.0001",
                 "-mmap", "no",
                 "-topn", "4",
                 "-ds", "1",
                 "-input_endian", "little",
                 "-samprate", "16000", NULL);
    TEST_ASSERT(config);
    TEST_ASSERT(acmod = acmod_init(config, lmath, NULL, NULL));
    /* `prior` is defined outside this block; the same prior is re-applied
     * before every pass below. */
    cmn_prior_set(acmod->fcb->cmn_struct, prior);

    /* ---- Pass 1: raw audio, 2048 samples at a time. ---- */
    nsamps = 2048;
    frame_counter = 0;
    buf = ckd_calloc(nsamps, sizeof(*buf));
    TEST_ASSERT(rawfh = fopen(DATADIR "/goforward.raw", "rb"));
    TEST_EQUAL(0, acmod_start_utt(acmod));
    E_INFO("Incremental(2048):\n");
    while (!feof(rawfh)) {
        nread = fread(buf, sizeof(*buf), nsamps, rawfh);
        bptr = buf;
        /* Keep calling until no frames were produced and no samples of this
         * chunk remain (bptr/nread are updated by acmod_process_raw). */
        while ((nfr = acmod_process_raw(acmod, &bptr, &nread, FALSE)) > 0 || nread > 0) {
            /* senscr is assigned below but never read in this function. */
            int16 const *senscr;
            int16 best_score;
            int frame_idx = -1, best_senid;
            /* Score and consume every feature frame currently available. */
            while (acmod->n_feat_frame > 0) {
                senscr = acmod_score(acmod, &frame_idx);
                acmod_advance(acmod);
                best_score = acmod_best_score(acmod, &best_senid);
                E_INFO("Frame %d best senone %d score %d\n",
                       frame_idx, best_senid, best_score);
                TEST_EQUAL(frame_counter, frame_idx);
                /* Record the reference score for the later passes. */
                if (frame_counter < 190)
                    bestsen1[frame_counter] = best_score;
                ++frame_counter;
                /* Reset to -1 before the next acmod_score() call. */
                frame_idx = -1;
            }
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    nread = 0;
    /* Score the frames that are still pending after acmod_end_utt(). */
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                bestsen1[frame_counter] = best_score;
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* ---- Pass 2: now try to process the whole thing at once. ---- */
    E_INFO("Whole utterance:\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    /* Pass 1 read to end of file, so the current offset gives the size of
     * the file; convert it to a sample count. */
    nsamps = ftell(rawfh) / sizeof(*buf);
    clearerr(rawfh);
    fseek(rawfh, 0, SEEK_SET);
    buf = ckd_realloc(buf, nsamps * sizeof(*buf));
    TEST_EQUAL(nsamps, fread(buf, sizeof(*buf), nsamps, rawfh));
    bptr = buf;
    TEST_EQUAL(0, acmod_start_utt(acmod));
    acmod_process_raw(acmod, &bptr, &nsamps, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            /* Compare against the scores recorded in pass 1. */
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* ---- Pass 3: now process MFCCs and make sure we get the same
     * results. ---- */
    /* frame_counter holds the number of frames scored in pass 2 and sizes
     * the cepstral buffer. */
    cepbuf = ckd_calloc_2d(frame_counter,
                   fe_get_output_size(acmod->fe),
                   sizeof(**cepbuf));
    fe_start_utt(acmod->fe);
    /* The file offset is at the end again after the full read in pass 2. */
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    /* Output from fe_end_utt() goes into the last row of cepbuf. */
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    E_INFO("Incremental(MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    frame_counter = 0;
    while ((acmod_process_cep(acmod, &cptr, &nfr, FALSE)) > 0) {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }
    TEST_EQUAL(0, acmod_end_utt(acmod));
    /* One more call with zero input frames after the utterance has ended,
     * then score whatever feature frames are available. */
    nfr = 0;
    acmod_process_cep(acmod, &cptr, &nfr, FALSE);
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            TEST_EQUAL(frame_counter, frame_idx);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* ---- Pass 4: whole utterance of MFCCs. ----
     * The MFCCs have to be computed again from the audio because
     * s2mfc2feat modifies its argument, so cepbuf from pass 3 cannot be
     * reused as-is. */
    fe_start_utt(acmod->fe);
    nsamps = ftell(rawfh) / sizeof(*buf);
    bptr = buf;
    nfr = frame_counter;
    fe_process_frames(acmod->fe, &bptr, &nsamps, cepbuf, &nfr);
    fe_end_utt(acmod->fe, cepbuf[frame_counter-1], &nfr);

    E_INFO("Whole utterance (MFCC):\n");
    cmn_prior_set(acmod->fcb->cmn_struct, prior);
    TEST_EQUAL(0, acmod_start_utt(acmod));
    cptr = cepbuf;
    nfr = frame_counter;
    acmod_process_cep(acmod, &cptr, &nfr, TRUE);
    TEST_EQUAL(0, acmod_end_utt(acmod));
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* ---- Pass 5: rewind and score again without feeding new input. ---- */
    E_INFO("Rewound (MFCC):\n");
    TEST_EQUAL(0, acmod_rewind(acmod));
    {
        int16 const *senscr;
        int16 best_score;
        int frame_idx = -1, best_senid;
        frame_counter = 0;
        while (acmod->n_feat_frame > 0) {
            senscr = acmod_score(acmod, &frame_idx);
            acmod_advance(acmod);
            best_score = acmod_best_score(acmod, &best_senid);
            E_INFO("Frame %d best senone %d score %d\n",
                   frame_idx, best_senid, best_score);
            if (frame_counter < 190)
                TEST_EQUAL_LOG(best_score, bestsen1[frame_counter]);
            TEST_EQUAL(frame_counter, frame_idx);
            ++frame_counter;
            frame_idx = -1;
        }
    }

    /* Clean up, go home. */
    ckd_free_2d(cepbuf);
    fclose(rawfh);
    ckd_free(buf);
    acmod_free(acmod);
    logmath_free(lmath);
    cmd_ln_free_r(config);
    return 0;
}
dtree_t * mk_tree(float32 ****mixw, float32 ****means, float32 ****vars, uint32 *veclen, uint32 n_model, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, uint32 *id, uint32 n_id, quest_t *all_q, uint32 n_all_q, pset_t *pset, uint32 **dfeat, uint32 n_dfeat, uint32 split_min, uint32 split_max, float32 split_thr, float32 mwfloor) { dtree_t *s_tree; uint32 i; dtree_node_t *b_n, *root; s_tree = ckd_calloc(1, sizeof(dtree_t)); s_tree->node = ckd_calloc(2*split_max + 1, sizeof(dtree_node_t)); s_tree->n_node = 0; s_tree->node[0].node_id = 0; s_tree->n_node = 1; root = &s_tree->node[0]; mk_node(root, 0, id, n_id, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, mwfloor); set_best_quest(root, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, all_q, n_all_q, pset, dfeat, n_dfeat, mwfloor); if (root->q == NULL) { /* No question found that is able to split node; can't go any further */ free_tree(s_tree); return NULL; } for (i = 0; i < split_max; i++) { b_n = best_leaf_node(root); if (b_n == NULL) { E_INFO("stop. leaf nodes are specific\n"); break; } /* DDDDDBUG The following criteria will fail if we use only likelihood and no likelihood increase */ if (b_n->wt_ent_dec <= 0) { E_INFO("stop. b_n->wt_ent_dec (%.3e) <= 0\n", b_n->wt_ent_dec); break; } if ((i > split_min) && (b_n->wt_ent_dec < split_thr * b_n->wt_ent)) { E_INFO("stop. b_n->wt_ent_dec (%.3e) < split_thr * b_n->wt_ent (%.3e)\n", b_n->wt_ent_dec, b_n->wt_ent * split_thr); break; } split_node(s_tree, b_n->node_id, mixw, means, vars, veclen, n_model, n_state, n_stream, n_density, stwt, all_q, n_all_q, pset, dfeat, n_dfeat, mwfloor); } #if 1 E_INFO("Final simple tree\n"); print_tree(stderr, "|", root, pset, 0); fprintf(stderr, "\n"); #endif return s_tree; }
/* * Read specified segment [sf-win..ef+win] of Sphinx-II format mfc file read and return * #frames read. Return -1 if error. */ int32 feat_s2mfc_read(char *file, int32 win, int32 sf, int32 ef, mfcc_t ***out_mfc, int32 maxfr, int32 cepsize) { FILE *fp; int32 n_float32; float32 *float_feat; struct stat statbuf; int32 i, n, byterev; int32 start_pad, end_pad; mfcc_t **mfc; /* Initialize the output pointer to NULL, so that any attempts to free() it if we fail before allocating it will not segfault! */ if (out_mfc) *out_mfc = NULL; E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef); if (ef >= 0 && ef <= sf) { E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf); return -1; } /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */ if ((stat_retry(file, &statbuf) < 0) || ((fp = fopen(file, "rb")) == NULL)) { E_ERROR("Failed to open file '%s' for reading: %s\n", file, strerror(errno)); return -1; } /* Read #floats in header */ if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) { E_ERROR("%s: fread(#floats) failed\n", file); fclose(fp); return -1; } /* Check if n_float32 matches file size */ byterev = 0; if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */ n = n_float32; SWAP_INT32(&n); if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */ E_ERROR ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n", file, n_float32, n_float32, statbuf.st_size, statbuf.st_size); fclose(fp); return -1; } n_float32 = n; byterev = 1; } if (n_float32 <= 0) { E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32); fclose(fp); return -1; } /* Convert n to #frames of input */ n = n_float32 / cepsize; if (n * cepsize != n_float32) { E_ERROR("Header size field: %d; not multiple of %d\n", n_float32, cepsize); fclose(fp); return -1; } /* Check start and end frames */ if (sf > 0) { if 
(sf >= n) { E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file, sf, n); fclose(fp); return -1; } } if (ef < 0) ef = n-1; else if (ef >= n) { E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n", file, ef, n); ef = n-1; } /* Add window to start and end frames */ sf -= win; ef += win; if (sf < 0) { start_pad = -sf; sf = 0; } else start_pad = 0; if (ef >= n) { end_pad = ef - n + 1; ef = n - 1; } else end_pad = 0; /* Limit n if indicated by [sf..ef] */ if ((ef - sf + 1) < n) n = (ef - sf + 1); if (maxfr > 0 && n + start_pad + end_pad > maxfr) { E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n", file, maxfr, n + start_pad + end_pad); fclose(fp); return -1; } /* If no output buffer was supplied, then skip the actual data reading. */ if (out_mfc != NULL) { /* Position at desired start frame and read actual MFC data */ mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t)); if (sf > 0) fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR); n_float32 = n * cepsize; #ifdef FIXED_POINT float_feat = ckd_calloc(n_float32, sizeof(float32)); #else float_feat = mfc[start_pad]; #endif if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) { E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize); ckd_free_2d(mfc); fclose(fp); return -1; } if (byterev) { for (i = 0; i < n_float32; i++) { SWAP_FLOAT32(&float_feat[i]); } } #ifdef FIXED_POINT for (i = 0; i < n_float32; ++i) { mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]); } ckd_free(float_feat); #endif /* Replicate start and end frames if necessary. */ for (i = 0; i < start_pad; ++i) memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t)); for (i = 0; i < end_pad; ++i) memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1], cepsize * sizeof(mfcc_t)); *out_mfc = mfc; } fclose(fp); return n + start_pad + end_pad; }
void cluster_leaves(dtree_t *tr, uint32 *veclen, float64 *wt_ent_dec, uint32 *out_n_a, uint32 *out_n_b, pset_t *pset, uint32 n_state, uint32 n_stream, uint32 n_density, float32 *stwt, float32 mwfloor) { uint32 n_leaf; float32 ****mixw_occ; uint32 *clust, n_a, n_b; uint32 *node_id; dtree_node_t *root; uint32 i; float32 ****means=0; float32 ****vars=0; const char* type; uint32 continuous, sumveclen; type = cmd_ln_str("-ts2cbfn"); if (strcmp(type,".semi.")!=0 && strcmp(type,".cont.") != 0) E_FATAL("Type %s unsupported; trees can only be built on types .semi. or .cont.\n",type); if (strcmp(type,".cont.") == 0) continuous = 1; else continuous = 0; root = &tr->node[0]; /* determine the # of leaf nodes in the simple tree */ n_leaf = cnt_leaf(root); if (continuous == 1) { for (i=0,sumveclen=0; i < n_stream; i++) sumveclen += veclen[i]; means = (float32 ****)ckd_calloc_4d(n_leaf, n_state, n_stream, sumveclen, sizeof(float32)); vars = (float32 ****)ckd_calloc_4d(n_leaf, n_state, n_stream, sumveclen, sizeof(float32)); } /* Alloc space for: * - leaf node distribution array * - leaf node cluster id array * - leaf node id array */ mixw_occ = (float32 ****)ckd_calloc_4d(n_leaf, n_state, n_stream, n_density, sizeof(float32)); clust = (uint32 *)ckd_calloc(n_leaf, sizeof(uint32)); node_id = (uint32 *)ckd_calloc(n_leaf, sizeof(uint32)); /* compute the density occupancies of the leaves */ leaf_mixw_occ(root, pset, mixw_occ, node_id, n_state, n_stream, n_density, 0); if (continuous == 1) { /* compute means and variances of the leaves */ leaf_mean_vars(root, pset, means, vars, node_id, n_state, n_stream, veclen, 0); } /* Cluster the leaf nodes into two classes */ *wt_ent_dec = two_class(mixw_occ, means, vars, veclen, n_leaf, n_state, n_stream, n_density, stwt, clust, mwfloor); for (i = 0; i < n_leaf; i++) { tr->node[node_id[i]].clust = clust[i]; } /* Simplify the tree based on the two classes * (i.e. 
if siblings belong to the same class, * delete the node) */ prune_leaves(root, pset); /* Determine how many leaf nodes in class A and B * in the simplified tree */ n_a = n_b = 0; cnt_class(root, &n_a, &n_b); #if 0 fprintf(stderr, "Pruned tree %u/%u:\n", n_a, n_b); print_tree(stderr, "|", root, pset, 1); fprintf(stderr, "\n"); #endif *out_n_a = n_a; *out_n_b = n_b; }
/*
 * Load per-senone interpolation weights from a binary file into ip.
 *
 * File layout as read here: a header (argument/value pairs and byte-order
 * magic, parsed by bio_readhdr), a 32-bit senone count, then one float32
 * weight per senone, optionally followed by a checksum when the header has
 * a "chksum0" argument.  A "version" argument that differs from
 * INTERP_VERSION only produces a warning.
 *
 * For each weight f, ip->wt[i].cd is set to log(f) and ip->wt[i].ci to
 * log(1 - f), computed with logs3() on ip->logmath; S3_LOGPROB_ZERO is
 * used in place of log(0).  ip->n_sen and ip->wt are overwritten.
 *
 * Every failure (open, header, short read, non-positive count, weight
 * outside [0, 1] or NaN, trailing data) is reported through E_FATAL /
 * E_FATAL_SYSTEM.  Returns 1 on success.
 */
static int32
interp_read(interp_t * ip, const char *file_name)
{
    FILE *fp;
    int32 byteswap, chksum_present;
    int32 i;
    char eofchk;
    float32 f;
    char **argname, **argval;
    uint32 chksum;

    E_INFO("Reading interpolation weights: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], INTERP_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], INTERP_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones */
    if (bio_fread(&(ip->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        E_FATAL("fread(%s) (arraysize) failed\n", file_name);
    if (ip->n_sen <= 0)
        E_FATAL("%s: arraysize= %d in header\n", file_name, ip->n_sen);

    ip->wt = (struct interp_wt_s *) ckd_calloc(ip->n_sen,
                                               sizeof(struct interp_wt_s));

    for (i = 0; i < ip->n_sen; i++) {
        if (bio_fread(&f, sizeof(float32), 1, fp, byteswap, &chksum) != 1)
            E_FATAL("fread(%s) (arraydata) failed\n", file_name);
        /* Written as a negated range test so that NaN, which compares false
         * against everything, is rejected as well. */
        if (!((f >= 0.0) && (f <= 1.0)))
            E_FATAL("%s: interpolation weight(%d)= %e\n", file_name, i, f);

        ip->wt[i].cd = (f == 0.0) ? S3_LOGPROB_ZERO : logs3(ip->logmath, f);
        ip->wt[i].ci = (f == 1.0) ? S3_LOGPROB_ZERO : logs3(ip->logmath, 1.0 - f);
    }

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    /* Nothing may follow the weights (and checksum). */
    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read %d interpolation weights\n", ip->n_sen);

    return 1;
}
/*
 * Initialize the log-domain arithmetic for the given log base.
 *
 * Sets the file-level base B and the derived constants logB, invlogB and
 * invlog10B, then builds add_tbl[], the table used for adding
 * probabilities in the log domain: entry i holds
 * round(log_B(1 + B^-i)), and the table ends at the first entry that
 * rounds to 0.  add_tbl_size is the number of entries.
 *
 * base must be > 1.0 (fatal otherwise).  Calling again with the same base
 * only issues a warning and keeps the existing table; calling again with a
 * different base is fatal.
 *
 * Returns 0 on success, or -1 if the base is so small that table entries
 * would not fit in 16 bits.
 */
int32
logs3_init(float64 base)
{
    int32 i, k;
    float64 d, t, f;

    E_INFO("Initializing logbase: %e\n", base);

    /* Negated form so that a NaN base is rejected too. */
    if (!(base > 1.0))
        E_FATAL("Illegal logbase: %e; must be > 1.0\n", base);

    if (add_tbl) {
        if (B == base) {
            /* Already initialized with this base: keep the table instead
             * of rebuilding it and leaking the old allocation. */
            E_WARN("logs3_init() already done\n");
            return 0;
        }
        E_FATAL("logs3_init() already done with base %e\n", B);
    }

    B = base;
    logB = log(base);
    invlogB = 1.0 / logB;
    invlog10B = 1.0 / log10(base);

    /* Create add-table for adding probs in log domain.
     * k is the largest table entry, round(log_B(2)); it must fit in uint16. */
    k = (int32) (log(2.0) * invlogB + 0.5);
    if (k > 65535) {
        E_ERROR("Logbase too small: %e; needs int32 addtable[]\n", base);
        return -1;
    }

    f = 1.0 / B;

    /* Figure out size of add-table required: walk d = B^-i until the
     * entry rounds to zero. */
    d = 1.0;
    for (i = 0;; i++) {
        t = log(1.0 + d) * invlogB;
        k = (int32) (t + 0.5);

        if (k == 0)
            break;

        d *= f;
    }

    add_tbl_size = i + 1;
    add_tbl = (uint16 *) ckd_calloc(i + 1, sizeof(uint16));

    /* Fill add-table with the same sequence, terminating zero included. */
    d = 1.0;
    for (i = 0;; i++) {
        t = log(1.0 + d) * invlogB;
        k = (int32) (t + 0.5);

        add_tbl[i] = k;

        if (k == 0)
            break;

        d *= f;
    }

    E_INFO("Log-Add table size = %d\n", add_tbl_size);

    return 0;
}