ngram_model_t *
ngram_model_set_init(cmd_ln_t * config,
                     ngram_model_t ** models,
                     char **names, const float32 * weights, int32 n_models)
{
    ngram_model_set_t *model;
    ngram_model_t *base;
    logmath_t *lmath;
    int32 i, n;

    if (n_models == 0)          /* Refuse to create an empty set. */
        return NULL;

    /* Do consistency checking on the models.  They must all use the
     * same logbase and shift. */
    lmath = models[0]->lmath;
    for (i = 1; i < n_models; ++i) {
        if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath)
            || logmath_get_shift(models[i]->lmath) !=
            logmath_get_shift(lmath)) {
            E_ERROR
                ("Log-math parameters don't match, will not create LM set\n");
            return NULL;
        }
    }

    /* Allocate the combined model, initialize it. */
    model = ckd_calloc(1, sizeof(*model));
    base = &model->base;
    model->n_models = n_models;
    model->lms = ckd_calloc(n_models, sizeof(*model->lms));
    model->names = ckd_calloc(n_models, sizeof(*model->names));
    /* Initialize weights to a uniform distribution */
    model->lweights = ckd_calloc(n_models, sizeof(*model->lweights));
    {
        int32 uniform = logmath_log(lmath, 1.0 / n_models);
        for (i = 0; i < n_models; ++i)
            model->lweights[i] = uniform;
    }
    /* Default to interpolation if weights were given. */
    if (weights)
        model->cur = -1;

    n = 0;
    for (i = 0; i < n_models; ++i) {
        model->lms[i] = ngram_model_retain(models[i]);
        model->names[i] = ckd_salloc(names[i]);
        if (weights)
            model->lweights[i] = logmath_log(lmath, weights[i]);
        /* N is the maximum of all merged models. */
        if (models[i]->n > n)
            n = models[i]->n;
    }
    /* Allocate the history mapping table. */
    model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist));

    /* Now build the word-ID mapping and merged vocabulary. */
    build_widmap(base, lmath, n);
    return base;
}
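A minimal usage sketch may help here (editor's addition, not from the original source): building an interpolated set from two previously loaded models. The file names, set member names, and weights are hypothetical, and error handling is elided.

/* Editor's sketch: interpolate two LMs 70/30.  File names are
 * hypothetical; error handling is elided.  A real caller would also
 * release its own references with ngram_model_free(), since the set
 * retains the submodels. */
static ngram_model_t *
make_lmset(logmath_t *lmath)
{
    ngram_model_t *lms[2];
    char *names[] = { "general", "domain" };
    float32 weights[] = { 0.7f, 0.3f };

    lms[0] = ngram_model_read(NULL, "general.lm", NGRAM_AUTO, lmath);
    lms[1] = ngram_model_read(NULL, "domain.lm", NGRAM_AUTO, lmath);
    /* Both models must share lmath's base and shift, as checked above. */
    return ngram_model_set_init(NULL, lms, names, weights, 2);
}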
static fsg_model_t *
jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule,
                        logmath_t * lmath, float32 lw, int do_closure)
{
    fsg_model_t *fsg;
    glist_t nulls;
    gnode_t *gn;
    int rule_entry, rule_exit;

    /* Clear previous links */
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        ckd_free(gnode_ptr(gn));
    }
    glist_free(grammar->links);
    grammar->links = NULL;
    grammar->nstate = 0;

    /* Create the top-level entry state, and expand the top-level rule. */
    rule_entry = grammar->nstate++;
    rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE);

    /* If no exit state was created, create one. */
    if (rule_exit == NO_NODE) {
        rule_exit = grammar->nstate++;
        jsgf_add_link(grammar, NULL, rule_entry, rule_exit);
    }

    fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
    fsg->start_state = rule_entry;
    fsg->final_state = rule_exit;
    grammar->links = glist_reverse(grammar->links);
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        jsgf_link_t *link = gnode_ptr(gn);

        if (link->atom) {
            if (jsgf_atom_is_rule(link->atom)) {
                fsg_model_null_trans_add(fsg, link->from, link->to,
                                         logmath_log(lmath,
                                                     link->atom->weight));
            }
            else {
                int wid = fsg_model_word_add(fsg, link->atom->name);
                fsg_model_trans_add(fsg, link->from, link->to,
                                    logmath_log(lmath, link->atom->weight),
                                    wid);
            }
        }
        else {
            fsg_model_null_trans_add(fsg, link->from, link->to, 0);
        }
    }
    if (do_closure) {
        nulls = fsg_model_null_trans_closure(fsg, NULL);
        glist_free(nulls);
    }

    return fsg;
}
ngram_model_t *
ngram_model_set_add(ngram_model_t * base,
                    ngram_model_t * model,
                    const char *name, float32 weight, int reuse_widmap)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    float32 fprob;
    int32 scale, i;

    /* Add it to the array of lms. */
    ++set->n_models;
    set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms));
    set->lms[set->n_models - 1] = model;
    set->names = ckd_realloc(set->names,
                             set->n_models * sizeof(*set->names));
    set->names[set->n_models - 1] = ckd_salloc(name);
    /* Expand the history mapping table if necessary. */
    if (model->n > base->n) {
        base->n = model->n;
        set->maphist = ckd_realloc(set->maphist,
                                   (model->n - 1) * sizeof(*set->maphist));
    }

    /* Renormalize the interpolation weights. */
    fprob = weight * 1.0f / set->n_models;
    set->lweights = ckd_realloc(set->lweights,
                                set->n_models * sizeof(*set->lweights));
    set->lweights[set->n_models - 1] = logmath_log(base->lmath, fprob);
    /* Now normalize everything else to fit it in.  This is
     * accomplished by simply scaling all the other probabilities
     * by (1-fprob). */
    scale = logmath_log(base->lmath, 1.0 - fprob);
    for (i = 0; i < set->n_models - 1; ++i)
        set->lweights[i] += scale;

    /* Reuse the old word ID mapping if requested. */
    if (reuse_widmap) {
        int32 **new_widmap;

        /* Tack another column onto the widmap array. */
        new_widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
                                              sizeof(**new_widmap));
        for (i = 0; i < base->n_words; ++i) {
            /* Copy all the existing mappings. */
            memcpy(new_widmap[i], set->widmap[i],
                   (set->n_models - 1) * sizeof(**new_widmap));
            /* Create the new mapping. */
            new_widmap[i][set->n_models - 1] =
                ngram_wid(model, base->word_str[i]);
        }
        ckd_free_2d((void **) set->widmap);
        set->widmap = new_widmap;
    }
    else {
        build_widmap(base, base->lmath, base->n);
    }
    return model;
}
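Worked example (editor's note): suppose the set holds two models with lweights {log 0.5, log 0.5} and a third model is added with weight = 1.0. Then fprob = 1/3, the new model gets log(1/3), and scale = log(1 - 1/3) shifts both old weights to log(1/3), so the three weights again sum to one. Because the old weights summed to one, the identity (1 - w/n) + w/n = 1 keeps the result exactly normalized for any weight w.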
int32
ngram_model_add_class_word(ngram_model_t * model,
                           const char *classname,
                           const char *word, float32 weight)
{
    ngram_class_t *lmclass;
    int32 classid, tag_wid, wid, i, scale;
    float32 fprob;

    /* Find the class corresponding to classname.  Linear search
     * probably okay here since there won't be very many classes, and
     * this doesn't have to be fast. */
    tag_wid = ngram_wid(model, classname);
    if (tag_wid == NGRAM_INVALID_WID) {
        E_ERROR("No such word or class tag: %s\n", classname);
        return tag_wid;
    }
    for (classid = 0; classid < model->n_classes; ++classid) {
        if (model->classes[classid]->tag_wid == tag_wid)
            break;
    }
    /* Hmm, no such class.  It's probably not a good idea to create one. */
    if (classid == model->n_classes) {
        E_ERROR
            ("Word %s is not a class tag (call ngram_model_add_class() first)\n",
             classname);
        return NGRAM_INVALID_WID;
    }
    lmclass = model->classes[classid];

    /* Add this word to the model's set of words. */
    wid = ngram_add_word_internal(model, word, classid);
    if (wid == NGRAM_INVALID_WID)
        return wid;

    /* This is the fixed probability of the new word. */
    fprob = weight * 1.0f / (lmclass->n_words + lmclass->n_hash_inuse + 1);
    /* Now normalize everything else to fit it in.  This is
     * accomplished by simply scaling all the other probabilities
     * by (1-fprob). */
    scale = logmath_log(model->lmath, 1.0 - fprob);
    for (i = 0; i < lmclass->n_words; ++i)
        lmclass->prob1[i] += scale;
    for (i = 0; i < lmclass->n_hash; ++i)
        if (lmclass->nword_hash[i].wid != -1)
            lmclass->nword_hash[i].prob1 += scale;

    /* Now add it to the class hash table. */
    return ngram_class_add_word(lmclass, wid,
                                logmath_log(model->lmath, fprob));
}
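Worked example (editor's note): for a class that already holds four words whose in-class probabilities sum to one, adding a fifth word with weight = 1.0 gives fprob = 1/5 = 0.2; the four existing probabilities are each scaled by 1 - 0.2 = 0.8 (a constant shift of log 0.8 in the log domain), so the class again sums to one. The run_tests() function at the end of this section exercises exactly this case: after adding "scrappy:scylla" to a four-word class, its score is the class unigram score plus log(0.2).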
static int
write_lattice(ps_decoder_t *ps, char const *latdir, char const *uttid)
{
    ps_lattice_t *lat;
    logmath_t *lmath;
    cmd_ln_t *config;
    char *outfile;
    int32 beam;

    if ((lat = ps_get_lattice(ps)) == NULL) {
        E_ERROR("Failed to obtain word lattice for utterance %s\n", uttid);
        return -1;
    }
    config = ps_get_config(ps);
    outfile = string_join(latdir, "/", uttid,
                          cmd_ln_str_r(config, "-outlatext"), NULL);
    /* Prune lattice. */
    lmath = ps_get_logmath(ps);
    beam = logmath_log(lmath, cmd_ln_float64_r(config, "-outlatbeam"));
    ps_lattice_posterior_prune(lat, beam);
    if (0 == strcmp("htk", cmd_ln_str_r(config, "-outlatfmt"))) {
        if (ps_lattice_write_htk(lat, outfile) < 0) {
            E_ERROR("Failed to write lattice to %s\n", outfile);
            ckd_free(outfile);
            return -1;
        }
    }
    else {
        if (ps_lattice_write(lat, outfile) < 0) {
            E_ERROR("Failed to write lattice to %s\n", outfile);
            ckd_free(outfile);
            return -1;
        }
    }
    ckd_free(outfile);          /* string_join() allocates; don't leak it. */
    return 0;
}
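A hedged caller sketch (editor's addition): -outlatbeam is given as a linear posterior probability (e.g. 1e-5) and converted to the log domain before ps_lattice_posterior_prune() discards links whose posterior falls that far below the best path. The directory and utterance id below are illustrative.

/* Editor's sketch: dump the pruned lattice for one decoded utterance. */
static void
dump_utt_lattice(ps_decoder_t *ps)
{
    if (write_lattice(ps, "lattices", "utt_0001") < 0)
        E_ERROR("Lattice output for utt_0001 failed\n");
}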
ngram_model_t *
ngram_model_set_interp(ngram_model_t * base,
                       const char **names, const float32 * weights)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;

    /* If we have a set of weights here, then set them. */
    if (names && weights) {
        int32 i, j;

        /* We hope there aren't many models. */
        for (i = 0; i < set->n_models; ++i) {
            for (j = 0; j < set->n_models; ++j)
                if (0 == strcmp(names[i], set->names[j]))
                    break;
            if (j == set->n_models) {
                E_ERROR("Unknown LM name %s\n", names[i]);
                return NULL;
            }
            set->lweights[j] = logmath_log(base->lmath, weights[i]);
        }
    }
    else if (weights) {
        memcpy(set->lweights, weights,
               set->n_models * sizeof(*set->lweights));
    }
    /* Otherwise just enable existing weights. */
    set->cur = -1;
    return base;
}
static int32
lm_trie_add_ug(ngram_model_t * base, int32 wid, int32 lweight)
{
    ngram_model_trie_t *model = (ngram_model_trie_t *) base;

    /* This would be very bad if this happened! */
    assert(!NGRAM_IS_CLASSWID(wid));

    /* Reallocate unigram array. */
    model->trie->unigrams =
        (unigram_t *) ckd_realloc(model->trie->unigrams,
                                  sizeof(*model->trie->unigrams)
                                  * (base->n_1g_alloc + 1));
    memset(model->trie->unigrams + (base->n_counts[0] + 1), 0,
           (size_t) (base->n_1g_alloc - base->n_counts[0])
           * sizeof(*model->trie->unigrams));
    ++base->n_counts[0];
    lweight += logmath_log(base->lmath, 1.0 / base->n_counts[0]);
    model->trie->unigrams[wid + 1].next = model->trie->unigrams[wid].next;
    model->trie->unigrams[wid].prob = (float) lweight;
    /* This unigram by definition doesn't participate in any bigrams,
     * so its backoff weight is undefined and its next pointer is the
     * same as the final (sentinel) unigram's. */
    model->trie->unigrams[wid].bo = 0;

    /* Finally, increase the unigram count. */
    /* FIXME: Note that this can actually be quite bogus due to the
     * presence of class words.  If wid falls outside the unigram
     * count, increase it to compensate, at the cost of no longer
     * really knowing how many unigrams we have :( */
    if ((uint32) wid >= base->n_counts[0])
        base->n_counts[0] = wid + 1;

    return (int32) weight_score(base, lweight);
}
int32
ngram_model_add_word(ngram_model_t * model,
                     const char *word, float32 weight)
{
    int32 wid, prob = model->log_zero;

    /* Refuse to add words to a read-only (mmap'ed) model. */
    if (!model->writable) {
        E_WARN("Can't add word '%s' to read-only language model. "
               "Disable mmap with '-mmap no' to make it writable\n", word);
        return -1;
    }

    wid = ngram_add_word_internal(model, word, -1);
    if (wid == NGRAM_INVALID_WID)
        return wid;

    /* Do what needs to be done to add the word to the unigram. */
    if (model->funcs && model->funcs->add_ug)
        prob = (*model->funcs->add_ug) (model, wid,
                                        logmath_log(model->lmath, weight));
    if (prob == 0)
        return -1;

    return wid;
}
ngram_class_t *
ngram_class_new(ngram_model_t * model, int32 tag_wid, int32 start_wid,
                glist_t classwords)
{
    ngram_class_t *lmclass;
    gnode_t *gn;
    float32 tprob;
    int i;

    lmclass = ckd_calloc(1, sizeof(*lmclass));
    lmclass->tag_wid = tag_wid;
    /* start_wid is the wid (minus class tag) of the first word in the list. */
    lmclass->start_wid = start_wid;
    lmclass->n_words = glist_count(classwords);
    lmclass->prob1 = ckd_calloc(lmclass->n_words, sizeof(*lmclass->prob1));
    lmclass->nword_hash = NULL;
    lmclass->n_hash = 0;
    tprob = 0.0;
    for (gn = classwords; gn; gn = gnode_next(gn)) {
        tprob += gnode_float32(gn);
    }
    if (tprob > 1.1 || tprob < 0.9) {
        E_INFO("Total class probability is %f, will normalize\n", tprob);
        for (gn = classwords; gn; gn = gnode_next(gn)) {
            gn->data.fl /= tprob;
        }
    }
    for (i = 0, gn = classwords; gn; ++i, gn = gnode_next(gn)) {
        lmclass->prob1[i] = logmath_log(model->lmath, gnode_float32(gn));
    }

    return lmclass;
}
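Worked example (editor's note): given class words weighted {2.0, 1.0, 1.0}, tprob = 4.0 falls outside the [0.9, 1.1] tolerance band, so each weight is divided by 4.0, yielding probabilities {0.5, 0.25, 0.25} before the conversion to the log domain. Weights that already sum to approximately one, e.g. {0.5, 0.3, 0.2}, are left untouched.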
int
fsg_model_add_silence(fsg_model_t * fsg, char const *silword,
                      int state, float32 silprob)
{
    int32 logsilp;
    int n_trans, silwid, src;

    E_INFO("Adding silence transitions for %s to FSG\n", silword);
    silwid = fsg_model_word_add(fsg, silword);
    logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw);
    if (fsg->silwords == NULL)
        fsg->silwords = bitvec_alloc(fsg->n_word_alloc);
    bitvec_set(fsg->silwords, silwid);
    n_trans = 0;
    if (state == -1) {
        for (src = 0; src < fsg->n_state; src++) {
            fsg_model_trans_add(fsg, src, src, logsilp, silwid);
            ++n_trans;
        }
    }
    else {
        fsg_model_trans_add(fsg, state, state, logsilp, silwid);
        ++n_trans;
    }

    E_INFO("Added %d silence word transitions\n", n_trans);
    return n_trans;
}
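A typical call (editor's sketch; the words and probabilities are illustrative) adds a silence self-loop at every state by passing state == -1:

/* Editor's sketch: filler setup after building an FSG. */
static void
add_fillers(fsg_model_t *fsg)
{
    /* A <sil> self-loop at every state (state == -1 means "all states"). */
    fsg_model_add_silence(fsg, "<sil>", -1, 0.1);
    /* Or a loop at a single state, here the start state. */
    fsg_model_add_silence(fsg, "<noise>", fsg->start_state, 0.05);
}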
static fsg_model_t *
jsgf_build_fsg_internal(jsgf_t *grammar, jsgf_rule_t *rule,
                        logmath_t *lmath, float32 lw, int do_closure)
{
    fsg_model_t *fsg;
    glist_t nulls;
    gnode_t *gn;

    /* Clear previous links */
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        ckd_free(gnode_ptr(gn));
    }
    glist_free(grammar->links);
    grammar->links = NULL;
    rule->entry = rule->exit = 0;
    grammar->nstate = 0;
    expand_rule(grammar, rule);

    fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate);
    fsg->start_state = rule->entry;
    fsg->final_state = rule->exit;
    grammar->links = glist_reverse(grammar->links);
    for (gn = grammar->links; gn; gn = gnode_next(gn)) {
        jsgf_link_t *link = gnode_ptr(gn);

        if (link->atom) {
            if (jsgf_atom_is_rule(link->atom)) {
                fsg_model_null_trans_add(fsg, link->from, link->to,
                                         logmath_log(lmath,
                                                     link->atom->weight));
            }
            else {
                int wid = fsg_model_word_add(fsg, link->atom->name);
                fsg_model_trans_add(fsg, link->from, link->to,
                                    logmath_log(lmath, link->atom->weight),
                                    wid);
            }
        }
        else {
            fsg_model_null_trans_add(fsg, link->from, link->to, 0);
        }
    }
    if (do_closure) {
        nulls = fsg_model_null_trans_closure(fsg, NULL);
        glist_free(nulls);
    }

    return fsg;
}
static int
trie_apply_weights(ngram_model_t * base, float32 lw, float32 wip)
{
    /* Just update the weights that will be used in score calculation. */
    base->lw = lw;
    base->log_wip = logmath_log(base->lmath, wip);
    return 0;
}
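Editor's note: these two fields feed the standard transform applied to a raw n-gram log probability at score time, score = raw * lw + log(wip). A hypothetical helper (not the library's actual weight_score(), which also handles the unigram weight) makes the relationship explicit:

/* Editor's sketch: scale the log probability by the language weight
 * and add the word insertion penalty, both in log domain. */
static int32
apply_lm_weights(ngram_model_t *base, int32 raw_logprob)
{
    return (int32)(raw_logprob * base->lw) + base->log_wip;
}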
ngram_model_t *
ngram_model_set_remove(ngram_model_t * base,
                       const char *name, int reuse_widmap)
{
    ngram_model_set_t *set = (ngram_model_set_t *) base;
    ngram_model_t *submodel;
    int32 lmidx, scale, n, i;
    float32 fprob;

    for (lmidx = 0; lmidx < set->n_models; ++lmidx)
        if (0 == strcmp(name, set->names[lmidx]))
            break;
    if (lmidx == set->n_models)
        return NULL;
    submodel = set->lms[lmidx];

    /* Renormalize the interpolation weights by scaling them by
     * 1/(1-fprob) */
    fprob = (float32) logmath_exp(base->lmath, set->lweights[lmidx]);
    scale = logmath_log(base->lmath, 1.0 - fprob);

    /* Remove it from the array of lms, renormalize the remaining
     * weights, and recalculate n. */
    --set->n_models;
    n = 0;
    ckd_free(set->names[lmidx]);
    set->names[lmidx] = NULL;
    for (i = 0; i < set->n_models; ++i) {
        if (i >= lmidx) {
            set->lms[i] = set->lms[i + 1];
            set->names[i] = set->names[i + 1];
            set->lweights[i] = set->lweights[i + 1];
        }
        set->lweights[i] -= scale;
        if (set->lms[i]->n > n)
            n = set->lms[i]->n;
    }
    /* There's no need to shrink these arrays. */
    set->lms[set->n_models] = NULL;
    set->lweights[set->n_models] = base->log_zero;
    /* No need to shrink maphist either. */

    /* Reuse the existing word ID mapping if requested. */
    if (reuse_widmap) {
        /* Just go through and shrink each row. */
        for (i = 0; i < base->n_words; ++i) {
            memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1,
                    (set->n_models - lmidx) * sizeof(**set->widmap));
        }
    }
    else {
        build_widmap(base, base->lmath, n);
    }
    return submodel;
}
static int
phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
{
    phone_loop_search_t *pls = (phone_loop_search_t *)search;
    cmd_ln_t *config = ps_search_config(search);
    acmod_t *acmod = ps_search_acmod(search);
    int i;

    /* Free old dict2pid, dict, if necessary. */
    ps_search_base_reinit(search, dict, d2p);

    /* Initialize HMM context. */
    if (pls->hmmctx)
        hmm_context_free(pls->hmmctx);
    pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
                                   acmod->tmat->tp, NULL,
                                   acmod->mdef->sseq);
    if (pls->hmmctx == NULL)
        return -1;

    /* Initialize phone HMMs. */
    if (pls->phones) {
        for (i = 0; i < pls->n_phones; ++i)
            hmm_deinit((hmm_t *)&pls->phones[i]);
        ckd_free(pls->phones);
    }
    pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
    pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones));
    for (i = 0; i < pls->n_phones; ++i) {
        pls->phones[i].ciphone = i;
        hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i],
                 FALSE,
                 bin_mdef_pid2ssid(acmod->mdef, i),
                 bin_mdef_pid2tmatid(acmod->mdef, i));
    }
    pls->beam = logmath_log(acmod->lmath,
                            cmd_ln_float64_r(config, "-pl_beam"));
    pls->pbeam = logmath_log(acmod->lmath,
                             cmd_ln_float64_r(config, "-pl_pbeam"));
    pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip"));
    E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
           pls->beam, pls->pbeam, pls->pip);

    return 0;
}
int32
ngram_model_init(ngram_model_t *base, ngram_funcs_t *funcs,
                 logmath_t *lmath, int32 n, int32 n_unigram)
{
    base->refcount = 1;
    base->funcs = funcs;
    base->n = n;
    /* Allocate counts unless this was previously initialized. */
    if (base->n_counts == NULL)
        base->n_counts = ckd_calloc(3, sizeof(*base->n_counts));
    /* Don't reset the weights if the logmath object hasn't changed. */
    if (base->lmath != lmath) {
        /* Set default values for weights. */
        base->lw = 1.0;
        base->log_wip = 0;      /* i.e. 1.0 */
        base->log_uw = 0;       /* i.e. 1.0 */
        base->log_uniform = logmath_log(lmath, 1.0 / n_unigram);
        base->log_uniform_weight = logmath_get_zero(lmath);
        base->log_zero = logmath_get_zero(lmath);
        base->lmath = lmath;
    }
    /* Allocate or reallocate space for word strings. */
    if (base->word_str) {
        /* Free all previous word strings if they were allocated. */
        if (base->writable) {
            int32 i;
            for (i = 0; i < base->n_words; ++i) {
                ckd_free(base->word_str[i]);
                base->word_str[i] = NULL;
            }
        }
        base->word_str = ckd_realloc(base->word_str,
                                     n_unigram * sizeof(char *));
    }
    else
        base->word_str = ckd_calloc(n_unigram, sizeof(char *));
    /* NOTE: Word strings are no longer case-insensitive, since we allow
     * other encodings for them.  Beware. */
    if (base->wid)
        hash_table_empty(base->wid);
    else
        base->wid = hash_table_new(n_unigram, FALSE);
    base->n_counts[0] = base->n_1g_alloc = base->n_words = n_unigram;

    return 0;
}
/*
 * Some of the Gaussian density computation can be carried out in advance:
 *     log(determinant) calculation,
 *     1/(2*var) in the exponent.
 * NOTE: The density computation is performed in log domain.
 */
static int32
gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor)
{
    int32 i, m, f, d, flen;
    mfcc_t *meanp;
    mfcc_t *varp;
    mfcc_t *detp;
    int32 floored;

    floored = 0;
    /* Allocate space for determinants */
    g->det = (mfcc_t ***) ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density,
                                        sizeof(***g->det));

    for (m = 0; m < g->n_mgau; m++) {
        for (f = 0; f < g->n_feat; f++) {
            flen = g->featlen[f];

            /* Determinants for all variance vectors in g->[m][f] */
            for (d = 0, detp = g->det[m][f]; d < g->n_density;
                 d++, detp++) {
                *detp = 0;
                for (i = 0, varp = g->var[m][f][d], meanp = g->mean[m][f][d];
                     i < flen; i++, varp++, meanp++) {
                    float32 *fvarp = (float32 *) varp;

#ifdef FIXED_POINT
                    float32 *fmp = (float32 *) meanp;
                    *meanp = FLOAT2MFCC(*fmp);
#endif
                    if (*fvarp < varfloor) {
                        *fvarp = varfloor;
                        ++floored;
                    }
                    *detp += (mfcc_t)
                        logmath_log(lmath,
                                    1.0 / sqrt(*fvarp * 2.0 * M_PI));
                    /* Precompute this part of the exponential */
                    *varp = (mfcc_t)
                        logmath_ln_to_log(lmath, (1.0 / (*fvarp * 2.0)));
                }
            }
        }
    }

    E_INFO("%d variance values floored\n", floored);
    return 0;
}
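Editor's note on the math: for a diagonal-covariance Gaussian, log N(x; mu, sigma^2) = sum_i [ log(1/sqrt(2*pi*sigma_i^2)) - (x_i - mu_i)^2 / (2*sigma_i^2) ]. The first term is accumulated once per density into g->det, and 1/(2*sigma_i^2) replaces the stored variance, so per-frame scoring reduces to a subtract, a square, and a multiply per dimension. A simplified, hypothetical consumer of the precomputed values (the real compute functions also handle fixed point and top-N shortlisting):

/* Editor's sketch of how the precomputed det and 1/(2*var) terms are
 * used at score time (simplified, hypothetical). */
static mfcc_t
diag_gauden_logdens(mfcc_t const *x, mfcc_t const *mean,
                    mfcc_t const *var_half_recip, mfcc_t det, int32 veclen)
{
    mfcc_t d = det;
    int32 i;

    for (i = 0; i < veclen; ++i) {
        mfcc_t diff = x[i] - mean[i];
        d -= diff * diff * var_half_recip[i];   /* (x-mu)^2 / (2*var) */
    }
    return d;
}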
int32
ngram_model_add_word(ngram_model_t *model,
                     const char *word, float32 weight)
{
    int32 wid, prob = model->log_zero;

    wid = ngram_add_word_internal(model, word, -1);
    if (wid == NGRAM_INVALID_WID)
        return wid;

    /* Do what needs to be done to add the word to the unigram. */
    if (model->funcs && model->funcs->add_ug)
        prob = (*model->funcs->add_ug)(model, wid,
                                       logmath_log(model->lmath, weight));
    if (prob == 0) {
        if (model->writable)
            ckd_free(model->word_str[wid]);
        return -1;
    }

    return wid;
}
static int32
lm3g_template_raw_score(ngram_model_t *base, int32 wid,
                        int32 *history, int32 n_hist,
                        int32 *n_used)
{
    NGRAM_MODEL_TYPE *model = (NGRAM_MODEL_TYPE *)base;
    int32 score;

    switch (n_hist) {
    case 0:
        /* Access mode: unigram */
        *n_used = 1;
        /* Undo insertion penalty. */
        score = model->lm3g.unigrams[wid].prob1.l - base->log_wip;
        /* Undo language weight. */
        score = (int32)(score / base->lw);
        /* Undo unigram interpolation */
        if (strcmp(base->word_str[wid], "<s>") != 0) {  /* FIXME: configurable start_sym */
            /* This operation is numerically unstable, so try to avoid
             * it as much as possible. */
            if (base->log_uniform + base->log_uniform_weight
                > logmath_get_zero(base->lmath)) {
                score = logmath_log(base->lmath,
                                    logmath_exp(base->lmath, score)
                                    - logmath_exp(base->lmath,
                                                  base->log_uniform
                                                  + base->log_uniform_weight));
            }
        }
        return score;
    case 1:
        score = lm3g_bg_score(model, history[0], wid, n_used);
        break;
    case 2:
    default:
        /* Anything greater than 2 is the same as a trigram for now. */
        score = lm3g_tg_score(model, history[1], history[0], wid, n_used);
        break;
    }

    /* FIXME (maybe): This doesn't undo unigram weighting in backoff cases. */
    return (int32)((score - base->log_wip) / base->lw);
}
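Editor's note on the inversion order: elsewhere in ngram_model.c the stored unigram score is built roughly as score = lw * logadd(P(w) * uw, (1 - uw)/N) + log(wip), so this function undoes the steps in reverse: subtract log(wip), divide by lw, then strip the uniform mass (1 - uw)/N by log-domain subtraction. That last step exponentiates two nearly equal quantities and takes the log of their difference, which is why it is guarded and flagged as numerically unstable.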
static int32
s3_precomp(s2_semi_mgau_t *s, logmath_t *lmath, float32 vFloor)
{
    int feat;

    for (feat = 0; feat < s->n_feat; ++feat) {
        float32 *fmp;
        mfcc_t *mp;
        mfcc_t *vp, *dp;
        int32 vecLen, i;

        vecLen = s->veclen[feat];
        fmp = (float32 *) s->means[feat];
        mp = s->means[feat];
        vp = s->vars[feat];
        dp = s->dets[feat];

        for (i = 0; i < s->n_density; ++i) {
            mfcc_t d;
            int32 j;

            d = 0;
            for (j = 0; j < vecLen; ++j, ++vp, ++mp, ++fmp) {
                float64 fvar;

                *mp = FLOAT2MFCC(*fmp);
                /* Always do these pre-calculations in floating point */
                fvar = *(float32 *) vp;
                if (fvar < vFloor)
                    fvar = vFloor;
                d += (mfcc_t) logmath_log(lmath,
                                          1 / sqrt(fvar * 2.0 * M_PI));
                *vp = (mfcc_t) logmath_ln_to_log(lmath,
                                                 1.0 / (2.0 * fvar));
            }
            *dp++ = d;
        }
    }
    return 0;
}
static int32
senone_mixw_read(logmath_t * logmath, senone_t *s, const char *file_name,
                 float64 mixwfloor)
{
    FILE *fp;
    char **argname, **argval;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 *pdf;
    int32 i, j, f, m, c, p, n_sen, n_err, n_cw, nval;
    char eofchk;
    mixw_t *fw;

    E_INFO("Reading senone mixture weights: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MIXW_PARAM_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones, #features, #codewords, arraysize */
    n_sen = s->n_sen;
    if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap,
                      &chksum) != 1)
        || (bio_fread(&n_cw, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&nval, sizeof(int32), 1, fp, byteswap,
                      &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if ((n_sen != 0) && (s->n_sen != n_sen))
        E_FATAL("#senones(%d) conflict with mapping file(%d)\n",
                s->n_sen, n_sen);
    if (s->n_sen >= MAX_SENID)
        E_FATAL("%s: #senones (%d) exceeds limit (%d)\n",
                file_name, s->n_sen, MAX_SENID);
    if (s->n_feat <= 0)
        E_FATAL("Bad #features: %d\n", s->n_feat);
    if (n_cw <= 0)
        E_FATAL("Bad #mixing-wts/senone: %d\n", n_cw);

    /* Allocate sen2mgau map if not yet done (i.e. no explicit mapping file given) */
    if (!s->sen2mgau) {
        assert((s->n_mgau == 0) || (s->n_mgau == 1));
        s->sen2mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(int32));

        if (s->n_mgau == 1) {
            /* Semicontinuous mode; all senones map to a single, shared gaussian: 0 */
            for (i = 0; i < s->n_sen; i++)
                s->sen2mgau[i] = 0;
        }
        else {
            /* Fully continuous mode; each senone maps to its own parent gaussian */
            s->n_mgau = s->n_sen;
            for (i = 0; i < s->n_sen; i++)
                s->sen2mgau[i] = i;
        }
    }
    else
        assert(s->n_mgau != 0);
    if (s->n_mgau >= MAX_MGAUID)
        E_FATAL("%s: #gauden (%d) exceeds limit (%d)\n",
                file_name, s->n_mgau, MAX_MGAUID);

    if (nval != s->n_sen * s->n_feat * n_cw) {
        E_FATAL("%s: #float32 values(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, nval, s->n_sen, s->n_feat, n_cw);
    }

    /*
     * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
     * All PDF values will be truncated (in the LSB positions) by these many bits.
     */
    if ((mixwfloor <= 0.0) || (mixwfloor >= 1.0))
        E_FATAL("mixwfloor (%e) not in range (0, 1)\n", mixwfloor);

    /* Allocate memory for s->mgau2sen and senone PDF data */
    build_mgau2sen(s, n_cw);

    /* Temporary structure to read in floats */
    pdf = (float32 *) ckd_calloc(n_cw, sizeof(float32));

    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
    n_err = 0;
    for (i = 0; i < s->n_sen; i++) {
        m = s->sen2mgau[i];     /* Parent mgau */
        j = s->mgau2sen_idx[i]; /* Index of senone i within list of senones for mgau m */
        fw = s->mgau2sen[m].feat_mixw;

        for (f = 0; f < s->n_feat; f++) {
            if (bio_fread((void *) pdf, sizeof(float32), n_cw,
                          fp, byteswap, &chksum) != n_cw) {
                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
            }

            /* Normalize and floor */
            if (vector_sum_norm(pdf, n_cw) == 0.0)
                n_err++;
            vector_floor(pdf, n_cw, mixwfloor);
            vector_sum_norm(pdf, n_cw);

            /* Convert to logs3 and store in the senone PDF */
            for (c = 0; c < n_cw; c++) {
                p = -logmath_log(logmath, pdf[c]);
                fw[f].prob[j][c] = p;
            }
        }
    }
    if (n_err > 0)
        E_WARN("Weight normalization failed for %d senones\n", n_err);

    ckd_free(pdf);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read mixture weights for %d senones: %d features x %d codewords\n",
           s->n_sen, s->n_feat, n_cw);

    return 0;
}
fsg_model_t *
fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw)
{
    fsg_model_t *fsg;
    hash_table_t *vocab;
    hash_iter_t *itor;
    int32 lastwid;
    char **wordptr;
    char *lineptr;
    char *fsgname;
    int32 lineno;
    int32 n, i, j;
    int n_state, n_trans, n_null_trans;
    glist_t nulls;
    float32 p;

    lineno = 0;
    vocab = hash_table_new(32, FALSE);
    wordptr = NULL;
    lineptr = NULL;
    nulls = NULL;
    fsgname = NULL;
    fsg = NULL;

    /* Scan up to the FSG_BEGIN header */
    for (;;) {
        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n < 0) {
            E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) {
            if (n > 2) {
                E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n",
                        lineno);
                goto parse_error;
            }
            break;
        }
    }

    /* Save the FSG name, or it will get clobbered below :(.
     * If the name is missing, try the default. */
    if (n == 2) {
        fsgname = ckd_salloc(wordptr[1]);
    }
    else {
        E_WARN("FSG name is missing\n");
        fsgname = ckd_salloc("unknown");
    }

    /* Read #states */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &n_state) != 1)
        || (n_state <= 0)) {
        E_ERROR("Line[%d]: #states declaration line missing or malformed\n",
                lineno);
        goto parse_error;
    }

    /* Now create the FSG. */
    fsg = fsg_model_init(fsgname, lmath, lw, n_state);
    ckd_free(fsgname);
    fsgname = NULL;

    /* Read the start state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1)
        || (fsg->start_state < 0)
        || (fsg->start_state >= fsg->n_state)) {
        E_ERROR("Line[%d]: start state declaration line missing or malformed\n",
                lineno);
        goto parse_error;
    }

    /* Read the final state */
    n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
    if ((n != 2)
        || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0)
            && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0))
        || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1)
        || (fsg->final_state < 0)
        || (fsg->final_state >= fsg->n_state)) {
        E_ERROR("Line[%d]: final state declaration line missing or malformed\n",
                lineno);
        goto parse_error;
    }

    /* Read the transitions */
    lastwid = 0;
    n_trans = n_null_trans = 0;
    for (;;) {
        int32 wid, tprob;

        n = nextline_str2words(fp, &lineno, &lineptr, &wordptr);
        if (n <= 0) {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) {
            break;
        }

        if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0)
            || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) {
            if (((n != 4) && (n != 5))
                || (sscanf(wordptr[1], "%d", &i) != 1)
                || (sscanf(wordptr[2], "%d", &j) != 1)
                || (i < 0) || (i >= fsg->n_state)
                || (j < 0) || (j >= fsg->n_state)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n",
                     lineno);
                goto parse_error;
            }

            p = atof_c(wordptr[3]);
            if ((p <= 0.0) || (p > 1.0)) {
                E_ERROR
                    ("Line[%d]: transition spec malformed; Expecting float as transition probability\n",
                     lineno);
                goto parse_error;
            }
        }
        else {
            E_ERROR("Line[%d]: transition or FSG_END statement expected\n",
                    lineno);
            goto parse_error;
        }

        tprob = (int32) (logmath_log(lmath, p) * fsg->lw);
        /* Add the word to the "dictionary". */
        if (n > 4) {
            if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) {
                (void) hash_table_enter_int32(vocab,
                                              ckd_salloc(wordptr[4]),
                                              lastwid);
                wid = lastwid;
                ++lastwid;
            }
            fsg_model_trans_add(fsg, i, j, tprob, wid);
            ++n_trans;
        }
        else {
            if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) {
                ++n_null_trans;
                nulls = glist_add_ptr(nulls,
                                      fsg_model_null_trans(fsg, i, j));
            }
        }
    }

    E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n",
           fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans);

    /* Now create a string table from the "dictionary" */
    fsg->n_word = hash_table_inuse(vocab);
    fsg->n_word_alloc = fsg->n_word + 10;       /* Pad it a bit. */
    fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab));
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor)) {
        char const *word = hash_entry_key(itor->ent);
        int32 wid = (int32) (long) hash_entry_val(itor->ent);
        fsg->vocab[wid] = (char *) word;
    }
    hash_table_free(vocab);

    /* Do transitive closure on null transitions */
    nulls = fsg_model_null_trans_closure(fsg, nulls);
    glist_free(nulls);

    ckd_free(lineptr);
    ckd_free(wordptr);

    return fsg;

  parse_error:
    for (itor = hash_table_iter(vocab); itor;
         itor = hash_table_iter_next(itor))
        ckd_free((char *) hash_entry_key(itor->ent));
    glist_free(nulls);
    hash_table_free(vocab);
    ckd_free(fsgname);
    ckd_free(lineptr);
    ckd_free(wordptr);
    fsg_model_free(fsg);
    return NULL;
}
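For reference, here is a minimal input this parser accepts (editor's example; the grammar name, states, words, and probabilities are illustrative). The short forms N, S, F, and T are accepted in place of NUM_STATES, START_STATE, FINAL_STATE, and TRANSITION, and a TRANSITION line with no word (four fields) defines a null transition:

/*
 *     FSG_BEGIN digits
 *     NUM_STATES 3
 *     START_STATE 0
 *     FINAL_STATE 2
 *     TRANSITION 0 1 0.5 one
 *     TRANSITION 0 1 0.5 two
 *     TRANSITION 1 2 1.0
 *     FSG_END
 */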
int
main(int argc, char *argv[])
{
    logmath_t *lmath;
    int32 rv;

    lmath = logmath_init(1.0001, 0, 1);
    TEST_ASSERT(lmath);

    printf("log(1e-150) = %d\n", logmath_log(lmath, 1e-150));
    TEST_EQUAL_LOG(logmath_log(lmath, 1e-150), -3454050);
    printf("exp(log(1e-150)) = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-150)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-150)), 1e-150);
    printf("log(1e-48) = %d\n", logmath_log(lmath, 1e-48));
    printf("exp(log(1e-48)) = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-48)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-48)), 1e-48);
    printf("log(42) = %d\n", logmath_log(lmath, 42));
    TEST_EQUAL_LOG(logmath_log(lmath, 42), 37378);
    printf("exp(log(42)) = %f\n",
           logmath_exp(lmath, logmath_log(lmath, 42)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 42)), 42);
    printf("log(1e-3 + 5e-3) = %d l+ %d = %d\n",
           logmath_log(lmath, 1e-3),
           logmath_log(lmath, 5e-3),
           logmath_add(lmath, logmath_log(lmath, 1e-3),
                       logmath_log(lmath, 5e-3)));
    printf("log(1e-3 + 5e-3) = %e + %e = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-3)),
           logmath_exp(lmath, logmath_log(lmath, 5e-3)),
           logmath_exp(lmath, logmath_add(lmath,
                                          logmath_log(lmath, 1e-3),
                                          logmath_log(lmath, 5e-3))));
    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 5e-48)),
                   logmath_log(lmath, 6e-48));
    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 42)),
                   logmath_log(lmath, 42));

    rv = logmath_write(lmath, "tmp.logadd");
    TEST_EQUAL(rv, 0);
    logmath_free(lmath);

    lmath = logmath_read("tmp.logadd");
    TEST_ASSERT(lmath);

    printf("log(1e-150) = %d\n", logmath_log(lmath, 1e-150));
    TEST_EQUAL_LOG(logmath_log(lmath, 1e-150), -3454050);
    printf("exp(log(1e-150)) = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-150)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-150)), 1e-150);
    printf("log(1e-48) = %d\n", logmath_log(lmath, 1e-48));
    printf("exp(log(1e-48)) = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-48)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 1e-48)), 1e-48);
    printf("log(42) = %d\n", logmath_log(lmath, 42));
    TEST_EQUAL_LOG(logmath_log(lmath, 42), 37378);
    printf("exp(log(42)) = %f\n",
           logmath_exp(lmath, logmath_log(lmath, 42)));
    TEST_EQUAL_FLOAT(logmath_exp(lmath, logmath_log(lmath, 42)), 41.99);
    printf("log(1e-3 + 5e-3) = %d l+ %d = %d\n",
           logmath_log(lmath, 1e-3),
           logmath_log(lmath, 5e-3),
           logmath_add(lmath, logmath_log(lmath, 1e-3),
                       logmath_log(lmath, 5e-3)));
    printf("log(1e-3 + 5e-3) = %e + %e = %e\n",
           logmath_exp(lmath, logmath_log(lmath, 1e-3)),
           logmath_exp(lmath, logmath_log(lmath, 5e-3)),
           logmath_exp(lmath, logmath_add(lmath,
                                          logmath_log(lmath, 1e-3),
                                          logmath_log(lmath, 5e-3))));
    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 5e-48)),
                   logmath_log(lmath, 6e-48));
    TEST_EQUAL_LOG(logmath_add(lmath, logmath_log(lmath, 1e-48),
                               logmath_log(lmath, 42)),
                   logmath_log(lmath, 42));

    return 0;
}
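Editor's note on the expected values: with base b = 1.0001 and no shift, logmath_log(x) is approximately ln(x)/ln(b). For x = 1e-150 that is -345.388/0.000099995, which is roughly -3454050, and for x = 42 it is 3.7377/0.000099995, roughly 37379, matching the asserted 37378 within the small slack TEST_EQUAL_LOG allows. The 41.99 in the second exp(log(42)) check reflects quantization in the integer log domain: rounding log(42) down to an integer perturbs the round trip by a factor of up to about b, so exp(log(42)) comes out near 41.998 rather than exactly 42.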
static int32
senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath)
{
    char eofchk;
    FILE *fp;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 *pdf;
    int32 i, f, c, p, n_err;
    char **argname, **argval;

    E_INFO("Reading senone mixture weights: %s\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading",
                       file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("Failed to read header from file '%s'\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MIXW_PARAM_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones, #features, #codewords, arraysize */
    if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap,
                      &chksum) != 1)
        || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap,
                      &chksum) != 1)
        || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (i != s->n_sen * s->n_feat * s->n_cw) {
        E_FATAL("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n",
                file_name, i, s->n_sen, s->n_feat, s->n_cw);
    }

    /*
     * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits.
     * All PDF values will be truncated (in the LSB positions) by these many bits.
     */
    if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0))
        E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor);

    /* Use a fixed shift for compatibility with everything else. */
    E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT);

    /*
     * Allocate memory for senone PDF data.  Organize normally or transposed
     * depending on s->n_gauden.
     */
    if (s->n_gauden > 1) {
        E_INFO("Not transposing mixture weights in memory\n");
        s->pdf = (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw,
                                               sizeof(senprob_t));
    }
    else {
        E_INFO("Transposing mixture weights in memory\n");
        s->pdf = (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen,
                                               sizeof(senprob_t));
    }

    /* Temporary structure to read in floats */
    pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32));

    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
    n_err = 0;
    for (i = 0; i < s->n_sen; i++) {
        for (f = 0; f < s->n_feat; f++) {
            if (bio_fread((void *) pdf, sizeof(float32), s->n_cw,
                          fp, byteswap, &chksum) != s->n_cw) {
                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
            }

            /* Normalize and floor */
            if (vector_sum_norm(pdf, s->n_cw) <= 0.0)
                n_err++;
            vector_floor(pdf, s->n_cw, s->mixwfloor);
            vector_sum_norm(pdf, s->n_cw);

            /* Convert to logs3, truncate to 8 bits, and store in s->pdf */
            for (c = 0; c < s->n_cw; c++) {
                p = -(logmath_log(lmath, pdf[c]));
                p += (1 << (SENSCR_SHIFT - 1)) - 1;     /* Rounding before truncation */

                if (s->n_gauden > 1)
                    s->pdf[i][f][c] =
                        (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
                else
                    s->pdf[f][c][i] =
                        (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255;
            }
        }
    }
    if (n_err > 0)
        E_WARN("Weight normalization failed for %d senones\n", n_err);

    ckd_free(pdf);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read mixture weights for %d senones: %d features x %d codewords\n",
           s->n_sen, s->n_feat, s->n_cw);

    return 0;
}
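Worked example (editor's note) of the 8-bit quantization above, assuming SENSCR_SHIFT is 10 as in PocketSphinx: a senone value p = 51200 becomes (51200 + 511) >> 10 = 50, and anything at or above 255 << 10 saturates at 255, so each mixture weight is stored in a single senprob_t byte.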
static int32
read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin)
{
    char **argname, **argval;
    char eofchk;
    FILE *fp;
    int32 byteswap, chksum_present;
    uint32 chksum;
    float32 *pdf;
    int32 i, f, c, n;
    int32 n_sen;
    int32 n_feat;
    int32 n_comp;
    int32 n_err;

    E_INFO("Reading mixture weights file '%s'\n", file_name);

    if ((fp = fopen(file_name, "rb")) == NULL)
        E_FATAL("fopen(%s,rb) failed\n", file_name);

    /* Read header, including argument-value info and 32-bit byteorder magic */
    if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0)
        E_FATAL("bio_readhdr(%s) failed\n", file_name);

    /* Parse argument-value list */
    chksum_present = 0;
    for (i = 0; argname[i]; i++) {
        if (strcmp(argname[i], "version") == 0) {
            if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0)
                E_WARN("Version mismatch(%s): %s, expecting %s\n",
                       file_name, argval[i], MGAU_MIXW_VERSION);
        }
        else if (strcmp(argname[i], "chksum0") == 0) {
            chksum_present = 1; /* Ignore the associated value */
        }
    }
    bio_hdrarg_free(argname, argval);
    argname = argval = NULL;

    chksum = 0;

    /* Read #senones, #features, #codewords, arraysize */
    if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1)
        || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap,
                      &chksum) != 1)
        || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap,
                      &chksum) != 1)
        || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) {
        E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name);
    }
    if (n_feat != s->n_feat)
        E_FATAL("#Feature streams(%d) != %d\n", n_feat, s->n_feat);
    if (n != n_sen * n_feat * n_comp) {
        E_FATAL
            ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n",
             file_name, n, n_sen, n_feat, n_comp);
    }

    /* n_sen = number of mixture weights per codeword, which is
     * fixed at the number of senones since we have only one codebook. */
    s->n_sen = n_sen;

    /* Quantized mixture weight arrays. */
    s->mixw = ckd_calloc_3d(s->n_feat, s->n_density, n_sen,
                            sizeof(***s->mixw));

    /* Temporary structure to read in floats before conversion to (int32) logs3 */
    pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32));

    /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */
    n_err = 0;
    for (i = 0; i < n_sen; i++) {
        for (f = 0; f < n_feat; f++) {
            if (bio_fread((void *) pdf, sizeof(float32), n_comp,
                          fp, byteswap, &chksum) != n_comp) {
                E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name);
            }

            /* Normalize and floor */
            if (vector_sum_norm(pdf, n_comp) <= 0.0)
                n_err++;
            vector_floor(pdf, n_comp, SmoothMin);
            vector_sum_norm(pdf, n_comp);

            /* Convert to LOG, quantize, and transpose */
            for (c = 0; c < n_comp; c++) {
                int32 qscr;

                qscr = -logmath_log(s->lmath_8b, pdf[c]);
                if ((qscr > MAX_NEG_MIXW) || (qscr < 0))
                    qscr = MAX_NEG_MIXW;
                s->mixw[f][c][i] = qscr;
            }
        }
    }
    if (n_err > 0)
        E_WARN("Weight normalization failed for %d senones\n", n_err);

    ckd_free(pdf);

    if (chksum_present)
        bio_verify_chksum(fp, byteswap, chksum);

    if (fread(&eofchk, 1, 1, fp) == 1)
        E_FATAL("More data than expected in %s\n", file_name);

    fclose(fp);

    E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp);
    return n_sen;
}
void
run_tests(logmath_t *lmath, ngram_model_t *model)
{
    int32 rv, i;

    TEST_ASSERT(model);

    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    rv = ngram_model_read_classdef(model, LMDIR "/100.probdef");
    TEST_EQUAL(rv, 0);

    /* Verify that class word IDs remain the same. */
    TEST_EQUAL(ngram_wid(model, "scylla"), 285);
    TEST_EQUAL(strcmp(ngram_word(model, 285), "scylla"), 0);

    /* Verify in-class word IDs. */
    TEST_EQUAL(ngram_wid(model, "scylla:scylla"), 0x80000000 | 400);

    /* Verify in-class and out-class unigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884)
                   + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884)
                   + logmath_log(lmath, 0.1));
    TEST_EQUAL_LOG(ngram_score(model, "scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038)
                   + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "zero", NULL),
                   logmath_log10_to_log(lmath, -1.9038));

    /* Verify class bigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642));
    TEST_EQUAL_LOG(ngram_score(model, "scylla:scylla", "on", NULL),
                   logmath_log10_to_log(lmath, -1.2642)
                   + logmath_log(lmath, 0.4));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "karybdis:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));
    TEST_EQUAL_LOG(ngram_score(model, "apparently", "scooby:scylla", NULL),
                   logmath_log10_to_log(lmath, -0.5172));

    /* Verify class trigram scores. */
    TEST_EQUAL_LOG(ngram_score(model, "zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725));
    TEST_EQUAL_LOG(ngram_score(model, "oh:zero", "be", "will", NULL),
                   logmath_log10_to_log(lmath, -0.5725)
                   + logmath_log(lmath, 0.7));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));
    TEST_EQUAL_LOG(ngram_score(model, "should", "variance", "zero:zero", NULL),
                   logmath_log10_to_log(lmath, -0.9404));

    /* Add words to classes. */
    rv = ngram_model_add_class_word(model, "scylla", "scrappy:scylla", 1.0);
    TEST_ASSERT(rv >= 0);
    TEST_EQUAL(ngram_wid(model, "scrappy:scylla"), 0x80000196);
    TEST_EQUAL_LOG(ngram_score(model, "scrappy:scylla", NULL),
                   logmath_log10_to_log(lmath, -2.7884)
                   + logmath_log(lmath, 0.2));
    printf("scrappy:scylla %08x %d %f\n",
           ngram_wid(model, "scrappy:scylla"),
           ngram_score(model, "scrappy:scylla", NULL),
           logmath_exp(lmath, ngram_score(model, "scrappy:scylla", NULL)));

    /* Add a lot of words to a class. */
    for (i = 0; i < 129; ++i) {
        char word[32];

        sprintf(word, "%d:scylla", i);
        rv = ngram_model_add_class_word(model, "scylla", word, 1.0);
        printf("%s %08x %d %f\n", word,
               ngram_wid(model, word),
               ngram_score(model, word, NULL),
               logmath_exp(lmath, ngram_score(model, word, NULL)));
        TEST_ASSERT(rv >= 0);
        TEST_EQUAL(ngram_wid(model, word), 0x80000197 + i);
    }

    /* Add a new class. */
    {
        const char *words[] = { "blatz:foobie", "hurf:foobie" };
        float32 weights[] = { 0.6, 0.4 };
        int32 foobie_prob;

        rv = ngram_model_add_class(model, "[foobie]", 1.0,
                                   words, weights, 2);
        TEST_ASSERT(rv >= 0);
        foobie_prob = ngram_score(model, "[foobie]", NULL);
        TEST_EQUAL_LOG(ngram_score(model, "blatz:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.6));
        TEST_EQUAL_LOG(ngram_score(model, "hurf:foobie", NULL),
                       foobie_prob + logmath_log(lmath, 0.4));
    }
}