/**
 * Read a language model from disk and hand it to the decoder's search module.
 *
 * The `-lw`, `-wip` and `-uw` settings are taken from the global command-line
 * configuration; the size of the decoder's dictionary is passed along to the
 * LM reader.
 *
 * NOTE(review): the result of lm_read_advance() is forwarded to
 * srch_add_lm() without a NULL check -- confirm that the callback copes with
 * a failed read.
 *
 * @param _decoder  live decoder whose search structure receives the LM
 * @param lmpath    path of the language model file
 * @param lmname    name under which the language model is registered
 */
void
ld_read_lm(live_decoder_t * _decoder, const char *lmpath, const char *lmname)
{
    srch_t *search = (srch_t *) _decoder->kb.srch;
    int32 dict_words = dict_size(_decoder->kb.kbcore->dict);
    lm_t *model;

    model = lm_read_advance(lmpath,
                            lmname,
                            cmd_ln_float32("-lw"),
                            cmd_ln_float32("-wip"),
                            cmd_ln_float32("-uw"),
                            dict_words,
                            NULL,
                            1);         /* weight-apply flag */

    search->srch_add_lm(search, model, lmname);
}
/**
 * Build an LM set containing exactly one language model read from lmfile.
 *
 * Only a single slot is allocated; this assumes that no LM class definition
 * accompanies the model.  After the model is read, its dictionary-to-LM
 * word-id map (dict2lmwid) is built with wid_dict_lm_map().
 *
 * Every failure (NULL dictionary, unreadable LM, failed word-id mapping) is
 * reported through E_FATAL.
 *
 * @param lmfile     path of the language model file
 * @param dict       dictionary; must not be NULL
 * @param lmname     name given to the language model
 * @param lw         passed to lm_read_advance() and wid_dict_lm_map()
 * @param wip        passed to lm_read_advance()
 * @param uw         passed to lm_read_advance()
 * @param lmdumpdir  not used by this function
 * @param logmath    passed to lm_read_advance()
 * @return the newly allocated LM set
 */
lmset_t *
lmset_read_lm(const char *lmfile, dict_t * dict, const char *lmname,
              float64 lw, float64 wip, float64 uw, const char *lmdumpdir,
              logmath_t *logmath)
{
    lmset_t *lms;

    /* Reject a NULL dictionary up front: dict is handed to dict_size()
       below, so checking it only after the LM has been read (as was done
       previously) is too late for the diagnostic to be of any use. */
    if (dict == NULL) {
        E_FATAL
            ("Dict is specified to be NULL (dict_init is not called before lmset_read_lm?), dict2lmwid is not built inside lmset_read_lm\n");
    }

    lms = (lmset_t *) ckd_calloc(1, sizeof(lmset_t));
    lms->n_lm = 1;
    lms->n_alloc_lm = 1;

    /* Only allocate one single LM.  This assumes no class definition
       would be defined. */
    lms->lmarray = (lm_t **) ckd_calloc(1, sizeof(lm_t *));

    /* No need to check whether lmname exists here. */
    if ((lms->lmarray[0] =
         lm_read_advance(lmfile, lmname, lw, wip, uw, dict_size(dict), NULL,
                         1, logmath, FALSE, FALSE)) == NULL) {
        E_FATAL
            ("lm_read_advance(%s, %e, %e, %e %d [Arbitrary Fmt], Weighted Apply) failed\n",
             lmfile, lw, wip, uw, dict_size(dict));
    }

    assert(lms->lmarray[0]);
    if ((lms->lmarray[0]->dict2lmwid =
         wid_dict_lm_map(dict, lms->lmarray[0], lw)) == NULL) {
        E_FATAL
            ("Dict/LM word-id mapping failed for LM index %d, named %s\n",
             0, lmset_idx_to_name(lms, 0));
    }

    return lms;
}
/**
 * Read a language model from disk and register it with the decoder's search.
 *
 * The `-lw`, `-wip`, `-uw`, `-ugonly` and `-bgonly` settings are looked up in
 * the configuration returned by kbcore_config(); the size of the decoder's
 * dictionary and the search's logmath object are passed to the LM reader.
 *
 * NOTE(review): the kbcore is reached through three different paths here
 * (`_decode->kb.kbcore` for the dictionary, `_decode->kbcore` for the
 * configuration, `s->kbc` for logmath) -- confirm they all denote the same
 * object.
 * NOTE(review): the result of lm_read_advance() is forwarded to add_lm()
 * without a NULL check -- confirm that the callback copes with a failed read.
 *
 * @param _decode  decoder whose search structure receives the LM
 * @param lmpath   path of the language model file
 * @param lmname   name under which the language model is registered
 */
void
s3_decode_read_lm(s3_decode_t * _decode,
                  const char *lmpath, const char *lmname)
{
    srch_t *search = (srch_t *) _decode->kb.srch;
    int32 dict_words = dict_size(_decode->kb.kbcore->dict);
    lm_t *model;

    model = lm_read_advance(lmpath,
                            lmname,
                            cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-lw"),
                            cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-wip"),
                            cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-uw"),
                            dict_words,
                            NULL,
                            1,          /* weight-apply flag */
                            kbcore_logmath(search->kbc),
                            cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-ugonly"),
                            cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-bgonly"));

    search->funcs->add_lm(search, model, lmname);
}
lmset_t * lmset_read_ctl(const char *ctlfile, dict_t * dict, float64 lw, float64 wip, float64 uw, const char *lmdumpdir, logmath_t *logmath) { FILE *ctlfp; FILE *tmp; char lmfile[4096], lmname[4096], str[4096]; lmclass_set_t *lmclass_set; lmclass_t **lmclass, *cl; int32 n_lmclass, n_lmclass_used; int32 i; lm_t *lm; lmset_t *lms = NULL; tmp = NULL; E_INFO("Reading LM control file '%s'\n", ctlfile); if ((ctlfp = fopen(ctlfile, "r")) == NULL) { E_ERROR_SYSTEM("Failed to open LM control file"); return NULL; } lmclass_set = lmclass_newset(); lms = (lmset_t *) ckd_calloc(1, sizeof(lmset_t)); lms->n_lm = 0; lms->n_alloc_lm = 0; if (fscanf(ctlfp, "%s", str) == 1) { if (strcmp(str, "{") == 0) { /* Load LMclass files */ while ((fscanf(ctlfp, "%s", str) == 1) && (strcmp(str, "}") != 0)) lmclass_set = lmclass_loadfile(lmclass_set, str, logmath); if (strcmp(str, "}") != 0) E_FATAL("Unexpected EOF(%s)\n", ctlfile); if (fscanf(ctlfp, "%s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; /* Fill in dictionary word id information for each LMclass word */ for (cl = lmclass_firstclass(lmclass_set); lmclass_isclass(cl); cl = lmclass_nextclass(lmclass_set, cl)) { /* For every words in the class, set the dictwid correctly The following piece of code replace s2's kb_init_lmclass_dictwid (cl); doesn't do any checking even the id is a bad dict id. This only sets the information in the lmclass_set, but not lm-2-dict or dict-2-lm map. In Sphinx 3, they are done in wid_dict_lm_map in wid.c. 
*/ lmclass_word_t *w; int32 wid; for (w = lmclass_firstword(cl); lmclass_isword(w); w = lmclass_nextword(cl, w)) { wid = dict_wordid(dict, lmclass_getword(w)); #if 0 E_INFO("In class %s, Word %s, wid %d\n", cl->name, lmclass_getword(w), wid); #endif lmclass_set_dictwid(w, wid); } } /* At this point if str[0] != '\0', we have an LM filename */ n_lmclass = lmclass_get_nclass(lmclass_set); lmclass = (lmclass_t **) ckd_calloc(n_lmclass, sizeof(lmclass_t *)); E_INFO("Number of LM class specified %d in file %s\n", n_lmclass, ctlfile); /* Read in one LM at a time */ while (str[0] != '\0') { strcpy(lmfile, str); if (fscanf(ctlfp, "%s", lmname) != 1) E_FATAL("LMname missing after LMFileName '%s'\n", lmfile); n_lmclass_used = 0; if (fscanf(ctlfp, "%s", str) == 1) { if (strcmp(str, "{") == 0) { while ((fscanf(ctlfp, "%s", str) == 1) && (strcmp(str, "}") != 0)) { if (n_lmclass_used >= n_lmclass) { E_FATAL("Too many LM classes specified for '%s'\n", lmfile); } lmclass[n_lmclass_used] = lmclass_get_lmclass(lmclass_set, str); if (!(lmclass_isclass(lmclass[n_lmclass_used]))) E_FATAL("LM class '%s' not found\n", str); n_lmclass_used++; } if (strcmp(str, "}") != 0) E_FATAL("Unexpected EOF(%s)\n", ctlfile); if (fscanf(ctlfp, "%s", str) != 1) str[0] = '\0'; } } else str[0] = '\0'; lm = (lm_t *) lm_read_advance(lmfile, lmname, lw, wip, uw, dict_size(dict), NULL, 1, logmath, FALSE, FALSE); if (n_lmclass_used > 0) { E_INFO("Did I enter here?\n"); lm_build_lmclass_info(lm, lw, uw, wip, n_lmclass_used, lmclass); } if (lms->n_lm == lms->n_alloc_lm) { lms->lmarray = (lm_t **) ckd_realloc(lms->lmarray, (lms->n_alloc_lm + LM_ALLOC_BLOCK) * sizeof(lm_t *)); lms->n_alloc_lm += LM_ALLOC_BLOCK; } lms->lmarray[lms->n_lm] = lm; lms->n_lm += 1; E_INFO("%d %d\n", lms->n_alloc_lm, lms->n_lm); } assert(lms); assert(lms->lmarray); E_INFO("No. of LM set allocated %d, no. 
of LM %d \n", lms->n_alloc_lm, lms->n_lm); if (dict != NULL) { for (i = 0; i < lms->n_lm; i++) { assert(lms->lmarray[i]); assert(dict); if ((lms->lmarray[i]->dict2lmwid = wid_dict_lm_map(dict, lms->lmarray[i], lw)) == NULL) E_FATAL ("Dict/LM word-id mapping failed for LM index %d, named %s\n", i, lmset_idx_to_name(lms, i)); } } else { E_FATAL ("Dict is specified to be NULL (dict_init is not called before lmset_read_lm?), dict2lmwid is not built inside lmset_read_lm\n"); } ckd_free(lmclass_set); ckd_free(lmclass); fclose(ctlfp); return lms; }
int main(int argc, char *argv[]) { char *lm_file; char *args_file; char *ngrams_file; char *lmLoadTimer = "LM Load"; char *lmLookupTimer = "LM Lookup"; char *ngrams[MAX_NGRAMS]; float64 lw, wip, uw, logbase; int i, n; int32 nwords[MAX_NGRAMS]; int scores[MAX_NGRAMS]; lm_t *lm; s3lmwid32_t *wid[MAX_NGRAMS]; if (argc < 3) { E_FATAL("USAGE: %s <lm_file> <args_file> <ngrams_file>\n", argv[0]); } args_file = argv[1]; lm_file = argv[2]; ngrams_file = argv[3]; parse_args_file(args_file); lw = cmd_ln_float32("-lw"); wip = cmd_ln_float32("-wip"); uw = cmd_ln_float32("-uw"); logbase = cmd_ln_float32("-logbase"); logs3_init(logbase, 1, 1); /*Report progress and use log table */ metricsStart(lmLoadTimer); /* initialize the language model */ /* HACK! This doesn't work for class-based LM */ lm = lm_read_advance(lm_file, "default", lw, wip, uw, 0, NULL, 1); metricsStop(lmLoadTimer); /* read in all the N-grams */ n = read_ngrams(ngrams_file, ngrams, wid, nwords, MAX_NGRAMS, lm); metricsStart(lmLookupTimer); /* scores the N-grams */ for (i = 0; i < n; i++) { scores[i] = score_ngram(wid[i], nwords[i], lm); } metricsStop(lmLookupTimer); for (i = 0; i < n; i++) { printf("%-10d %s\n", scores[i], ngrams[i]); } printf("Bigram misses: %d \n", lm->n_bg_bo); printf("Trigram misses: %d \n", lm->n_tg_bo); fflush(stdout); metricsPrint(); return 0; }