Пример #1
0
void
ld_read_lm(live_decoder_t * _decoder,
           const char *lmpath, const char *lmname)
{
    srch_t *s;
    lm_t *lm;
    int32 ndict;
    s = (srch_t *) _decoder->kb.srch;

    ndict = dict_size(_decoder->kb.kbcore->dict);


    lm = lm_read_advance(lmpath, lmname, cmd_ln_float32("-lw"), cmd_ln_float32("-wip"), cmd_ln_float32("-uw"), ndict, NULL, 1   /* Weight apply */
        );

    s->srch_add_lm(s, lm, lmname);
}
Пример #2
0
lmset_t *
lmset_read_lm(const char *lmfile, dict_t * dict, const char *lmname,
              float64 lw, float64 wip, float64 uw, const char *lmdumpdir,
              logmath_t *logmath)
{
    lmset_t *lms;

    lms = (lmset_t *) ckd_calloc(1, sizeof(lmset_t));
    lms->n_lm = 1;
    lms->n_alloc_lm = 1;

    /* Only allocate one single LM.  This assumes no class definition would be defined. 
     */
    lms->lmarray = (lm_t **) ckd_calloc(1, sizeof(lm_t *));
    /* 
       No need to check whether lmname exists here.
     */
    if ((lms->lmarray[0] =
         lm_read_advance(lmfile, lmname, lw, wip, uw, dict_size(dict),
                         NULL, 1, logmath, FALSE, FALSE)) == NULL)
        E_FATAL
            ("lm_read_advance(%s, %e, %e, %e %d [Arbitrary Fmt], Weighted Apply) failed\n",
             lmfile, lw, wip, uw, dict_size(dict));

    if (dict != NULL) {
        assert(lms->lmarray[0]);
        if ((lms->lmarray[0]->dict2lmwid =
             wid_dict_lm_map(dict, lms->lmarray[0], lw)) == NULL)
            E_FATAL
                ("Dict/LM word-id mapping failed for LM index %d, named %s\n",
                 0, lmset_idx_to_name(lms, 0));

    }
    else {
        E_FATAL
            ("Dict is specified to be NULL (dict_init is not called before lmset_read_lm?), dict2lmwid is not built inside lmset_read_lm\n");
    }

    return lms;
}
Пример #3
0
void
s3_decode_read_lm(s3_decode_t * _decode,
           const char *lmpath, const char *lmname)
{
    srch_t *s;
    lm_t *lm;
    int32 ndict;
    s = (srch_t *) _decode->kb.srch;

    ndict = dict_size(_decode->kb.kbcore->dict);


    lm = lm_read_advance(lmpath, lmname,
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-lw"),
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-wip"),
                         cmd_ln_float32_r(kbcore_config(_decode->kbcore), "-uw"),
                         ndict, NULL, 1,   /* Weight apply */
                         kbcore_logmath(s->kbc),
                         cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-ugonly"),
                         cmd_ln_boolean_r(kbcore_config(_decode->kbcore), "-bgonly")
        );

    s->funcs->add_lm(s, lm, lmname);
}
Пример #4
0
lmset_t *
lmset_read_ctl(const char *ctlfile,
               dict_t * dict,
               float64 lw, float64 wip, float64 uw,
               const char *lmdumpdir, logmath_t *logmath)
{
    FILE *ctlfp;
    FILE *tmp;
    char lmfile[4096], lmname[4096], str[4096];

    lmclass_set_t *lmclass_set;
    lmclass_t **lmclass, *cl;
    int32 n_lmclass, n_lmclass_used;
    int32 i;
    lm_t *lm;
    lmset_t *lms = NULL;
    tmp = NULL;

    E_INFO("Reading LM control file '%s'\n", ctlfile);
    if ((ctlfp = fopen(ctlfile, "r")) == NULL) {
	    E_ERROR_SYSTEM("Failed to open LM control file");
	    return NULL;
    }

    lmclass_set = lmclass_newset();

    lms = (lmset_t *) ckd_calloc(1, sizeof(lmset_t));
    lms->n_lm = 0;
    lms->n_alloc_lm = 0;

    if (fscanf(ctlfp, "%s", str) == 1) {
        if (strcmp(str, "{") == 0) {
            /* Load LMclass files */
            while ((fscanf(ctlfp, "%s", str) == 1)
                   && (strcmp(str, "}") != 0))
                lmclass_set = lmclass_loadfile(lmclass_set, str, logmath);

            if (strcmp(str, "}") != 0)
                E_FATAL("Unexpected EOF(%s)\n", ctlfile);

            if (fscanf(ctlfp, "%s", str) != 1)
                str[0] = '\0';
        }
    }
    else
        str[0] = '\0';

    /* Fill in dictionary word id information for each LMclass word */
    for (cl = lmclass_firstclass(lmclass_set);
         lmclass_isclass(cl); cl = lmclass_nextclass(lmclass_set, cl)) {

        /*
           For every words in the class, set the dictwid correctly 
           The following piece of code replace s2's kb_init_lmclass_dictwid (cl);
           doesn't do any checking even the id is a bad dict id. 
           This only sets the information in the lmclass_set, but not 
           lm-2-dict or dict-2-lm map.  In Sphinx 3, they are done in 
           wid_dict_lm_map in wid.c.
         */

        lmclass_word_t *w;
        int32 wid;
        for (w = lmclass_firstword(cl); lmclass_isword(w);
             w = lmclass_nextword(cl, w)) {
            wid = dict_wordid(dict, lmclass_getword(w));
#if 0
            E_INFO("In class %s, Word %s, wid %d\n", cl->name,
                   lmclass_getword(w), wid);
#endif
            lmclass_set_dictwid(w, wid);
        }
    }

    /* At this point if str[0] != '\0', we have an LM filename */

    n_lmclass = lmclass_get_nclass(lmclass_set);
    lmclass = (lmclass_t **) ckd_calloc(n_lmclass, sizeof(lmclass_t *));

    E_INFO("Number of LM class specified %d in file %s\n", n_lmclass,
           ctlfile);

    /* Read in one LM at a time */
    while (str[0] != '\0') {
        strcpy(lmfile, str);
        if (fscanf(ctlfp, "%s", lmname) != 1)
            E_FATAL("LMname missing after LMFileName '%s'\n", lmfile);

        n_lmclass_used = 0;

        if (fscanf(ctlfp, "%s", str) == 1) {
            if (strcmp(str, "{") == 0) {
                while ((fscanf(ctlfp, "%s", str) == 1) &&
                       (strcmp(str, "}") != 0)) {
                    if (n_lmclass_used >= n_lmclass) {
                        E_FATAL("Too many LM classes specified for '%s'\n",
                                lmfile);
                    }

                    lmclass[n_lmclass_used] =
                        lmclass_get_lmclass(lmclass_set, str);
                    if (!(lmclass_isclass(lmclass[n_lmclass_used])))
                        E_FATAL("LM class '%s' not found\n", str);
                    n_lmclass_used++;
                }
                if (strcmp(str, "}") != 0)
                    E_FATAL("Unexpected EOF(%s)\n", ctlfile);
                if (fscanf(ctlfp, "%s", str) != 1)
                    str[0] = '\0';
            }
        }
        else
            str[0] = '\0';

        lm = (lm_t *) lm_read_advance(lmfile, lmname, lw, wip, uw,
                                      dict_size(dict), NULL, 1, logmath, FALSE, FALSE);


        if (n_lmclass_used > 0) {
            E_INFO("Did I enter here?\n");
            lm_build_lmclass_info(lm, lw, uw, wip, n_lmclass_used,
                                  lmclass);
        }

        if (lms->n_lm == lms->n_alloc_lm) {
            lms->lmarray =
                (lm_t **) ckd_realloc(lms->lmarray,
                                      (lms->n_alloc_lm +
                                       LM_ALLOC_BLOCK) * sizeof(lm_t *));
            lms->n_alloc_lm += LM_ALLOC_BLOCK;
        }

        lms->lmarray[lms->n_lm] = lm;
        lms->n_lm += 1;
        E_INFO("%d %d\n", lms->n_alloc_lm, lms->n_lm);
    }

    assert(lms);
    assert(lms->lmarray);
    E_INFO("No. of LM set allocated %d, no. of LM %d \n", lms->n_alloc_lm,
           lms->n_lm);


    if (dict != NULL) {
        for (i = 0; i < lms->n_lm; i++) {
            assert(lms->lmarray[i]);
            assert(dict);
            if ((lms->lmarray[i]->dict2lmwid =
                 wid_dict_lm_map(dict, lms->lmarray[i], lw)) == NULL)
                E_FATAL
                    ("Dict/LM word-id mapping failed for LM index %d, named %s\n",
                     i, lmset_idx_to_name(lms, i));
        }
    }
    else {
        E_FATAL
            ("Dict is specified to be NULL (dict_init is not called before lmset_read_lm?), dict2lmwid is not built inside lmset_read_lm\n");
    }


    ckd_free(lmclass_set);
    ckd_free(lmclass);
    fclose(ctlfp);
    return lms;
}
Пример #5
0
int
main(int argc, char *argv[])
{
    char *lm_file;
    char *args_file;
    char *ngrams_file;
    char *lmLoadTimer = "LM Load";
    char *lmLookupTimer = "LM Lookup";

    char *ngrams[MAX_NGRAMS];

    float64 lw, wip, uw, logbase;

    int i, n;

    int32 nwords[MAX_NGRAMS];
    int scores[MAX_NGRAMS];

    lm_t *lm;

    s3lmwid32_t *wid[MAX_NGRAMS];


    if (argc < 3) {
        E_FATAL("USAGE: %s <lm_file> <args_file> <ngrams_file>\n",
                argv[0]);
    }

    args_file = argv[1];
    lm_file = argv[2];
    ngrams_file = argv[3];

    parse_args_file(args_file);

    lw = cmd_ln_float32("-lw");
    wip = cmd_ln_float32("-wip");
    uw = cmd_ln_float32("-uw");
    logbase = cmd_ln_float32("-logbase");

    logs3_init(logbase, 1, 1);  /*Report progress and use log table */

    metricsStart(lmLoadTimer);

    /* initialize the language model */
    /* HACK! This doesn't work for class-based LM */
    lm = lm_read_advance(lm_file, "default", lw, wip, uw, 0, NULL, 1);

    metricsStop(lmLoadTimer);

    /* read in all the N-grams */
    n = read_ngrams(ngrams_file, ngrams, wid, nwords, MAX_NGRAMS, lm);

    metricsStart(lmLookupTimer);

    /* scores the N-grams */
    for (i = 0; i < n; i++) {
        scores[i] = score_ngram(wid[i], nwords[i], lm);
    }

    metricsStop(lmLookupTimer);

    for (i = 0; i < n; i++) {
        printf("%-10d %s\n", scores[i], ngrams[i]);
    }

    printf("Bigram misses: %d \n", lm->n_bg_bo);
    printf("Trigram misses: %d \n", lm->n_tg_bo);

    fflush(stdout);

    metricsPrint();
    return 0;
}