Example #1
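This example loads a language model with lm_read, then walks the N-gram file one utterance at a time: each batch of N-grams is read, scored against the model, and printed, and the LM cache is reset at utterance boundaries. The load and lookup phases are timed with the metrics helpers, and bigram/trigram miss counts are reported at the end.
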
int main(int argc, char *argv[])
{
    char *lm_file;
    char *args_file;
    char *ngrams_file;
    char *lmLoadTimer = "LM Load";
    char *lmLookupTimer = "LM Lookup";

    char *ngrams[MAX_NGRAMS];

    float64 lw, wip, uw, logbase;

    int i, n;

    int32 nwords[MAX_NGRAMS];
    int scores[MAX_NGRAMS];

    lm_t *lm;

    s3lmwid_t wid[MAX_NGRAMS][MAX_WORDS_PER_NGRAM];

    FILE* fp;


    if (argc < 4) {
        E_FATAL("USAGE: %s <args_file> <lm_file> <ngrams_file>\n", argv[0]);
    }

    args_file = argv[1];
    lm_file = argv[2];
    ngrams_file = argv[3];

    parse_args_file(args_file);

    lw = cmd_ln_float32("-lw");
    wip = cmd_ln_float32("-wip");
    uw = cmd_ln_float32("-uw");
    logbase = cmd_ln_float32("-logbase");

    logs3_init(logbase);

    metricsStart(lmLoadTimer);
    
    /* initialize the language model */
    lm = lm_read(lm_file, lw, wip, uw);

    metricsStop(lmLoadTimer);

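    /* open the file containing the N-grams to score */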
    if ((fp = fopen(ngrams_file, "r")) == NULL) {
        E_FATAL("Unable to open N-gram file %s\n", ngrams_file);
    }

    
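    /* process the N-gram file one utterance at a time */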
    while (has_more_utterances(fp)) {

      /* read in all the N-grams */
      n = read_ngrams(fp, ngrams, wid, nwords, MAX_NGRAMS, lm);
      
      metricsStart(lmLookupTimer);

      /* score the N-grams */
      for (i = 0; i < n; i++) {
        scores[i] = score_ngram(wid[i], nwords[i], lm);
        printf("%-10d %s\n", scores[i], ngrams[i]);
        /*
        printf("%-10d %s %d %d %d\n", scores[i], ngrams[i],
               wid[i][0], wid[i][1], wid[i][2]);
        */
      }

      /* reset cache if <END_UTT> was reached */
      if (n != MAX_NGRAMS) {
        lm_cache_reset(lm);
      }

      metricsStop(lmLookupTimer);
    }

    printf("Bigram misses: %d \n", lm->n_bg_bo);
    printf("Trigram misses: %d \n", lm->n_tg_bo);

    fflush(stdout);

    metricsPrint();
    return 0;
}
Example #2
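This variant loads the language model with lm_read_advance (which, as the comment in the code notes, does not handle class-based LMs), reads all the N-grams from the file in a single call, scores them in one timed pass, and prints the scores afterwards.
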
int
main(int argc, char *argv[])
{
    char *lm_file;
    char *args_file;
    char *ngrams_file;
    char *lmLoadTimer = "LM Load";
    char *lmLookupTimer = "LM Lookup";

    char *ngrams[MAX_NGRAMS];

    float64 lw, wip, uw, logbase;

    int i, n;

    int32 nwords[MAX_NGRAMS];
    int scores[MAX_NGRAMS];

    lm_t *lm;

    s3lmwid32_t *wid[MAX_NGRAMS];


    if (argc < 4) {
        E_FATAL("USAGE: %s <args_file> <lm_file> <ngrams_file>\n",
                argv[0]);
    }

    args_file = argv[1];
    lm_file = argv[2];
    ngrams_file = argv[3];

    parse_args_file(args_file);

    lw = cmd_ln_float32("-lw");
    wip = cmd_ln_float32("-wip");
    uw = cmd_ln_float32("-uw");
    logbase = cmd_ln_float32("-logbase");

    logs3_init(logbase, 1, 1);  /* report progress and use the log table */

    metricsStart(lmLoadTimer);

    /* initialize the language model */
    /* HACK! This doesn't work for class-based LM */
    lm = lm_read_advance(lm_file, "default", lw, wip, uw, 0, NULL, 1);

    metricsStop(lmLoadTimer);

    /* read in all the N-grams */
    n = read_ngrams(ngrams_file, ngrams, wid, nwords, MAX_NGRAMS, lm);

    metricsStart(lmLookupTimer);

    /* score the N-grams */
    for (i = 0; i < n; i++) {
        scores[i] = score_ngram(wid[i], nwords[i], lm);
    }

    metricsStop(lmLookupTimer);

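    /* print the score for each N-gram */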
    for (i = 0; i < n; i++) {
        printf("%-10d %s\n", scores[i], ngrams[i]);
    }

    printf("Bigram misses: %d \n", lm->n_bg_bo);
    printf("Trigram misses: %d \n", lm->n_tg_bo);

    fflush(stdout);

    metricsPrint();
    return 0;
}