int main(int argc, char **argv) { si_t si = make_si(1024); FILE *grammarfp = stdin; FILE *yieldfp; FILE *tracefp = stderr; /* set this to NULL to stop trace output */ FILE *summaryfp = stderr; /* set this to NULL to stop parse stats */ grammar g0, g = NULL; int maxlen = 0, minits = 0, maxits = 0; FLOAT stoptol = 1e-7; int nanneal = 0, randseed = 97; FLOAT rule_bias_default = 0; FLOAT annealstart = 1, annealstop = 1; FLOAT jitter0 = 0, jitter = 0; int debuglevel = 0, nruns = 1, irun = 0; FLOAT wordscale=1; int VariationalBayes=0; int tmp; char filename[100]; { int chr; while ((chr = getopt(argc, argv, "a:g:s:p:l:m:n:d:t:b:B:N:j:J:S:d:R:T:VW:")) != -1) switch (chr) { case 'a': rule_bias_default = atof(optarg); break; case 'g': memcpy (filename, optarg, strlen (optarg)); tmp = strlen (optarg); filename[tmp] = 'N'; tmp++; filename[tmp] = 'e'; tmp++; filename[tmp] = 'w'; tmp++; filename[tmp] = '\0'; tmp++; grammarfp = fopen(optarg, "r"); if (grammarfp == NULL) { fprintf(stderr, "Error: couldn't open grammarfile %s\n%s", optarg, usage); exit(EXIT_FAILURE); } break; case 's': stoptol = atof(optarg); break; case 'p': minruleprob = atof(optarg); break; case 'l': maxlen = atoi(optarg); break; case 'm': minits = atoi(optarg); break; case 'n': maxits = atoi(optarg); break; case 'b': annealstart = atof(optarg); break; case 'B': annealstop = atof(optarg); break; case 'N': nanneal = atoi(optarg); break; case 'j': jitter0 = atof(optarg); break; case 'J': jitter = atof(optarg); break; case 'S': randseed = atoi(optarg); break; case 'd': debuglevel = atoi(optarg); break; case 'R': nruns = atoi(optarg); break; case 'T': summaryfp = fopen(optarg, "w"); break; case 'V': VariationalBayes = 1; break; case 'W': wordscale = atof(optarg); break; case '?': default: fprintf(stderr, "Error: unknown command line flag %c\n\n%s\n", chr, usage); exit(EXIT_FAILURE); break; } } if (optind + 1 != argc) { fprintf(stderr, "Error: expect a yieldfile\n\n%s\n", usage); exit(EXIT_FAILURE); } if ((yieldfp = 
fopen(argv[optind], "r")) == NULL) { fprintf(stderr, "Error: Couldn't open yieldfile %s\n%s", argv[optind], usage); exit(EXIT_FAILURE); } srand(randseed); if (summaryfp && debuglevel >= 100) fprintf(summaryfp, "# rule_bias_default (-a) = %g, stoptol (-s) = %g, minruleprob (-p) = %g, " "maxlen (-l) = %d, minits (-m) = %d, maxits = (-n) = %d, annealstart (-b) = %g, " "annealstop (-B) = %g, nanneal (-N) = %d, jitter0 (-j) = %g, jitter (-J) = %g, " "VariationalBayes (-V) = %d, wordscale (-W) = %g, randseed (-S) = %d, " "debuglevel (-d) = %d, nruns (-R) = %d\n", rule_bias_default, stoptol, minruleprob, maxlen, minits, maxits, annealstart, annealstop, nanneal, jitter0, jitter, VariationalBayes, wordscale, randseed, debuglevel, nruns); g0 = read_grammar(grammarfp, si, rule_bias_default); write_grammar(stdout, g0, si, minruleprob); set_rule_weights(g0, g0->weights, VariationalBayes); /* normalize rule counts */ signal(SIGINT, write_grammar_); for (irun = 0; irun < nruns; ++irun) { FLOAT entropy; g = copy_grammar(g0, si); if (summaryfp && debuglevel >= 100) fprintf(summaryfp, "# Run %d\n", irun); g_global = g; si_global = si; if (jitter0 > 0) jitter_weights(g, jitter0); entropy = inside_outside(g, si, yieldfp, tracefp, summaryfp, debuglevel, maxlen, minits, maxits, stoptol, minruleprob, jitter, VariationalBayes, wordscale, annealstart, annealstop, nanneal); if (summaryfp && debuglevel >= 0) fprintf(summaryfp, "# run %d, entropy %g, %ld rules\n", irun, entropy, (long) g->nrules); if (debuglevel >= 1) { write_grammar(stdout, g, si, minruleprob); FILE* grammarfp = fopen(filename, "w"); //write_grammar(stdout, g, si, minruleprob); write_grammar(grammarfp, g, si, minruleprob); fprintf(stdout, "\n"); fflush(stdout); } free_grammar(g); } free_grammar(g0); si_free(si); if (mmm_blocks_allocated) fprintf(stderr, "Error in mrf(): %ld memory block(s) not deallocated\n", mmm_blocks_allocated); /* check that everything has been deallocated */ assert(mmm_blocks_allocated == 0); 
exit(EXIT_SUCCESS); }
/*
 * inside_outside() -- iteratively re-estimate the rule weights of grammar g.
 *
 * One call to expected_rule_counts() is made before the loop and one per
 * iteration.  Each iteration:
 *   1. add_bias() is applied to the counts from the previous pass;
 *   2. the rule weights are set from those counts (set_rule_weights, with
 *      the VariationalBayes flag);
 *   3. the grammar is pruned with minruleprob;
 *   4. if jitter != 0 the weights are jittered;
 *   5. set_rule_weights() is called again on g->weights with flag 0;
 *   6. while iteration < nanneal the weights are scaled by 1/temperature,
 *      where temperature moves geometrically from annealstart towards
 *      annealstop; afterwards the temperature is fixed at 1;
 *   7. expected_rule_counts() is called again.
 *
 * The loop stops once the rule count did not change during the iteration,
 * at least minits iterations have run, and either maxits (> 0) iterations
 * have run or the relative decrease of the value returned by
 * expected_rule_counts() is below stoptol.
 *
 * yieldfp, maxsentlen, wordscale and weighted_yields_flag are forwarded to
 * expected_rule_counts() unchanged.  Progress is written to summaryfp and
 * tracefp (either may be NULL) according to the debuglevel thresholds
 * 1000, 5000 and 10000.
 *
 * Returns the last value from expected_rule_counts() divided by
 * log(2) * sum_yieldweights -- the quantity the summary output labels
 * "bits/token".
 */
FLOAT inside_outside(grammar g, const si_t si, FILE *yieldfp,
                     FILE *tracefp, FILE *summaryfp, int debuglevel,
                     int maxsentlen, int minits, int maxits,
                     FLOAT stoptol, FLOAT minruleprob,
                     FLOAT jitter, int VariationalBayes, FLOAT wordscale,
                     FLOAT annealstart, FLOAT annealstop, int nanneal,
                     int weighted_yields_flag)
{
  FLOAT  *counts = CALLOC(g->nrules, sizeof(FLOAT));
  FLOAT  prev_neglogp;            /* value from the previous pass */
  FLOAT  cur_neglogp;             /* value from the current pass */
  FLOAT  yieldweight_total;       /* filled in by expected_rule_counts() */
  FLOAT  temp = annealstart;
  size_t nrules_now = g->nrules;
  size_t nrules_before;
  int    iter = 0;

  /* The reporting conditions depend only on the arguments, so evaluate them once. */
  const int show_summary = summaryfp && debuglevel >= 1000;
  const int verbose      = !(debuglevel < 5000);
  const int show_counts  = tracefp && debuglevel >= 10000;
  const int show_grammar = summaryfp && debuglevel >= 5000 && debuglevel < 10000;

  /* ---- pass 0: counts under the weights the grammar arrived with ---- */

  if (show_summary) {
    if (verbose)
      fprintf(summaryfp, "# Iteration %d, temperature = %g, %d rules, ",
              iter, temp, (int) nrules_now);
    else
      fprintf(summaryfp, "# Iteration\ttemperature\tnrules\t-logP\tbits/token\n%d\t%g\t%d",
              iter, temp, (int) nrules_now);
    fflush(summaryfp);
  }

  prev_neglogp = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp, debuglevel,
                                      maxsentlen, minruleprob, wordscale, counts,
                                      &yieldweight_total, weighted_yields_flag);

  if (show_summary) {
    if (verbose)
      fprintf(summaryfp, "-logP = %g, bits/token = %g.\n",
              prev_neglogp, prev_neglogp/(log(2)*(yieldweight_total)));
    else
      fprintf(summaryfp, "\t%g\t%g\n",
              prev_neglogp, prev_neglogp/(log(2)*(yieldweight_total)));
    fflush(summaryfp);
  }

  if (show_counts) {
    write_rule_values(tracefp, g, si, counts, 0);
    fprintf(tracefp, "\n");
    fflush(tracefp);
  }

  if (show_grammar)
    write_grammar(summaryfp, g, si, minruleprob);

  /* ---- re-estimation loop; iterations are numbered from 1 ---- */

  for (iter = 1; ; ++iter) {

    /* Turn the previous pass's counts into new rule weights. */
    add_bias(g, counts);
    set_rule_weights(g, counts, VariationalBayes);
    prune_grammar(g, si, minruleprob);
    if (jitter != 0)
      jitter_weights(g, jitter);
    set_rule_weights(g, g->weights, 0);

    /* Annealing schedule. */
    if (iter >= nanneal) {
      temp = 1.0;
    } else {
      temp = annealstart*pow(annealstop/annealstart, (iter-1.0)/(nanneal-1.0));
      scale_weights(g, 1.0/temp);
    }

    nrules_before = nrules_now;
    nrules_now = g->nrules;

    if (show_summary) {
      if (verbose)
        fprintf(summaryfp, "# Iteration %d, temperature %g, %d rules, ",
                iter, temp, (int) nrules_now);
      else
        fprintf(summaryfp, "%d\t%g\t%d", iter, temp, (int) nrules_now);
      fflush(summaryfp);
    }

    cur_neglogp = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp, debuglevel,
                                       maxsentlen, minruleprob, wordscale, counts,
                                       &yieldweight_total, weighted_yields_flag);

    if (show_summary) {
      if (verbose)
        fprintf(summaryfp, "-logP = %g, bits/token = %g.\n",
                cur_neglogp, cur_neglogp/(log(2)*(yieldweight_total)));
      else
        fprintf(summaryfp, "\t%g\t%g\n",
                cur_neglogp, cur_neglogp/(log(2)*(yieldweight_total)));
      fflush(summaryfp);
    }

    if (show_counts) {
      write_rule_values(tracefp, g, si, counts, 0);
      fprintf(tracefp, "\n");
      fflush(tracefp);
    }

    if (show_grammar)
      write_grammar(summaryfp, g, si, minruleprob);

    /* Stopping is only considered once the rule count has settled and the
       minimum number of iterations has been done. */
    if (nrules_now == nrules_before && iter >= minits) {
      if (maxits > 0 && iter >= maxits)
        break;                                   /* iteration cap reached */
      if ((prev_neglogp-cur_neglogp)/fabs(cur_neglogp) < stoptol)
        break;                                   /* relative improvement too small */
    }

    prev_neglogp = cur_neglogp;
  }

  FREE(counts);

  return(cur_neglogp/(log(2)*yieldweight_total));
}