Exemplo n.º 1
0
/*
 * Program entry point.
 *
 * Parses the command-line flags, reads a grammar (from the file given with -g,
 * otherwise from stdin) and opens the yield file named by the single
 * positional argument.  It then performs `nruns` (-R) independent runs: each
 * run copies the initial grammar, optionally jitters its weights (-j) and
 * calls inside_outside() on the copy.
 *
 * When debuglevel (-d) is at least 1, the grammar resulting from each run is
 * written to stdout and, if -g was given, also to "<grammarfile>New".
 *
 * Exits with EXIT_FAILURE on a bad flag, a missing yield file argument, a file
 * that cannot be opened, or a grammar file name too long for the internal
 * buffer; otherwise exits with EXIT_SUCCESS.
 */
int
main(int argc, char **argv)
{
  si_t  si = make_si(1024);
  FILE  *grammarfp = stdin;
  FILE  *yieldfp;
  FILE  *tracefp = stderr;    /* set this to NULL to stop trace output */
  FILE  *summaryfp = stderr;  /* set this to NULL to stop parse stats */
  grammar g0, g = NULL;
  int   maxlen = 0, minits = 0, maxits = 0;
  FLOAT stoptol = 1e-7;
  int nanneal = 0, randseed = 97;
  FLOAT rule_bias_default = 0;
  FLOAT annealstart = 1, annealstop = 1;
  FLOAT jitter0 = 0, jitter = 0;
  int debuglevel = 0, nruns = 1, irun = 0;
  FLOAT wordscale=1;
  int VariationalBayes=0;
  /* Output grammar path; stays empty unless -g supplies a grammar file. */
  char filename[100] = "";

  {
    int chr;

    while ((chr = getopt(argc, argv, "a:g:s:p:l:m:n:d:t:b:B:N:j:J:S:R:T:VW:")) != -1)
      switch (chr) {
      case 'a':
        rule_bias_default = atof(optarg);
        break;
      case 'g': {
        /* Build "<grammarfile>New" with a bounds check instead of an
           unbounded copy into the fixed-size buffer. */
        int len = snprintf(filename, sizeof filename, "%sNew", optarg);
        if (len < 0 || (size_t) len >= sizeof filename) {
          fprintf(stderr, "Error: grammarfile name %s is too long\n%s",
                  optarg, usage);
          exit(EXIT_FAILURE);
        }
        grammarfp = fopen(optarg, "r");
        if (grammarfp == NULL) {
          fprintf(stderr, "Error: couldn't open grammarfile %s\n%s",
                  optarg, usage);
          exit(EXIT_FAILURE);
        }
        break;
      }
      case 's':
        stoptol = atof(optarg);
        break;
      case 'p':
        minruleprob = atof(optarg);
        break;
      case 'l':
        maxlen = atoi(optarg);
        break;
      case 'm':
        minits = atoi(optarg);
        break;
      case 'n':
        maxits = atoi(optarg);
        break;
      case 'b':
        annealstart = atof(optarg);
        break;
      case 'B':
        annealstop = atof(optarg);
        break;
      case 'N':
        nanneal = atoi(optarg);
        break;
      case 'j':
        jitter0 = atof(optarg);
        break;
      case 'J':
        jitter = atof(optarg);
        break;
      case 'S':
        randseed = atoi(optarg);
        break;
      case 'd':
        debuglevel = atoi(optarg);
        break;
      case 'R':
        nruns = atoi(optarg);
        break;
      case 'T':
        summaryfp = fopen(optarg, "w");
        /* A NULL summaryfp would silently disable all summary output, so
           report the failure instead. */
        if (summaryfp == NULL) {
          fprintf(stderr, "Error: couldn't open summary file %s\n%s",
                  optarg, usage);
          exit(EXIT_FAILURE);
        }
        break;
      case 'V':
        VariationalBayes = 1;
        break;
      case 'W':
        wordscale = atof(optarg);
        break;
      case '?':
      default:
        fprintf(stderr, "Error: unknown command line flag %c\n\n%s\n",
                chr, usage);
        exit(EXIT_FAILURE);
        break;
      }
  }

  /* Exactly one positional argument (the yield file) must remain. */
  if (optind + 1 != argc) {
    fprintf(stderr, "Error: expect a yieldfile\n\n%s\n", usage);
    exit(EXIT_FAILURE);
  }

  if ((yieldfp = fopen(argv[optind], "r")) == NULL) {
    fprintf(stderr, "Error: Couldn't open yieldfile %s\n%s", argv[optind], usage);
    exit(EXIT_FAILURE);
  }

  srand(randseed);
  if (summaryfp && debuglevel >= 100)
    fprintf(summaryfp, "# rule_bias_default (-a) = %g, stoptol (-s) = %g, minruleprob (-p) = %g, "
            "maxlen (-l) = %d, minits (-m) = %d, maxits = (-n) = %d, annealstart (-b) = %g, "
            "annealstop (-B) = %g, nanneal (-N) = %d, jitter0 (-j) = %g, jitter (-J) = %g, "
            "VariationalBayes (-V) = %d, wordscale (-W) = %g, randseed (-S) = %d, "
            "debuglevel (-d) = %d, nruns (-R) = %d\n",
            rule_bias_default, stoptol, minruleprob, maxlen, minits, maxits, annealstart,
            annealstop, nanneal, jitter0, jitter, VariationalBayes, wordscale, randseed, debuglevel, nruns);

  g0 = read_grammar(grammarfp, si, rule_bias_default);
  write_grammar(stdout, g0, si, minruleprob);

  set_rule_weights(g0, g0->weights, VariationalBayes);      /* normalize rule counts */

  signal(SIGINT, write_grammar_);

  for (irun = 0; irun < nruns; ++irun) {
    FLOAT entropy;

    g = copy_grammar(g0, si);

    if (summaryfp && debuglevel >= 100)
      fprintf(summaryfp, "# Run %d\n", irun);

    /* Publish the current run's state through the file-level globals
       (presumably read by the SIGINT handler -- confirm against write_grammar_). */
    g_global = g;
    si_global = si;

    if (jitter0 > 0)
      jitter_weights(g, jitter0);

    entropy = inside_outside(g, si, yieldfp, tracefp, summaryfp, debuglevel, maxlen,
                             minits, maxits, stoptol, minruleprob, jitter, VariationalBayes, wordscale,
                             annealstart, annealstop, nanneal);

    if (summaryfp && debuglevel >= 0)
      fprintf(summaryfp, "# run %d, entropy %g, %ld rules\n", irun, entropy, (long) g->nrules);

    if (debuglevel >= 1) {
      write_grammar(stdout, g, si, minruleprob);

      /* Also save the grammar to "<grammarfile>New"; there is no such path
         when the grammar was read from stdin. */
      if (filename[0] != '\0') {
        FILE *outfp = fopen(filename, "w");
        if (outfp == NULL)
          fprintf(stderr, "Error: couldn't open output grammarfile %s\n", filename);
        else {
          write_grammar(outfp, g, si, minruleprob);
          if (fclose(outfp) != 0)
            fprintf(stderr, "Error: couldn't finish writing %s\n", filename);
        }
      }
      fprintf(stdout, "\n");
      fflush(stdout);
    }

    free_grammar(g);
  }

  free_grammar(g0);
  si_free(si);

  /* Release the streams this function opened; the standard streams are left alone. */
  fclose(yieldfp);
  if (grammarfp != stdin)
    fclose(grammarfp);
  if (summaryfp != NULL && summaryfp != stderr)
    fclose(summaryfp);

  if (mmm_blocks_allocated)
    fprintf(stderr, "Error in mrf(): %ld memory block(s) not deallocated\n",
            mmm_blocks_allocated);

  /* check that everything has been deallocated */
  assert(mmm_blocks_allocated == 0);
  exit(EXIT_SUCCESS);
}
Exemplo n.º 2
0
/*
 * Reports the outcome of one expected_rule_counts() pass, in this order:
 *   - the -logP and bits/token figures on summaryfp when debuglevel >= 1000
 *     (tab-separated below 5000, sentence form from 5000 up);
 *   - the rule counts on tracefp when debuglevel >= 10000;
 *   - the grammar on summaryfp when 5000 <= debuglevel < 10000.
 * A NULL stream suppresses the output that would go to it.
 */
static void
io_report_counts(grammar g, const si_t si, FILE *tracefp, FILE *summaryfp,
                 int debuglevel, FLOAT minruleprob, FLOAT *rule_counts,
                 FLOAT neglog_prob, FLOAT yieldweights)
{
  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel >= 5000)
      fprintf(summaryfp, "-logP = %g, bits/token = %g.\n", neglog_prob,
              neglog_prob/(log(2)*(yieldweights)));
    else
      fprintf(summaryfp, "\t%g\t%g\n", neglog_prob,
              neglog_prob/(log(2)*(yieldweights)));
    fflush(summaryfp);
  }

  if (tracefp && debuglevel >= 10000) {
    write_rule_values(tracefp, g, si, rule_counts, 0);
    fprintf(tracefp, "\n");
    fflush(tracefp);
  }

  if (summaryfp && debuglevel >= 5000 && debuglevel < 10000)
    write_grammar(summaryfp, g, si, minruleprob);
}

/*
 * Iteratively re-estimates the rule weights of grammar g from the yields in
 * yieldfp.
 *
 * An initial expected_rule_counts() pass is made with the incoming weights.
 * Each following iteration adds the bias to the counts, turns them into rule
 * weights, prunes the grammar with minruleprob, optionally jitters the
 * weights, renormalises, applies the annealing temperature while
 * iteration < nanneal, and recomputes the expected counts.
 *
 * Iteration stops once the rule count did not change in the last iteration,
 * at least minits iterations have run, and either maxits (when positive) has
 * been reached or the relative decrease of the summed negative log
 * probability is below stoptol.
 *
 * Progress is written to summaryfp / tracefp depending on debuglevel; either
 * stream may be NULL to suppress its output.
 *
 * Returns the final summed negative log probability divided by
 * log(2) * sum of yield weights (the "bits/token" figure of the summary).
 */
FLOAT
inside_outside(grammar g, const si_t si, FILE *yieldfp,
               FILE *tracefp, FILE *summaryfp, int debuglevel,
               int maxsentlen, int minits, int maxits,
               FLOAT stoptol, FLOAT minruleprob,
               FLOAT jitter, int VariationalBayes, FLOAT wordscale,
               FLOAT annealstart, FLOAT annealstop, int nanneal,
               int weighted_yields_flag)
{
  FLOAT *rule_counts = CALLOC(g->nrules, sizeof(FLOAT));
  FLOAT prev_neglog;            /* -logP of the previous pass */
  FLOAT cur_neglog;             /* -logP of the current pass */
  FLOAT sum_yieldweights;
  FLOAT temperature = annealstart;
  size_t nrules = g->nrules;
  size_t nrules_before;
  int   iteration = 0;

  /* Iteration 0: evaluate the grammar exactly as it was passed in. */
  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel >= 5000)
      fprintf(summaryfp, "# Iteration %d, temperature = %g, %d rules, ",
              iteration, temperature, (int) nrules);
    else
      fprintf(summaryfp, "# Iteration\ttemperature\tnrules\t-logP\tbits/token\n%d\t%g\t%d",
              iteration, temperature, (int) nrules);
    fflush(summaryfp);
  }

  prev_neglog = expected_rule_counts(g, si, yieldfp, tracefp,
                                     summaryfp, debuglevel,
                                     maxsentlen, minruleprob, wordscale,
                                     rule_counts, &sum_yieldweights,
                                     weighted_yields_flag);

  io_report_counts(g, si, tracefp, summaryfp, debuglevel, minruleprob,
                   rule_counts, prev_neglog, sum_yieldweights);

  for (iteration = 1; ; ++iteration) {
    /* Turn the counts of the previous pass into the new rule weights. */
    add_bias(g, rule_counts);
    set_rule_weights(g, rule_counts, VariationalBayes);
    prune_grammar(g, si, minruleprob);
    if (jitter != 0)
      jitter_weights(g, jitter);
    set_rule_weights(g, g->weights, 0);

    /* The temperature moves geometrically from annealstart towards annealstop
       during the first nanneal-1 iterations, then stays at 1. */
    if (iteration < nanneal) {
      temperature = annealstart*pow(annealstop/annealstart, (iteration-1.0)/(nanneal-1.0));
      scale_weights(g, 1.0/temperature);
    }
    else
      temperature = 1.0;

    nrules_before = nrules;
    nrules = g->nrules;

    if (summaryfp && debuglevel >= 1000) {
      if (debuglevel >= 5000)
        fprintf(summaryfp, "# Iteration %d, temperature %g, %d rules, ",
                iteration, temperature, (int) nrules);
      else
        fprintf(summaryfp, "%d\t%g\t%d", iteration, temperature, (int) nrules);
      fflush(summaryfp);
    }

    cur_neglog = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp, debuglevel,
                                      maxsentlen, minruleprob, wordscale,
                                      rule_counts, &sum_yieldweights, weighted_yields_flag);

    io_report_counts(g, si, tracefp, summaryfp, debuglevel, minruleprob,
                     rule_counts, cur_neglog, sum_yieldweights);

    /* Stopping is only considered once pruning has left the rule count
       unchanged and the minimum number of iterations has been done. */
    if (nrules == nrules_before && iteration >= minits) {
      if (maxits > 0 && iteration >= maxits)
        break;
      if ((prev_neglog-cur_neglog)/fabs(cur_neglog) < stoptol)
        break;
    }

    prev_neglog = cur_neglog;
  }

  FREE(rule_counts);

  return(cur_neglog/(log(2)*sum_yieldweights));
}