Esempio n. 1
0
File: lncky.c Progetto: mjpost/cky
/*
 * read_terms() gathers the values returned by successive read_cat(fp, si)
 * calls into a vindex, stopping at the first call that returns zero.
 *
 * The vector starts with room for 10 entries and its capacity is doubled
 * (via vindex_resize) whenever it is full.
 *
 * Returns the vindex, with ->n set to the number of entries read and
 * resized to that count, or NULL (after freeing the vector) if no entry
 * was read at all.
 */
static vindex
read_terms(FILE *fp, si_t si)
{
  size_t capacity = 10;
  size_t count = 0;
  vindex terms = make_vindex(capacity);
  si_index cat;

  for (;;) {
    cat = read_cat(fp, si);
    if (!cat)
      break;                    /* a zero index ends the sequence */

    if (count >= capacity) {    /* vector is full: double its capacity */
      capacity *= 2;
      vindex_resize(terms, capacity);
    }
    assert(count < capacity);
    vindex_ref(terms, count++) = cat;
  }

  if (count == 0) {             /* nothing read: hand back no vector */
    vindex_free(terms);
    return NULL;
  }

  terms->n = count;
  vindex_resize(terms, terms->n);
  return terms;
}
Esempio n. 2
0
/*
 * read_terms() reads an optional weight followed by a sequence of
 * category indices from fp.
 *
 * *yieldweight is always set to 1.0 first.  When weighted_yields_flag is
 * non-zero, a float is then read from fp into *yieldweight with fscanf:
 * an EOF result makes the function return NULL, and any other failure to
 * read exactly one value prints an error on stderr and exits the program.
 *
 * After that, the values returned by successive read_cat(fp, si) calls
 * are stored in a vindex until read_cat returns zero.  The vector starts
 * with room for 10 entries and is doubled (via vindex_resize) when full.
 *
 * Returns the vindex, with ->n set to the number of entries read and
 * resized to that count, or NULL (after freeing the vector) if no entry
 * was read.
 */
static vindex
read_terms(int weighted_yields_flag, FILE *fp, si_t si, float *yieldweight)
{
  size_t capacity = 10;
  size_t count = 0;
  vindex terms;
  si_index cat;

  *yieldweight = 1.0;

  if (weighted_yields_flag) {
    int nread = fscanf(fp, "%g", yieldweight);

    if (nread != 1) {
      if (nread == EOF)         /* input exhausted before a weight */
        return NULL;
      fprintf(stderr, "Error in read_terms in expected_counts.c: "
              "Unable to read a weight for yield\n");
      exit(EXIT_FAILURE);
    }
  }

  terms = make_vindex(capacity);

  for (;;) {
    cat = read_cat(fp, si);
    if (!cat)
      break;                    /* a zero index ends the sequence */

    if (count >= capacity) {    /* vector is full: double its capacity */
      capacity *= 2;
      vindex_resize(terms, capacity);
    }
    assert(count < capacity);
    vindex_ref(terms, count++) = cat;
  }

  if (count == 0) {             /* nothing read: hand back no vector */
    vindex_free(terms);
    return NULL;
  }

  terms->n = count;
  vindex_resize(terms, terms->n);
  return terms;
}
Esempio n. 3
0
File: lncky.c Progetto: mjpost/cky
/*
 * main() -- run the CKY parser over every yield in a file.
 *
 * Usage: prog yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]
 *
 *   yieldfile   file of yields, read one at a time with read_terms()
 *   maxsentlen  yields longer than this are reported as "too_long."
 *               instead of being parsed; 0 (the default) disables the limit
 *   grammarfile grammar to load; stdin is used when not given
 *   sentfrom    1-based number of the first yield to process; 0 = no bound
 *   sentto      1-based number of the last yield to process; 0 = no bound
 *
 * sentfrom and sentto are only read when both are supplied (argc >= 6).
 *
 * For each processed yield the parse tree found under g.root_label is
 * written to stdout, or "parse_failure." if the chart has no such entry.
 * A summary is written to stderr at the end.  Exits with EXIT_FAILURE on
 * a usage error, an unopenable file or an unparsable numeric argument.
 */
int
main(int argc, char **argv)
{
  si_t          si = make_si(1024);
  FILE          *grammarfp = stdin, *yieldfp;
  FILE          *tracefp = NULL;        /* trace output */
  FILE          *summaryfp = stderr;    /* end of parse stats output */
  FILE          *parsefp = stdout;      /* parse trees */
  FILE          *probfp = NULL;         /* max_neglog_prob */

  chart_cell    root_cell;
  grammar       g;
  chart         c;
  vindex        terms;
  int           maxsentlen = 0;
  int           sentenceno = 0, parsed_sentences = 0, failed_sentences = 0;
  double        sum_neglog_prob = 0;
  int           sentfrom = 0;
  int           sentto = 0;

  srand(RAND_SEED);     /* seed random number generator */

  if (argc<2 || argc>6) {
    fprintf(stderr, "%s yieldfile [maxsentlen [grammarfile [sentfrom sentto]]]\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  if ((yieldfp = fopen(argv[1], "r")) == NULL) {
    fprintf(stderr, "%s: Couldn't open yieldfile %s\n", argv[0], argv[1]);
    exit(EXIT_FAILURE);
  }

  /* sscanf returns EOF (non-zero) when the argument is empty or blank, so
     the result is compared against the number of expected conversions
     rather than tested for zero. */
  if (argc >= 3)
    if (sscanf(argv[2], "%d", &maxsentlen) != 1) {
      fprintf(stderr, "%s: Couldn't parse maxsentlen %s\n", argv[0], argv[2]);
      exit(EXIT_FAILURE);
    }

  if (argc >= 4)
    if ((grammarfp = fopen(argv[3], "r")) == NULL) {
      fprintf(stderr, "%s: Couldn't open grammarfile %s\n", argv[0], argv[3]);
      exit(EXIT_FAILURE);
    }

  if (argc >= 6) {
    if (sscanf(argv[4], "%d", &sentfrom) != 1) {
      fprintf(stderr, "%s: Couldn't parse sentfrom %s\n", argv[0], argv[4]);
      exit(EXIT_FAILURE);
    }
    if (sscanf(argv[5], "%d", &sentto) != 1) {
      fprintf(stderr, "%s: Couldn't parse sentto %s\n", argv[0], argv[5]);
      exit(EXIT_FAILURE);
    }
  }

  g = read_grammar(grammarfp, si);
  /* write_grammar(tracefp, g, si); */

  while ((terms = read_terms(yieldfp, si))) {
    sentenceno++;

    /* yields before the requested range are skipped ... */
    if (sentfrom && sentenceno < sentfrom) {
      vindex_free(terms);
      continue;
    }
    /* ... and the first yield past the range ends the run */
    if (sentto && sentenceno > sentto) {
      vindex_free(terms);
      break;
    }

    /* skip if sentence is too long */
    if (!maxsentlen || (int) terms->n <= maxsentlen) {
      size_t    i;

      if (tracefp) {
        fprintf(tracefp, "\nSentence %d:\n", sentenceno);
        for (i=0; i<terms->n; i++)
          fprintf(tracefp, " %s", si_index_string(si, terms->e[i]));
        fprintf(tracefp, "\n");
      }

      c = cky(*terms, g, si);

      /* fetch best root node */

      root_cell = sihashcc_ref(CHART_ENTRY(c, 0, terms->n), g.root_label);

      if (root_cell) {
        tree parse_tree = bintree_tree(&root_cell->tree, si);
        double prob = (double) root_cell->prob;

        parsed_sentences++;
        assert(prob > 0.0);
        sum_neglog_prob -= log(prob);

        if (probfp)
          fprintf(probfp, "max_neglog_prob(%d, %g).\n",
                  sentenceno, -log(prob));

        if (tracefp)
          fprintf(tracefp, " Prob = %g\n", prob);

        if (parsefp) {
          write_tree(parsefp, parse_tree, si);
          fprintf(parsefp, "\n");
          /* write_prolog_tree(parsefp, parse_tree, si); */
        }

        free_tree(parse_tree);
      }

      else {
        failed_sentences++;
        if (tracefp)
          fprintf(tracefp, "Failed to parse\n");
        if (parsefp)
          fprintf(parsefp, "parse_failure.\n");
      }

      chart_free(c, terms->n);                  /* free the chart */
    }
    else {                                      /* sentence too long */
      if (parsefp)
        fprintf(parsefp, "too_long.\n");
    }

    vindex_free(terms);                         /*  free the terms */
    assert(trees_allocated == 0);
    assert(bintrees_allocated == 0);
  }
  free_grammar(g);
  si_free(si);

  if (summaryfp) {
    int attempted = parsed_sentences + failed_sentences;
    /* Guard the denominators so an empty yield file (or one where no
       yield was attempted) reports 0% instead of dividing by zero. */
    double pct_attempted = sentenceno ? (100.0 * attempted) / sentenceno : 0.0;
    double pct_parsed = attempted ? (100.0 * parsed_sentences) / attempted : 0.0;

    fprintf(summaryfp, "\n%d/%d = %g%% test sentences met the length criteron,"
            " of which %d/%d = %g%% were parsed\n",
            attempted, sentenceno, pct_attempted,
            parsed_sentences, attempted, pct_parsed);
    fprintf(summaryfp, "Sum(-log prob) = %g\n", sum_neglog_prob);
  }

  /* check that everything has been deallocated */
  /* printf("mmm_blocks_allocated = %ld\n", (long) mmm_blocks_allocated); */
  assert(mmm_blocks_allocated == 0);
  exit(EXIT_SUCCESS);
}
Esempio n. 4
0
/*
 * expected_rule_counts() makes one pass over the yields in yieldfp,
 * building an inside chart for each and, when the root has non-zero
 * weight, an outside chart that is given rule_counts to update.
 *
 *   g, si             grammar and symbol table used for every yield
 *   yieldfp           yield file; rewound before reading
 *   tracefp           per-sentence trace output, written only when
 *                     non-NULL and debuglevel >= 10000
 *   summaryfp         receives the failed-sentence count when non-NULL
 *                     and debuglevel >= 1000
 *   maxsentlen        yields with more terms are skipped; 0 = no limit
 *   minruleprob       passed to compute_unary_closure()
 *   wordscale         passed to inside_chart(); its log, times the yield
 *                     length, is subtracted back out of the log weight
 *   rule_counts       array of g->nrules entries; zeroed on entry, then
 *                     passed to outside_chart() for each parsed yield
 *   sum_yieldweights  set to the sum of yieldweight * length over the
 *                     yields that parsed
 *   weighted_yields_flag  passed to read_terms(); selects whether each
 *                     yield is preceded by a weight
 *
 * Returns the sum over parsed yields of
 *   -yieldweight * (log(root_prob) - length * log(wordscale)).
 */
FLOAT
expected_rule_counts(const grammar g, const si_t si, FILE *yieldfp,
                     FILE *tracefp, FILE *summaryfp, int debuglevel,
                     int maxsentlen, FLOAT minruleprob, FLOAT wordscale,
                     FLOAT *rule_counts, FLOAT *sum_yieldweights,
                     int weighted_yields_flag)
{
  vindex  terms;
  FLOAT   root_prob;
  chart   inside, outside;
  long    sentenceno = 0, parsed_sentences = 0, failed_sentences = 0;
  double  sum_neglog_prob = 0.0;
  float   yieldweight;

  *sum_yieldweights = 0;
  /*  FLOAT *rule_counts = CALLOC(g->nrules, sizeof(FLOAT)); */

  { size_t i;                    /* zero rule counts */
    for (i=0; i<g->nrules; i++)
      rule_counts[i] = 0.0;
  }

  compute_unary_closure(g, minruleprob); /* compute unary_close */

  rewind(yieldfp);               /* rewind the tree file */

  while ((terms = read_terms(weighted_yields_flag, yieldfp, si, &yieldweight))) {
    sentenceno++;

    /* This block writes to tracefp, so it must be guarded by tracefp
       (as every other trace print below is), not by summaryfp. */
    if (tracefp && debuglevel >= 10000) {
      size_t    i;
      fprintf(tracefp, "\nSentence %ld:\n", sentenceno);

      for (i=0; i<terms->n; i++)
        fprintf(tracefp, " %s", si_index_string(si, terms->e[i]));
      fprintf(tracefp, "\n");
    }

    /* skip if sentence is too long */
    if (!maxsentlen || (int) terms->n <= maxsentlen) {
      inside = inside_chart(terms, g, si, wordscale);
      /* chart_display(stdout, inside, terms->n, si);  */
      root_prob = sihashf_ref(CHART_ENTRY(inside, 0, terms->n),
                              g->root_label);

      if (root_prob > 0.0) {
        if (tracefp && debuglevel >= 10000)
          fprintf(tracefp, "Sum of derivation weights = %g\n",
                  root_prob);
        sum_neglog_prob -= yieldweight*(log(root_prob)-terms->n*log(wordscale));
        *sum_yieldweights += yieldweight*terms->n;
        parsed_sentences++;
        outside = outside_chart(g, si, inside, terms, yieldweight, rule_counts);
        /* assert(consistent_preterm_outsides(outside, terms, root_prob)); */
        /* chart_display(stdout, outside, terms->n, si); */
        chart_free(outside, terms->n);
      }
      else {
        failed_sentences++;
        if (tracefp && debuglevel >= 10000)
          fprintf(tracefp, "Failed to parse.\n");
      }
      chart_free(inside, terms->n);             /* free the chart */
    }
    else {                                      /* sentence too long */
      if (tracefp && debuglevel >= 10000)
        fprintf(tracefp, "Too long to parse.\n");
    }
    vindex_free(terms);                         /*  and its terms */
  }

  /* free unary closure */
  free_unary_closure(g);

  if (summaryfp && debuglevel >= 1000) {
    if (failed_sentences>0)
      fprintf(summaryfp, " %ld sentences failed to parse",
              failed_sentences);
  }
  return(sum_neglog_prob);
}