Exemple #1
0
/* outside_chart() runs the outside pass for one sentence.
 *
 * It allocates a fresh chart with chart_make(), seeds the cell spanning the
 * whole sentence from g->parent_childprob[g->root_label], calls
 * binary_outside() for every other span, and finally processes the unary
 * rules in g->urules[] for each word of the sentence.  rule_counts[] is
 * incremented directly for those word-level rules; it is also handed to
 * increment_unary_counts() and binary_outside().
 *
 *  g            - grammar; root_label, parent_childprob, urules, rules and
 *                 weights are read here
 *  si           - not used in this function
 *  inside_chart - inside chart for the same sentence; only its root cell is
 *                 read directly here, the rest is passed to binary_outside()
 *  terms        - the sentence: terms->n words stored in terms->e[]
 *  yieldweight  - the root inside value is divided by this, i.e. we pretend
 *                 the sentence was seen this many times
 *  rule_counts  - array indexed by rule id, incremented in place
 *
 * Returns the newly built outside chart.
 */
static chart
outside_chart(const grammar g, const si_t si, const chart inside_chart,
              const vindex terms, FLOAT yieldweight, FLOAT *rule_counts)
{
  int          start, end;
  size_t       nwords = terms->n;
  sihashf      inside_root = CHART_ENTRY(inside_chart, 0, nwords);
  FLOAT        sentence_prob = sihashf_ref(inside_root, g->root_label);
  chart        outside = chart_make(nwords);
  sihashf      outside_root = make_sihashf(CHART_CELLS);
  catproblist  entry;

  /* pretend we saw this sentence yieldweight times */
  sentence_prob /= yieldweight;

  /* The root cell is installed and filled here; the span loop below
   * deliberately skips it.
   */
  CHART_ENTRY(outside, 0, nwords) = outside_root;

  /* Copy the root label's parent_childprob entries into the root cell, but
   * only for categories with a positive value in the inside root cell.
   */
  entry = g->parent_childprob[g->root_label];
  while (entry) {
    if (sihashf_ref(inside_root, entry->cat) > 0.0) {
      sihashf_set(outside_root, entry->cat, entry->prob);
    }
    entry = entry->next;
  }

  increment_unary_counts(g, inside_root, outside_root, sentence_prob,
                         rule_counts);

  /* Every remaining span: right edge from the end of the sentence down to 1,
   * and for each right edge the left edge from 0 upwards.
   */
  for (end = (int) nwords; end >= 1; end--) {
    for (start = 0; start < end; start++) {
      if (start == 0 && (size_t) end == nwords)
        continue;                              /* root cell, handled above */
      binary_outside(g, start, end, nwords,
                     inside_chart, outside, sentence_prob, rule_counts);
    }
  }

  /* Unary rules expanding to the words themselves.  For each rule in
   * g->urules[word] the contribution is the outside value of the rule's
   * parent (g->rules[id]->e[0]) in the word's cell times the rule weight.
   * rule_counts[id] receives contribution/sentence_prob, and the
   * contribution is also accumulated under the word's own index in the cell.
   */
  for (start = 0; (size_t) start < nwords; start++) {
    sihashf  word_cell = CHART_ENTRY(outside, start, start+1);
    rulelist rule = g->urules[terms->e[start]];

    while (rule) {
      FLOAT contribution = sihashf_ref(word_cell,
                                       g->rules[rule->ruleid]->e[0]) *
                           g->weights[rule->ruleid];
      rule_counts[rule->ruleid] += contribution/sentence_prob;
      sihashf_inc(word_cell, terms->e[start], contribution);
      rule = rule->next;
    }
  }

  return outside;
}
Exemple #2
0
/* cky() builds and returns a chart for the labels in terms.
 *
 * Each input label is first entered into its one-word cell with add_edge()
 * (both pointer arguments NULL, value 1.0) and then passed to
 * follow_unary().  After that the start positions are visited from last to
 * first; for each one apply_unary() and apply_binary() are called on the
 * cells (left, mid) with mid short of the sentence end, and apply_unary()
 * alone on the cell that reaches the sentence end.
 *
 *  terms - the input, passed by value: terms.n labels held in terms.e[]
 *  g     - grammar, handed through to the unary/binary rule routines
 *  si    - not used by this function
 *
 * The chart is obtained from chart_make() and handed back as the return
 * value.
 */
chart
cky(struct vindex terms, grammar g, si_t si)
{
  const int nwords = (int) terms.n;
  chart     c = chart_make(terms.n);
  int       left, width;

  /* insert lexical items */
  for (left = 0; left < nwords; left++) {
    si_index    word = terms.e[left];
    sihashcc    word_entry = CHART_ENTRY(c, left, left+1);
    sihashcc    vertex = c->vertex[left];
    chart_cell  cell = add_edge(word_entry, word, NULL, NULL, 1.0,
                                left+1, vertex);

    assert(cell);  /* check that cell was actually added */
    follow_unary(cell, word_entry, g, left+1, vertex);
  }

  /* syntactic rules, working from the last start position to the first */
  for (left = nwords - 1; left >= 0; left--) {
    for (width = 1; left + width < nwords; width++) {
      const int mid = left + width;
      sihashcc  span_entry = CHART_ENTRY(c, left, mid);

      /* One-word cells were already passed to follow_unary() above, so
       * apply_unary() is only called here on wider spans.
       */
      if (width > 1) {
        apply_unary(span_entry, g, mid, c->vertex[left]);
      }
      /* now apply binary rules */
      apply_binary(span_entry, left, mid, c, g);
    }

    /* Apply unary rules to the cell spanning from left to the end of the
     * sentence; there is no need to apply binary rules to it.
     * NOTE(review): when left == nwords-1 this is a one-word cell that has
     * already been through follow_unary() -- confirm that calling
     * apply_unary() on it as well is intended/harmless.
     */
    apply_unary(CHART_ENTRY(c, left, terms.n), g, nwords, c->vertex[left]);
  }

  return c;
}
Exemple #3
0
/* inside_chart() builds the chart of inside values for one sentence.
 *
 * One-word cells are filled from the unary rules in g->urules[] for each
 * terminal, combined with the g->child_parentprob[] list of the rule's
 * parent.  Wider cells are then filled with binary_inside() over every
 * split point, followed by unary_closure_inside().
 *
 *  terms     - the sentence: terms->n terminals stored in terms->e[]
 *  g         - grammar; nnts, ncats, urules, rules, weights and
 *              child_parentprob are read here
 *  si        - used only to print symbol names in the error messages
 *  wordscale - every terminal rule weight is multiplied by this
 *
 * Returns the chart obtained from chart_make().  The process exits with
 * EXIT_FAILURE if a terminal index is <= g->nnts (reported as a nonterminal
 * in the input) or > g->ncats (reported as an unknown terminal).
 */
static chart
inside_chart(vindex terms, grammar g, si_t si, FLOAT wordscale)
{
  const int nwords = (int) terms->n;
  chart     c = chart_make(terms->n);
  int       pos, right, span;

  /* Inside pass */

  /* insert lexical items */

  for (pos = 0; pos < nwords; pos++) {
    si_index  terminal = terms->e[pos];
    sihashf   word_cell = make_sihashf(NLABELS);
    rulelist  rl;

    CHART_ENTRY(c, pos, pos+1) = word_cell;

    /* the input symbol must lie in the range (g->nnts, g->ncats] */
    assert(terminal>0);
    if (terminal<=g->nnts) {
      fprintf(stderr, 
              "Error in inside_chart() in expected-counts.c: "
              "input contains nonterminal symbol %s\n", 
              si_index_string(si, terminal));
      exit(EXIT_FAILURE);
    }
    if (terminal>g->ncats) {
      fprintf(stderr, 
              "Error in inside_chart() in expected-counts.c:"
              " input contains unknown terminal %s\n", 
              si_index_string(si, terminal));
      exit(EXIT_FAILURE);
    }

    /* The terminal itself is not entered into the chart.  Instead, for each
     * rule rewriting to it, the scaled rule weight times each
     * child_parentprob entry of the rule's parent is added to the cell
     * under that entry's category.
     */
    rl = g->urules[terminal];
    assert(rl);   /* check there are rules for this terminal */
    while (rl) {
      si_index     preterminal = g->rules[rl->ruleid]->e[0];
      FLOAT        scaled_weight = g->weights[rl->ruleid]*wordscale;
      catproblist  parent;

      assert(g->child_parentprob[preterminal]);
      for (parent = g->child_parentprob[preterminal]; parent;
           parent = parent->next) {
        sihashf_inc(word_cell, parent->cat, scaled_weight*parent->prob);
      }
      rl = rl->next;
    }
  }

  /* Wider spans: right edge ascending, and for each right edge the span
   * width ascending (i.e. the left edge moving from right-2 down to 0).
   *
   * completes is a sihashf of completed categories (i.e., not the new,
   * active categories produced by binarization).  Unary closure applies to
   * these categories.  The pre-unary-closure parent weights are stored in
   * completes before unary closure is applied to them; it is scratch space
   * and is freed once the cell is finished.
   */
  for (right = 2; right <= nwords; right++) {
    for (span = 2; span <= right; span++) {
      const int left = right - span;
      sihashf   completes = make_sihashf(COMPLETE_CELLS);
      sihashf   span_cell = make_sihashf(CHART_CELLS);
      int       mid;

      CHART_ENTRY(c, left, right) = span_cell;

      /* combine the two sub-spans at every split point */
      for (mid = left+1; mid < right; mid++) {
        binary_inside(g, span_cell, completes,
                      CHART_ENTRY(c,left,mid), CHART_ENTRY(c,mid,right));
      }

      unary_closure_inside(g, span_cell, completes);
      free_sihashf(completes);
    }
  }

  return c;
}