Ejemplo n.º 1
0
/* binary_inside() adds to a parent cell the inside weight contributed by
 * one way of splitting the parent's span into a left and a right cell.
 *
 * g->brules[] is indexed by right-child category.  For every binary rule
 * whose right child has positive weight in right_entry and whose left
 * child has positive weight in left_entry:
 *   - if the rule has an active_parent, left*right weight is added to
 *     that category in parent_entry;
 *   - for every rule on its completes list, left*right*rule weight is
 *     added to the rule's parent category (g->rules[id]->e[0]) in
 *     parent_completes.
 */
static void
binary_inside(const grammar g, sihashf parent_entry, sihashf parent_completes,
	      sihashf left_entry, sihashf right_entry)
{
  si_index rcat;

  /* an empty child cell cannot take part in any binary combination */
  if (sihashf_size(left_entry) == 0 || sihashf_size(right_entry) == 0)
    return;

  for (rcat = 1; rcat <= g->nnts; rcat++) {
    brule br = g->brules[rcat];
    FLOAT rweight;

    if (!br)
      continue;                 /* no binary rule has rcat as right child */

    rweight = sihashf_ref(right_entry, rcat);
    if (!(rweight > 0.0))
      continue;                 /* rcat is absent from the right cell */

    while (br) {
      FLOAT lweight = sihashf_ref(left_entry, br->left);

      if (lweight > 0.0) {
        rulelist done;

        /* active (binarization) categories bypass unary closure */
        if (br->active_parent)
          sihashf_inc(parent_entry, br->active_parent, lweight*rweight);

        for (done = br->completes; done; done = done->next) {
          si_index parent_cat = g->rules[done->ruleid]->e[0];
          assert(parent_cat <= g->nnts);
          sihashf_inc(parent_completes, parent_cat,
                      lweight*rweight*g->weights[done->ruleid]);
        }
      }

      br = br->next;
    }
  }
}
Ejemplo n.º 2
0
/* unary_closure_inside() propagates every (category, weight) pair stored
 * in parent_completes through g->child_parentprob[]: for each listed
 * parent of that category, weight * pp->prob is added to the parent's
 * entry in parent_entry.
 */
static void
unary_closure_inside(const grammar g, sihashf parent_entry, 
		     sihashf parent_completes)
{
  sihashfit it = sihashfit_init(parent_completes);

  while (sihashfit_ok(it)) {
    catproblist link = g->child_parentprob[it.key];

    while (link) {
      sihashf_inc(parent_entry, link->cat, it.value*link->prob);
      link = link->next;
    }

    it = sihashfit_next(it);
  }
}
Ejemplo n.º 3
0
/* outside_chart() builds and returns the outside chart for a sentence
 * and accumulates expected rule counts into rule_counts[].
 *
 *   g            the grammar
 *   si           not used in this function
 *   inside_chart inside chart for the same sentence
 *   terms        the words of the sentence
 *   yieldweight  the root inside weight is divided by this before it is
 *                used as the normaliser, so every count added here is
 *                scaled up by yieldweight
 *   rule_counts  array indexed by rule id; incremented in place
 *
 * Cells are filled from the root downwards: the root cell first, then
 * every other span in order of decreasing right edge and increasing left
 * edge, and finally the rules that rewrite to the words themselves.
 */
static chart
outside_chart(const grammar g, const si_t si, const chart inside_chart,
	      const vindex terms, FLOAT yieldweight, FLOAT *rule_counts)
{
  size_t      nwords = terms->n;
  sihashf     root_in = CHART_ENTRY(inside_chart, 0, nwords);
  FLOAT       root_prob = sihashf_ref(root_in, g->root_label);
  chart       out = chart_make(nwords);
  sihashf     root_out = make_sihashf(CHART_CELLS);
  catproblist link;
  int         left, right;

  /* pretend we saw this sentence yieldweight times */
  root_prob = root_prob / yieldweight;

  /* the root cell is seeded here rather than in binary_outside() */
  CHART_ENTRY(out, 0, nwords) = root_out;

  link = g->parent_childprob[g->root_label];
  while (link) {
    if (sihashf_ref(root_in, link->cat) > 0.0)
      sihashf_set(root_out, link->cat, link->prob);
    link = link->next;
  }

  increment_unary_counts(g, root_in, root_out, root_prob, rule_counts);

  /* every remaining span, widest right edge first */
  for (right = nwords; right >= 1; right--) {
    for (left = 0; left < right; left++) {
      if (left == 0 && right == nwords)
        continue;               /* the root cell was handled above */
      binary_outside(g, left, right, nwords,
                     inside_chart, out, root_prob, rule_counts);
    }
  }

  /* counts for the unary rules that expand to the words themselves */
  for (left = 0; left < nwords; left++) {
    sihashf  cell = CHART_ENTRY(out, left, left+1);
    rulelist rl = g->urules[terms->e[left]];

    while (rl) {
      FLOAT outside_prob = sihashf_ref(cell,
                                       g->rules[rl->ruleid]->e[0]) *
                           g->weights[rl->ruleid];
      rule_counts[rl->ruleid] += outside_prob/root_prob;
      sihashf_inc(cell, terms->e[left], outside_prob);
      rl = rl->next;
    }
  }

  return out;
}
Ejemplo n.º 4
0
/* binary_outside() computes the outside cell for the span
 * (left_pos, right_pos), installs it in outside_chart, and adds the
 * expected counts of the rules involved to rule_counts[].
 *
 *   g             the grammar
 *   left_pos,
 *   right_pos     the span whose outside cell is computed
 *   nwords        number of words in the sentence
 *   inside_chart  inside chart; only read
 *   outside_chart outside chart; the new cell is stored into it, and the
 *                 cells of the wider spans (far_left_pos, right_pos) and
 *                 (left_pos, far_right_pos) are read from it, so those
 *                 must have been computed before this call
 *   root_prob     normaliser that every rule count is divided by
 *   rule_counts   array indexed by rule id; incremented in place
 *
 * A fresh, empty outside cell is always installed, even when the
 * corresponding inside cell is empty and nothing else is done.
 */
static void
binary_outside(const grammar g, size_t left_pos, size_t right_pos, 
	       size_t nwords, const chart inside_chart, chart outside_chart, 
	       FLOAT root_prob, FLOAT *rule_counts)
{
  si_index  right_cat;
  /* completes[cat], for cat in 1..g->nnts, accumulates the outside weight
   * of cat in this cell before unary closure is applied
   */
  FLOAT	    *completes;
  sihashf   child_outside = make_sihashf(CHART_CELLS);
  sihashf   child_inside = CHART_ENTRY(inside_chart, left_pos, right_pos);

  CHART_ENTRY(outside_chart, left_pos, right_pos) = child_outside;
  
  /* if the inside chart cell is empty, then there's no point calculating
   * the outside cell
   */

  if (sihashf_size(child_inside) == 0)
    return;

  /* NOTE(review): the result of MALLOC is used unchecked; presumably the
   * macro handles allocation failure itself -- confirm
   */
  completes = MALLOC((g->nnts+1)*sizeof(FLOAT));
  { size_t child_cat;
    /* index 0 is left uninitialised; it is never read below */
    for (child_cat=1; child_cat<=g->nnts; child_cat++)
      completes[child_cat] = 0.0;   
  } 

  /* try to combine with cells on left */

  /* Here this cell is the right child: the left sibling spans
   * (far_left_pos, left_pos) and the parent spans (far_left_pos, right_pos).
   */
  for (right_cat=1; right_cat<=g->nnts; right_cat++) {
    brule bp = g->brules[right_cat];
    FLOAT right_inside_weight = sihashf_ref(child_inside, right_cat);

    /* rules and inside category? */
    if (bp&&(right_inside_weight>0.0)) {
      for ( ; bp; bp=bp->next) {
	size_t far_left_pos;
	for (far_left_pos=0; far_left_pos<left_pos; far_left_pos++) {
	  FLOAT far_left_inside_weight =
	    sihashf_ref(CHART_ENTRY(inside_chart, far_left_pos, left_pos), 
			bp->left);
	  if (far_left_inside_weight>0.0) {
	    rulelist rp;
	    /* active parent: no rule weight is applied and no rule count
	     * is recorded
	     */
	    if (bp->active_parent) {
	      FLOAT parent_outside_weight =
		sihashf_ref(CHART_ENTRY(outside_chart,far_left_pos,right_pos), 
			    bp->active_parent);
	      if (parent_outside_weight>0.0)
		completes[right_cat] += parent_outside_weight*
		                        far_left_inside_weight;
	    }
	    for (rp=bp->completes; rp; rp=rp->next) {
	      FLOAT parent_outside_weight = 
		sihashf_ref(CHART_ENTRY(outside_chart,far_left_pos,right_pos), 
			    g->rules[rp->ruleid]->e[0]);
	      if (parent_outside_weight>0.0) {
		FLOAT parent_left_rule_weight = parent_outside_weight*
		                                far_left_inside_weight*
                                                g->weights[rp->ruleid];
		completes[right_cat] += parent_left_rule_weight ;
		/* this is the only place a binary rule's count is updated */
		rule_counts[rp->ruleid] += parent_left_rule_weight*
		                           right_inside_weight/root_prob;
	      }}}}}}}

  /* try to combine with cells on right */

  /* Here this cell is the left child: the right sibling spans
   * (right_pos, far_right_pos) and the parent spans (left_pos, far_right_pos).
   */
  for (right_cat=1; right_cat<=g->nnts; right_cat++) {
    brule bp;
    size_t far_right_pos;
    for (bp=g->brules[right_cat]; bp; bp=bp->next)
      for (far_right_pos=right_pos+1; far_right_pos<=nwords; far_right_pos++) {
	FLOAT far_right_inside_weight = 
	      sihashf_ref(CHART_ENTRY(inside_chart, right_pos, far_right_pos),
			  right_cat);
	si_index child_cat=bp->left;
	FLOAT    child_inside_weight = sihashf_ref(child_inside, child_cat);
	if ((far_right_inside_weight>0.0)&&(child_inside_weight>0.0)) {
	  rulelist rp;
	  if (bp->active_parent) {
	    FLOAT parent_outside_weight =
	      sihashf_ref(CHART_ENTRY(outside_chart, left_pos, far_right_pos),
			  bp->active_parent);
	    if (parent_outside_weight>0.0) {
	      /* categories up to g->nnts wait in completes[] for unary
	       * closure; any other left child is written straight into
	       * the outside cell
	       */
	      if (child_cat<=g->nnts)
		completes[child_cat] += parent_outside_weight*
		                        far_right_inside_weight;
	      else {
		assert(child_cat>g->ncats); /* otherwise child_cat is a term */
		sihashf_inc(child_outside, child_cat, 
			    parent_outside_weight*far_right_inside_weight);
	      }
	    }}
	  for (rp=bp->completes; rp; rp=rp->next) {
	    FLOAT parent_outside_weight =
	      sihashf_ref(CHART_ENTRY(outside_chart, left_pos, far_right_pos),
			  g->rules[rp->ruleid]->e[0]);
	    if (parent_outside_weight>0.0) {
	      FLOAT parent_right_rule_weight = parent_outside_weight*
		                               far_right_inside_weight*
		                               g->weights[rp->ruleid];
	      if (child_cat<=g->nnts)
		completes[child_cat] += parent_right_rule_weight;
	      else {
		assert(child_cat>g->ncats); /* otherwise child_cat is a term */
		sihashf_inc(child_outside,child_cat,parent_right_rule_weight);
	      }
	      /* don't double count the rule! 
               * it's been counted before from the left 
	       */
	      /* rule_counts[rp->ruleid] += parent_right_rule_weight*
                                            child_inside_weight/root_prob; 
	       */
	    }}}}}

  /* unary closure */  
  /* unary closure for root cell done in outside_chart() */

  /* Each accumulated weight is pushed down through
   * g->parent_childprob[], but only to categories that actually have
   * positive inside weight in this cell.
   */
  { si_index parent_cat;
  
    for (parent_cat=1; parent_cat<=g->nnts; parent_cat++) {
      FLOAT parent_outside_weight = completes[parent_cat];
      if (parent_outside_weight>0.0) {
	catproblist cp;
	for (cp = g->parent_childprob[parent_cat]; cp; cp = cp->next) 
	  if (sihashf_ref(child_inside, cp->cat) > 0.0)
	    sihashf_inc(child_outside, cp->cat, cp->prob*parent_outside_weight);
      }}}

  /* increment unary rule_counts */ 
  /* rule counts for root cell done in outside_chart() */
  increment_unary_counts(g, child_inside, child_outside, root_prob,
			 rule_counts);

  FREE(completes);
}
Ejemplo n.º 5
0
/* inside_chart() builds and returns the inside chart for the sentence in
 * terms.
 *
 *   terms      the words of the sentence, as symbol indices
 *   g          the grammar
 *   si         symbol table, used only to print offending words
 *   wordscale  factor multiplied into the weight of every rule that
 *              rewrites to a word
 *
 * The program exits with an error message if a word is a nonterminal
 * (index <= g->nnts) or an unknown terminal (index > g->ncats).
 */
static chart
inside_chart(vindex terms, grammar g, si_t si, FLOAT wordscale)
{
  chart c = chart_make(terms->n);
  int   left, right, mid;

  /* Width-one cells.  The word itself is not entered into its cell; only
   * the categories reachable from a rule that rewrites to it are.
   */
  left = 0;
  while (left < (int) terms->n) {
    si_index word = terms->e[left];
    sihashf  cell = make_sihashf(NLABELS);
    rulelist rl;

    CHART_ENTRY(c, left, left+1) = cell;

    assert(word>0);
    if (word<=g->nnts) {
      fprintf(stderr, 
              "Error in inside_chart() in expected-counts.c: "
              "input contains nonterminal symbol %s\n", 
              si_index_string(si, word));
      exit(EXIT_FAILURE);
    }
    if (word>g->ncats) {
      fprintf(stderr, 
              "Error in inside_chart() in expected-counts.c:"
              " input contains unknown terminal %s\n", 
              si_index_string(si, word));
      exit(EXIT_FAILURE);
    }

    rl = g->urules[word];
    assert(rl);                 /* every known word must have a rule */
    while (rl) {
      si_index    preterminal = g->rules[rl->ruleid]->e[0];
      FLOAT       preterminal_prob = g->weights[rl->ruleid]*wordscale;
      catproblist pp;

      assert(g->child_parentprob[preterminal]);
      for (pp = g->child_parentprob[preterminal]; pp; pp = pp->next)
        sihashf_inc(cell, pp->cat, preterminal_prob*pp->prob);

      rl = rl->next;
    }

    left++;
  }

  /* Wider cells, by increasing right edge and then decreasing left edge.
   * completed collects the weights of completed (non-active) categories
   * before unary closure; it is discarded once the closure has been
   * added to the cell.
   */
  for (right = 2; right <= (int) terms->n; right++) {
    left = right-2;
    while (left >= 0) {
      sihashf completed = make_sihashf(COMPLETE_CELLS);
      sihashf cell = make_sihashf(CHART_CELLS);

      CHART_ENTRY(c, left, right) = cell;

      mid = left+1;
      while (mid < right) {
        binary_inside(g, cell, completed,
                      CHART_ENTRY(c,left,mid), CHART_ENTRY(c,mid,right));
        mid++;
      }

      unary_closure_inside(g, cell, completed);
      free_sihashf(completed);

      left--;
    }
  }

  return c;
}
Ejemplo n.º 6
0
/* read_grammar() reads a weighted grammar from fp and returns it.
 *
 * Each rule is read as a weight, a parent category, the REWRITES symbol
 * and then between 1 and MAXRHS-1 child categories; read_cat() returning
 * 0 ends the right-hand side.  Reading stops at the first point where a
 * weight cannot be parsed, whether that is end of file or malformed
 * input, and no diagnostic is issued in the latter case.
 *
 * The parent of the first rule becomes the root label.  Rules with more
 * than two children are binarized from the right: for each proper suffix
 * of the right-hand side with two or more categories, a new category is
 * created whose name is that suffix joined with BINSEP.
 *
 * After reading, every rule's prob field is divided by the total weight
 * of all rules (including binarization rules) sharing its parent.
 *
 * The program exits with an error message on: a weight that is not
 * positive, a missing parent category, an empty or over-long right-hand
 * side, or a binarized category name that does not fit in MAXBLABELLEN
 * characters including the terminator.  These checks are deliberately
 * not assert()s, which vanish when NDEBUG is defined and would then let
 * bad input overflow bcat[] or produce a division by zero.
 *
 *   fp  stream to read the grammar from
 *   si  symbol table used to intern category names
 *
 * NOTE(review): only the urs, brs and root_label fields of the returned
 * grammar are set here; if the struct has other fields they are left
 * uninitialised -- confirm against the type's declaration.
 */
grammar
read_grammar(FILE *fp, si_t si) 
{
  sihashbrs left_brules_ht = make_sihashbrs(NLABELS);
  sihashurs child_urules_ht = make_sihashurs(NLABELS);
  sihashf parent_weight_ht = make_sihashf(NLABELS);
  brihashbr brihtbr = make_brihashbr(NLABELS);
  int n;
  double weight;
  urule ur;
  sihashbrsit bhit;
  sihashursit uhit;
  size_t  root_label = 0, lhs, cat, rhs[MAXRHS];

  while ((n = fscanf(fp, " %lg ", &weight)) == 1) {	/* read the count */
    lhs = read_cat(fp, si);

    /* written as !(w > 0) so that a NaN weight is rejected as well */
    if (!(weight > 0)) {
      fprintf(stderr, "read_grammar() in grammar.c: rule weight is not positive\n");
      exit(EXIT_FAILURE);
    }
    if (!lhs) {
      fprintf(stderr, "read_grammar() in grammar.c: rule has no parent category\n");
      exit(EXIT_FAILURE);
    }

    if (!root_label)
      root_label = lhs;
    
    /* No conversions are requested, so the return value cannot tell us
     * whether the symbol was actually present.
     */
    fscanf(fp, " " REWRITES);				/* read the rewrites symbol */

    for (n=0; n<MAXRHS; n++) {				/* read the rhs, n is length of rhs */
      cat = read_cat(fp, si);
      if (!cat)
	break;
      rhs[n] = cat;
    }

    /* the loop only runs to completion if no terminating 0 was seen */
    if (n >= MAXRHS) {
      fprintf(stderr, "read_grammar() in grammar.c: rule rhs too long\n");
      exit(EXIT_FAILURE);
    }

    switch (n) {
    case 0: 
      fprintf(stderr, "read_grammar() in grammar.c: rule with empty rhs\n");
      exit(EXIT_FAILURE);
      break;
    case 1: 
      ur = make_urule(weight, lhs, rhs[0]);
      push_urule(child_urules_ht, ur->child, ur);
      sihashf_inc(parent_weight_ht, ur->parent, weight);
      break;
    case 2:
      add_brule(left_brules_ht, brihtbr, weight, lhs, rhs[0], rhs[1]);
      sihashf_inc(parent_weight_ht, lhs, weight);
      break;
    default: 
      { int start, i, j;
        char bcat[MAXBLABELLEN], *s;
	si_index bparent, left, right;

	right = rhs[n-1];		/* rightmost category */
	for (start=n-2; start>=1; start--) {
	  
	  /* build the name of the category covering rhs[start..n-1] */
	  i = 0;			/* i is index into bcat[] */
	  for (j=start; j<n; j++) {     /* j is index into rhs[] */
	    /* Each write is followed by a check that the next slot still
	     * exists, so no write (including the final '\0') can land
	     * outside bcat[].
	     */
	    if (j!=start) {
	      bcat[i++] = BINSEP;
	      if (i >= MAXBLABELLEN) {
		fprintf(stderr, "read_grammar() in grammar.c: binarized category label too long\n");
		exit(EXIT_FAILURE);
	      }
	    }
	    
	    s = si_index_string(si, rhs[j]);
	    while (*s) {
	      bcat[i++] = *s++;
	      if (i >= MAXBLABELLEN) {
		fprintf(stderr, "read_grammar() in grammar.c: binarized category label too long\n");
		exit(EXIT_FAILURE);
	      }
	    }
	  }

	  bcat[i] = '\0';
	  bparent = si_string_index(si, bcat);
	  left = rhs[start];
	  add_brule(left_brules_ht, brihtbr, weight, bparent, left, right);
	  sihashf_inc(parent_weight_ht, bparent, weight);
	  right = bparent;		/* becomes the right child one level up */
	}
	
	add_brule(left_brules_ht, brihtbr, weight, lhs, rhs[0], right);
	sihashf_inc(parent_weight_ht, lhs, weight);
      }
      break;
    }
  }
  
  free_brihashbr(brihtbr);	/* free brindex hash table */

  { 
    int i; /* normalize grammar rules */

    for (bhit = sihashbrsit_init(left_brules_ht); sihashbrsit_ok(bhit); bhit = sihashbrsit_next(bhit))
      for (i=0; i<bhit.value.n; i++) 
	bhit.value.e[i]->prob /= sihashf_ref(parent_weight_ht, bhit.value.e[i]->parent);

    for (uhit = sihashursit_init(child_urules_ht); sihashursit_ok(uhit); uhit = sihashursit_next(uhit))
      for (i=0; i<uhit.value.n; i++) 
	uhit.value.e[i]->prob /= sihashf_ref(parent_weight_ht, uhit.value.e[i]->parent);
  }
  
  free_sihashf(parent_weight_ht);
 
  {
    grammar g;
    g.urs = child_urules_ht;
    g.brs = left_brules_ht;
    g.root_label = root_label;
    return g;
  }
}