static chart outside_chart(const grammar g, const si_t si, const chart inside_chart, const vindex terms, FLOAT yieldweight, FLOAT *rule_counts) { int left, right; size_t nwords = terms->n; sihashf root_inside_cell = CHART_ENTRY(inside_chart, 0, nwords); FLOAT root_prob = sihashf_ref(root_inside_cell, g->root_label); chart outside_chart = chart_make(nwords); sihashf root_outside_cell = make_sihashf(CHART_CELLS); catproblist cp; root_prob /= yieldweight; /* pretend we saw this sentence this many times */ /* install root cell */ CHART_ENTRY(outside_chart, 0, nwords) = root_outside_cell; for (cp = g->parent_childprob[g->root_label]; cp; cp = cp->next) if (sihashf_ref(root_inside_cell, cp->cat) > 0.0) sihashf_set(root_outside_cell, cp->cat, cp->prob); increment_unary_counts(g, root_inside_cell, root_outside_cell, root_prob, rule_counts); for (right=nwords; right>=1; right--) for (left=0; left<right; left++) if ((left!=0)||(right!=nwords)) /* skip root cell */ binary_outside(g, left, right, nwords, inside_chart, outside_chart, root_prob, rule_counts); /* now update counts for unary rules expanding to terminals */ for (left=0; left < nwords; left++) { rulelist rl; sihashf outside_cell = CHART_ENTRY(outside_chart, left, left+1); for (rl=g->urules[terms->e[left]]; rl; rl=rl->next) { FLOAT outside_prob = sihashf_ref(outside_cell, g->rules[rl->ruleid]->e[0]) * g->weights[rl->ruleid]; rule_counts[rl->ruleid] += outside_prob/root_prob; sihashf_inc(outside_cell, terms->e[left], outside_prob); } } return outside_chart; }
/*
 * cky() builds and returns a chart for the terminals in terms using the
 * rules of grammar g.
 *
 *   terms  the input terminals (terms.n of them, in terms.e[])
 *   g      grammar handed to follow_unary(), apply_unary() and apply_binary()
 *   si     not referenced in this function
 *
 * The chart is created with chart_make() and returned to the caller.
 */
chart cky(struct vindex terms, grammar g, si_t si)
{
  const int nwords = (int) terms.n;
  chart c = chart_make(terms.n);
  int left, mid;

  /* Lexical pass: add an edge for each terminal to its width-one cell and
   * follow unary rules up from it. */
  for (left = 0; left < nwords; left++) {
    const int right = left + 1;
    si_index label = terms.e[left];
    sihashcc chart_entry = CHART_ENTRY(c, left, left+1);
    sihashcc left_vertex = c->vertex[left];
    chart_cell cell = add_edge(chart_entry, label, NULL, NULL, 1.0, right, left_vertex);

    assert(cell);              /* check that cell was actually added */
    follow_unary(cell, chart_entry, g, right, left_vertex);
  }

  /* Syntactic pass: left edge moves from the last word back to the first. */
  for (left = nwords - 1; left >= 0; left--) {
    for (mid = left + 1; mid < nwords; mid++) {
      sihashcc chart_entry = CHART_ENTRY(c, left, mid);

      /* unary-close the cell spanning left..mid; width-one cells are
       * skipped here (presumably because the lexical pass already ran
       * follow_unary() on them -- confirm) */
      if (mid > left + 1) {
        apply_unary(chart_entry, g, mid, c->vertex[left]);
      }

      /* now apply binary rules */
      apply_binary(chart_entry, left, mid, c, g);
    }

    /* Cells spanning from left to the end of the sentence only get unary
     * rules; there is no need to apply binary rules to these.
     * NOTE(review): there is no width guard on this call, so for
     * left == nwords-1 it is applied to a width-one cell as well. */
    apply_unary(CHART_ENTRY(c, left, terms.n), g, nwords, c->vertex[left]);
  }

  return c;
}
static chart inside_chart(vindex terms, grammar g, si_t si, FLOAT wordscale) { int left, right, mid; chart c = chart_make(terms->n); /* parent_completes is a sihashf of completed categories (i.e., not the * new, active categories produced by binarization). Unary closure * applies to these categories. * The pre-unary-closure parent weights are stored in parent_completes * before unary closure is applied to them */ /* Inside pass */ /* insert lexical items */ for (left=0; left< (int) terms->n; left++) { rulelist rl; si_index terminal = terms->e[left]; sihashf chart_entry = make_sihashf(NLABELS); CHART_ENTRY(c, left, left+1) = chart_entry; assert(terminal>0); if (terminal<=g->nnts) { fprintf(stderr, "Error in inside_chart() in expected-counts.c: " "input contains nonterminal symbol %s\n", si_index_string(si, terminal)); exit(EXIT_FAILURE); } if (terminal>g->ncats) { fprintf(stderr, "Error in inside_chart() in expected-counts.c:" " input contains unknown terminal %s\n", si_index_string(si, terminal)); exit(EXIT_FAILURE); } /* no need to actually enter terminal into chart */ /* sihashf_set(chart_entry, terminal, 1.0); */ rl = g->urules[terminal]; assert(rl); /* check there are rules for this terminal */ for ( ; rl; rl=rl->next) { si_index preterminal = g->rules[rl->ruleid]->e[0]; FLOAT preterminal_prob = g->weights[rl->ruleid]*wordscale; catproblist pp; /* assert(rl->ruleid<g->nrules); */ assert(g->child_parentprob[preterminal]); for (pp = g->child_parentprob[preterminal]; pp; pp = pp->next) sihashf_inc(chart_entry, pp->cat, preterminal_prob*pp->prob); } /* fprintf(stderr, "Chart entry %d-%d\n", (int) left, (int) left+1); chart_entry_display(stderr, chart_entry, si); */ } for (right=2; right<= (int) terms->n; right++) for (left=right-2; left>=0; left--) { sihashf parent_completes = make_sihashf(COMPLETE_CELLS); sihashf chart_entry = make_sihashf(CHART_CELLS); CHART_ENTRY(c, left, right) = chart_entry; for (mid=left+1; mid<right; mid++) binary_inside(g, chart_entry, 
parent_completes, CHART_ENTRY(c,left,mid), CHART_ENTRY(c,mid,right)); unary_closure_inside(g, chart_entry, parent_completes); free_sihashf(parent_completes); /* fprintf(stdout, "Chart entry %d-%d\n", (int) left, (int) right); */ /* chart_entry_display(stdout, CHART_ENTRY(inside,left,right), si); */ } return c; }