static pp_linkset *read_link_set(pp_knowledge *k, const char *label,
                                 String_set *ss)
{
    /* Read the link set, marked by 'label' in the knowledge file, into a set
       of links whose handle is returned.  If the link set is not defined in
       the file, the set is taken to be empty; on a lexer error, the
       &LINK_SET_ERROR sentinel is returned. */
    int n_strings, i;
    pp_linkset *ls;

    if (!pp_lexer_set_label(k->lt, label)) {
        if (verbosity_level(+D_PPK))
            prt_error("Warning: File %s: Link set %s not defined: assuming empty\n",
                      k->path, label);
        n_strings = 0;
    } else {
        n_strings = pp_lexer_count_tokens_of_label(k->lt);
        if (-1 == n_strings) return &LINK_SET_ERROR;
    }

    ls = pp_linkset_open(n_strings);
    for (i = 0; i < n_strings; i++)
        pp_linkset_add(ls,
            string_set_add(pp_lexer_get_next_token_of_label(k->lt), ss));
    return ls;
}
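/*
 * Illustrative only: a minimal sketch of how a caller of read_link_set()
 * might distinguish a lexer failure (the &LINK_SET_ERROR sentinel above)
 * from a legitimately empty set.  The label "RESTRICTED_LINKS", the
 * function name, and the k->restricted_links field are hypothetical;
 * only the sentinel convention comes from the function above.
 */
#if 0
static bool example_load_label(pp_knowledge *k)
{
    pp_linkset *ls = read_link_set(k, "RESTRICTED_LINKS", k->string_set);
    if (&LINK_SET_ERROR == ls) return false;  /* lexer error, not an empty set */
    k->restricted_links = ls;                 /* hypothetical field */
    return true;
}
#endif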
void prune_irrelevant_rules(Postprocessor *pp)
{
    /* call this (a) after having called post_process_scan_linkage() on all
       generated linkages, but (b) before calling post_process() on any
       particular linkage.  Here we mark all rules which we know (from having
       accumulated a set of link names appearing in *any* linkage) won't
       ever be needed. */
    pp_rule *rule;
    int coIDX, cnIDX, rcoIDX = 0, rcnIDX = 0;

    /* If we didn't scan any linkages, there's no pruning to be done. */
    if (pp_linkset_population(pp->set_of_links_of_sentence) == 0) return;

    for (coIDX = 0; ; coIDX++) {
        rule = &(pp->knowledge->contains_one_rules[coIDX]);
        if (rule->msg == NULL) break;
        if (pp_linkset_match_bw(pp->set_of_links_of_sentence, rule->selector)) {
            /* mark rule as being relevant to this sentence */
            pp->relevant_contains_one_rules[rcoIDX++] = coIDX;
            pp_linkset_add(pp->set_of_links_in_an_active_rule, rule->selector);
        }
    }
    pp->relevant_contains_one_rules[rcoIDX] = -1;  /* end sentinel */

    for (cnIDX = 0; ; cnIDX++) {
        rule = &(pp->knowledge->contains_none_rules[cnIDX]);
        if (rule->msg == NULL) break;
        if (pp_linkset_match_bw(pp->set_of_links_of_sentence, rule->selector)) {
            pp->relevant_contains_none_rules[rcnIDX++] = cnIDX;
            pp_linkset_add(pp->set_of_links_in_an_active_rule, rule->selector);
        }
    }
    pp->relevant_contains_none_rules[rcnIDX] = -1;

    if (verbosity > 1) {
        printf("Saw %i unique link names in all linkages.\n",
               pp_linkset_population(pp->set_of_links_of_sentence));
        printf("Using %i 'contains one' rules and %i 'contains none' rules\n",
               rcoIDX, rcnIDX);
    }
}
static void read_contains_rules(pp_knowledge *k, const char *label,
                                pp_rule **rules, int *nRules)
{
    /* Reads the 'contains_one_rules' and the 'contains_none_rules'
       into their respective arrays. */
    int n_commas, n_tokens, i, r;
    const char *p;
    const char **tokens;

    if (!pp_lexer_set_label(k->lt, label)) {
        *nRules = 0;
        if (verbosity > 0)
            printf("PP warning: Not using any %s rules\n", label);
    } else {
        n_commas = pp_lexer_count_commas_of_label(k->lt);
        *nRules = (n_commas + 1)/3;
    }
    *rules = (pp_rule *) xalloc((1 + *nRules) * sizeof(pp_rule));
    for (r = 0; r < *nRules; r++) {
        /* first read link */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens > 1) {
            prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",
                      label, r+1);
            exit(1);
        }
        (*rules)[r].selector = string_set_add(tokens[0], k->string_set);

        /* read link set */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        (*rules)[r].link_set = pp_linkset_open(n_tokens);
        (*rules)[r].link_set_size = n_tokens;
        (*rules)[r].link_array =
            (const char **) xalloc((1 + n_tokens) * sizeof(const char *));
        for (i = 0; i < n_tokens; i++) {
            p = string_set_add(tokens[i], k->string_set);
            pp_linkset_add((*rules)[r].link_set, p);
            (*rules)[r].link_array[i] = p;
        }
        (*rules)[r].link_array[i] = 0;  /* NULL terminator */

        /* read error message */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens > 1) {
            prt_error("Fatal Error: post_process: Invalid syntax in %s (rule %i)",
                      label, r+1);
            exit(1);
        }
        (*rules)[r].msg = string_set_add(tokens[0], k->string_set);
    }
    /* sentinel entry */
    (*rules)[*nRules].msg = 0;
}
/**
 * Call this (a) after having called post_process_scan_linkage() on all
 * generated linkages, but (b) before calling post_process() on any
 * particular linkage.  Here we mark all rules which we know (from having
 * accumulated a set of link names appearing in *any* linkage) won't
 * ever be needed.
 */
static void prune_irrelevant_rules(Postprocessor *pp)
{
    pp_rule *rule;
    int coIDX, cnIDX, rcoIDX = 0, rcnIDX = 0;

    /* If we didn't scan any linkages, there's no pruning to be done. */
    if (pp_linkset_population(pp->set_of_links_of_sentence) == 0) return;

    for (coIDX = 0; ; coIDX++) {
        rule = &(pp->knowledge->contains_one_rules[coIDX]);
        if (rule->msg == NULL) break;
        if (pp_linkset_match_bw(pp->set_of_links_of_sentence, rule->selector)) {
            /* Mark rule as being relevant to this sentence */
            pp->relevant_contains_one_rules[rcoIDX++] = coIDX;
            pp_linkset_add(pp->set_of_links_in_an_active_rule, rule->selector);
        }
    }
    pp->relevant_contains_one_rules[rcoIDX] = -1;  /* end sentinel */

    for (cnIDX = 0; ; cnIDX++) {
        rule = &(pp->knowledge->contains_none_rules[cnIDX]);
        if (rule->msg == NULL) break;
        if (pp_linkset_match_bw(pp->set_of_links_of_sentence, rule->selector)) {
            pp->relevant_contains_none_rules[rcnIDX++] = cnIDX;
            pp_linkset_add(pp->set_of_links_in_an_active_rule, rule->selector);
        }
    }
    pp->relevant_contains_none_rules[rcnIDX] = -1;

    if (debug_level(5)) {
        printf("PP: Saw %zu unique link names in all linkages.\n",
               pp_linkset_population(pp->set_of_links_of_sentence));
        printf("PP: Using %i 'contains one' rules and %i 'contains none' rules\n",
               rcoIDX, rcnIDX);
    }
}
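/*
 * Illustrative only: how the -1-terminated index arrays written above
 * would typically be walked when post-processing one particular linkage.
 * apply_one_rule() and note_violation() are hypothetical placeholders for
 * the real rule check; only the sentinel convention and the field names
 * come from the code above.
 */
#if 0
static void example_walk_relevant_rules(Postprocessor *pp, Linkage linkage)
{
    int i, idx;

    /* Walk only the 'contains one' rules marked relevant for this sentence. */
    for (i = 0; (idx = pp->relevant_contains_one_rules[i]) != -1; i++) {
        pp_rule *rule = &pp->knowledge->contains_one_rules[idx];
        if (!apply_one_rule(pp, linkage, rule))   /* hypothetical checker */
            note_violation(pp, rule->msg);        /* hypothetical */
    }
}
#endif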
/**
 * During a first pass (prior to actual post-processing of the linkages
 * of a sentence), call this once for every generated linkage.  Here we
 * simply maintain a set of "seen" link names for rule pruning, later on.
 */
void post_process_scan_linkage(Postprocessor *pp, Linkage linkage)
{
    size_t i;
    if (pp == NULL) return;
    for (i = 0; i < linkage->num_links; i++) {
        assert(linkage->link_array[i].lw != SIZE_MAX);
        pp_linkset_add(pp->set_of_links_of_sentence,
                       linkage->link_array[i].link_name);
    }
}
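/*
 * Illustrative only: the intended two-pass calling sequence described in
 * the comments above.  The driver function, the linkage array, and
 * post_process_this_linkage() are hypothetical placeholders for whatever
 * code owns the generated linkages; only post_process_scan_linkage() and
 * prune_irrelevant_rules() are taken from this file.
 */
#if 0
static void example_two_pass_driver(Postprocessor *pp,
                                    Linkage *linkages, size_t num_linkages)
{
    size_t i;

    for (i = 0; i < num_linkages; i++)       /* pass 1: collect link names */
        post_process_scan_linkage(pp, linkages[i]);

    prune_irrelevant_rules(pp);              /* drop rules that cannot fire */

    for (i = 0; i < num_linkages; i++)       /* pass 2: real post-processing */
        post_process_this_linkage(pp, linkages[i]);  /* hypothetical */
}
#endif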
static void initialize_set_of_links_starting_bounded_domain(pp_knowledge *k)
{
    int i, j, d, domain_of_rule;
    k->set_of_links_starting_bounded_domain =
        pp_linkset_open(PP_MAX_UNIQUE_LINK_NAMES);
    for (i = 0; k->bounded_rules[i].msg != 0; i++) {
        domain_of_rule = k->bounded_rules[i].domain;
        for (j = 0; (d = (k->starting_link_lookup_table[j].domain)) != -1; j++)
            if (d == domain_of_rule)
                pp_linkset_add(k->set_of_links_starting_bounded_domain,
                               k->starting_link_lookup_table[j].starting_link);
    }
}
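/*
 * Illustrative only: a sketch of how the set built above might be
 * consulted when a domain is opened by a link.  The helper name and its
 * argument are hypothetical, and the exact pp_linkset_match() signature
 * is assumed here rather than taken from this file.
 */
#if 0
static bool example_starts_bounded_domain(pp_knowledge *k, const char *link_name)
{
    /* True if this link name opens a domain constrained by a 'bounded' rule. */
    return pp_linkset_match(k->set_of_links_starting_bounded_domain, link_name);
}
#endif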
static bool read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
    size_t n_commas, n_tokens;
    size_t r, i;
    pp_linkset *lsHandle;
    const char **tokens;

    if (!pp_lexer_set_label(k->lt, label)) {
        k->n_form_a_cycle_rules = 0;
        if (verbosity_level(+D_PPK))
            prt_error("Warning: File %s: Not using any 'form a cycle' rules\n",
                      k->path);
    } else {
        n_commas = pp_lexer_count_commas_of_label(k->lt);
        k->n_form_a_cycle_rules = (n_commas + 1)/2;
    }
    k->form_a_cycle_rules =
        (pp_rule *) malloc((1 + k->n_form_a_cycle_rules) * sizeof(pp_rule));
    for (r = 0; r < k->n_form_a_cycle_rules; r++) {
        /* read link set */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (0 == n_tokens) {
            prt_error("Error: File %s: Syntax error\n", k->path);
            return false;
        }
        lsHandle = pp_linkset_open(n_tokens);
        for (i = 0; i < n_tokens; i++)
            pp_linkset_add(lsHandle, string_set_add(tokens[i], k->string_set));
        k->form_a_cycle_rules[r].link_set = lsHandle;

        /* read error message */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens > 1) {
            prt_error("Error: File %s: Invalid syntax (rule %zu of %s)\n",
                      k->path, r+1, label);
            return false;
        }
        k->form_a_cycle_rules[r].msg = string_set_add(tokens[0], k->string_set);
        k->form_a_cycle_rules[r].use_count = 0;
    }
    /* sentinel entry */
    k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
    k->form_a_cycle_rules[k->n_form_a_cycle_rules].use_count = 0;
    return true;
}
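/*
 * Illustrative only: the shape of a 'form a cycle' entry implied by the
 * parsing loop above -- comma-separated token groups under the label, two
 * groups per rule (a set of link names, then a single-token message).
 * The label and link names below are hypothetical, and the exact quoting
 * conventions are whatever pp_lexer accepts, not shown here.
 *
 *   FORM_A_CYCLE_RULES:
 *       LinkA LinkB LinkC , "these links must form a cycle" ,
 *       LinkD             , "LinkD must form a cycle" ,
 */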
void post_process_scan_linkage(Postprocessor *pp, Parse_Options opts,
                               Sentence sent, Sublinkage *sublinkage)
{
    /* During a first pass (prior to actual post-processing of the linkages
       of a sentence), call this once for every generated linkage.  Here we
       simply maintain a set of "seen" link names for rule pruning later on. */
    char *p;
    int i;
    if (pp == NULL) return;
    if (sent->length < opts->twopass_length) return;
    for (i = 0; i < sublinkage->num_links; i++) {
        if (sublinkage->link[i]->l == -1) continue;
        p = string_set_add(sublinkage->link[i]->name, pp->sentence_link_name_set);
        pp_linkset_add(pp->set_of_links_of_sentence, p);
    }
}
static void read_form_a_cycle_rules(pp_knowledge *k, const char *label)
{
    int n_commas, n_tokens, r, i;
    pp_linkset *lsHandle;
    const char **tokens;

    if (!pp_lexer_set_label(k->lt, label)) {
        k->n_form_a_cycle_rules = 0;
        if (verbosity > 0)
            printf("PP warning: Not using any 'form a cycle' rules\n");
    } else {
        n_commas = pp_lexer_count_commas_of_label(k->lt);
        k->n_form_a_cycle_rules = (n_commas + 1)/2;
    }
    k->form_a_cycle_rules =
        (pp_rule *) xalloc((1 + k->n_form_a_cycle_rules) * sizeof(pp_rule));
    for (r = 0; r < k->n_form_a_cycle_rules; r++) {
        /* read link set */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens <= 0) {
            prt_error("Fatal Error: syntax error in knowledge file");
            exit(1);
        }
        lsHandle = pp_linkset_open(n_tokens);
        for (i = 0; i < n_tokens; i++)
            pp_linkset_add(lsHandle, string_set_add(tokens[i], k->string_set));
        k->form_a_cycle_rules[r].link_set = lsHandle;

        /* read error message */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens > 1) {
            prt_error("Fatal Error: post_process: Invalid syntax (rule %i of %s)",
                      r+1, label);
            exit(1);
        }
        k->form_a_cycle_rules[r].msg = string_set_add(tokens[0], k->string_set);
    }
    /* sentinel entry */
    k->form_a_cycle_rules[k->n_form_a_cycle_rules].msg = 0;
}
static pp_linkset *read_link_set(pp_knowledge *k, const char *label,
                                 String_set *ss)
{
    /* Read the link set, marked by 'label' in the knowledge file, into a set
       of links whose handle is returned.  If the link set is not defined in
       the file, the set is taken to be empty. */
    int n_strings, i;
    pp_linkset *ls;

    if (!pp_lexer_set_label(k->lt, label)) {
        if (verbosity > 0)
            printf("PP warning: Link set %s not defined: assuming empty.\n", label);
        n_strings = 0;
    } else {
        n_strings = pp_lexer_count_tokens_of_label(k->lt);
    }
    ls = pp_linkset_open(n_strings);
    for (i = 0; i < n_strings; i++)
        pp_linkset_add(ls,
            string_set_add(pp_lexer_get_next_token_of_label(k->lt), ss));
    return ls;
}
static bool read_contains_rules(pp_knowledge *k, const char *label,
                                pp_rule **rules, size_t *nRules)
{
    /* Reads the 'contains_one_rules' and the 'contains_none_rules'
       into their respective arrays. */
    size_t n_tokens, i, r;
    int n_commas;
    const char *p;
    const char **tokens;

    if (!pp_lexer_set_label(k->lt, label)) {
        *nRules = 0;
        if (verbosity_level(+D_PPK))
            prt_error("Warning: File %s: Not using any %s rules\n", k->path, label);
    } else {
        n_commas = pp_lexer_count_commas_of_label(k->lt);
        if (-1 == n_commas) return false;
        *nRules = (n_commas + 1)/3;
    }
    *rules = (pp_rule *) malloc((1 + *nRules) * sizeof(pp_rule));
    for (r = 0; r < *nRules; r++) {
        /* first read link */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens > 1) {
            prt_error("Error: File %s: Invalid syntax in %s (rule %zu)\n",
                      k->path, label, r+1);
            return false;
        }
        (*rules)[r].selector = string_set_add(tokens[0], k->string_set);

        /* read link set */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        (*rules)[r].link_set = pp_linkset_open(n_tokens);
        (*rules)[r].link_set_size = n_tokens;
        (*rules)[r].link_array =
            (const char **) malloc((1 + n_tokens) * sizeof(const char *));
        for (i = 0; i < n_tokens; i++) {
            p = string_set_add(tokens[i], k->string_set);
            pp_linkset_add((*rules)[r].link_set, p);
            (*rules)[r].link_array[i] = p;
        }
        (*rules)[r].link_array[i] = 0;  /* NULL terminator */

        /* read error message */
        tokens = pp_lexer_get_next_group_of_tokens_of_label(k->lt, &n_tokens);
        if (n_tokens > 1) {
            prt_error("Error: File %s: Invalid syntax in %s (rule %zu)\n",
                      k->path, label, r+1);
            return false;
        }
        (*rules)[r].msg = string_set_add(tokens[0], k->string_set);
        (*rules)[r].use_count = 0;
    }
    /* sentinel entry */
    (*rules)[*nRules].msg = 0;
    (*rules)[*nRules].use_count = 0;
    return true;
}
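/*
 * Illustrative only: the shape of a 'contains' entry implied by the
 * parsing loop above -- three comma-separated token groups per rule: a
 * single selector link, a set of link names, and a single-token message.
 * The label, link names, and message text below are hypothetical.
 *
 *   CONTAINS_ONE_RULES:
 *       SelectorLink , LinkA LinkB , "SelectorLink requires LinkA or LinkB" ,
 */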