/*
 * Check a "must form a cycle" rule against the current graph.
 *
 * Every link whose name matches rule->link_set is examined in turn: the
 * visited[] marks are cleared, reachable_without_dfs() is started at one
 * end of the link, and the other end must come back marked.
 * (Presumably reachable_without_dfs() walks the graph while refusing to
 * cross the link under examination, so an unmarked far end means the
 * linkage falls apart without that link -- confirm against that helper.)
 *
 * Both the links stored in pp->pp_data.word_links[] and the ones set
 * aside in pp->pp_data.links_to_ignore are checked.
 *
 * Returns TRUE when every matching link passes, FALSE at the first one
 * that does not.
 */
static int apply_must_form_a_cycle(Postprocessor *pp, Sublinkage *sublinkage, pp_rule *rule)
{
	List_o_links *edge;
	int left;

	/* Pass 1: links that are part of the graph. */
	for (left = 0; left < pp->pp_data.length; left++)
	{
		for (edge = pp->pp_data.word_links[left]; edge != NULL; edge = edge->next)
		{
			/* Each link is listed under both of its words; handle it only
			 * from the lower-numbered end so it is tested once. */
			if (left <= edge->word &&
			    pp_linkset_match(rule->link_set, sublinkage->link[edge->link]->name))
			{
				memset(pp->visited, 0, pp->pp_data.length * (sizeof pp->visited[0]));
				reachable_without_dfs(pp, sublinkage, left, edge->word, left);
				if (!pp->visited[edge->word])
				{
					return FALSE;
				}
			}
		}
	}

	/* Pass 2: links that were kept out of the graph. */
	for (edge = pp->pp_data.links_to_ignore; edge != NULL; edge = edge->next)
	{
		if (pp_linkset_match(rule->link_set, sublinkage->link[edge->link]->name))
		{
			/* (left, edge->word) are the left and right ends of this link. */
			left = sublinkage->link[edge->link]->l;

			memset(pp->visited, 0, pp->pp_data.length * (sizeof pp->visited[0]));
			reachable_without_dfs(pp, sublinkage, left, edge->word, left);
			if (!pp->visited[edge->word])
			{
				return FALSE;
			}
		}
	}

	return TRUE;
}
/**
 * Check a "must form a cycle" rule against the current graph.
 *
 * Nominally: returns true if the linkage is still connected when the
 * links named in rule->link_set are ignored.  What the code actually
 * does is examine each matching link on its own: the visited[] marks are
 * cleared, reachable_without_dfs() is started at one end of the link,
 * and the other end must come back marked.  (Presumably that helper
 * refuses to cross the link under examination -- confirm there.)
 *
 * Links stored in pp_data->word_links[] and links set aside in
 * pp_data->links_to_ignore are both checked.
 *
 * @return true when every matching link passes, false at the first one
 *         that does not.
 */
static bool apply_must_form_a_cycle(PP_data *pp_data, Linkage sublinkage, pp_rule *rule)
{
	List_o_links *edge;
	size_t left;

	/* Pass 1: links that are part of the graph. */
	for (left = 0; left < pp_data->num_words; left++)
	{
		for (edge = pp_data->word_links[left]; edge != NULL; edge = edge->next)
		{
			/* Each link is listed under both of its words; handle it only
			 * from the lower-numbered end so it is tested once. */
			if (left <= edge->word &&
			    pp_linkset_match(rule->link_set,
			                     sublinkage->link_array[edge->link].link_name))
			{
				clear_visited(pp_data);
				reachable_without_dfs(pp_data, sublinkage, left, edge->word, left);
				if (!pp_data->visited[edge->word])
				{
					return false;
				}
			}
		}
	}

	/* Pass 2: links that were kept out of the graph. */
	for (edge = pp_data->links_to_ignore; edge != NULL; edge = edge->next)
	{
		if (pp_linkset_match(rule->link_set,
		                     sublinkage->link_array[edge->link].link_name))
		{
			/* (left, edge->word) are the left and right ends of this link. */
			left = sublinkage->link_array[edge->link].lw;

			clear_visited(pp_data);
			reachable_without_dfs(pp_data, sublinkage, left, edge->word, left);

			/* Guard the visited[] lookup below. */
			assert(edge->word < pp_data->num_words, "Bad word index");
			if (!pp_data->visited[edge->word])
			{
				return false;
			}
		}
	}

	return true;
}
/**
 * Recursive graph walk used for domains started by a
 * urfl_only_domain_starter link (build_domains() calls it with
 * w == root == the starter's left word and right == its right word).
 *
 * On each visited word w other than root, every link from w to a
 * lower-numbered word, except the starter link itself, is added to the
 * domain.  The walk then recurses into unvisited neighbours, except:
 *   - from root, neighbours at or beyond `right`, or before root;
 *   - neighbours that lie before both root and w and are reached through
 *     a link matching pp->knowledge->restricted_links.
 *
 * @param pp          postprocessor owning the graph and the visited[] marks
 * @param sublinkage  linkage supplying the link names
 * @param w           word being visited
 * @param root        word the walk started from
 * @param right       upper bound applied to neighbours of root
 * @param start_link  index of the domain's starter link
 */
static void d_depth_first_search(Postprocessor *pp, Linkage sublinkage,
                                 size_t w, size_t root, size_t right,
                                 size_t start_link)
{
	PP_data *pp_data = &pp->pp_data;
	List_o_links *nbr;
	const bool at_root = (w == root);

	assert(w < pp_data->num_words, "Bad word index");
	pp_data->visited[w] = true;

	/* Collect the leftward links of this word; the root contributes none. */
	if (!at_root)
	{
		for (nbr = pp_data->word_links[w]; nbr != NULL; nbr = nbr->next)
		{
			if (nbr->word < w && nbr->link != start_link)
			{
				add_link_to_domain(pp_data, nbr->link);
			}
		}
	}

	/* Recurse.  The guards are tested in the same order as a single
	 * short-circuit condition would test them. */
	for (nbr = pp_data->word_links[w]; nbr != NULL; nbr = nbr->next)
	{
		assert(nbr->word < pp_data->num_words, "Bad word index");

		if (pp_data->visited[nbr->word]) continue;

		/* From the root, stay inside [root, right). */
		if (at_root && nbr->word >= right) continue;
		if (at_root && nbr->word < root) continue;

		/* Do not go left past the root through a restricted link. */
		if (nbr->word < root && nbr->word < w &&
		    pp_linkset_match(pp->knowledge->restricted_links,
		                     sublinkage->link_array[nbr->link].link_name))
		{
			continue;
		}

		d_depth_first_search(pp, sublinkage, nbr->word, root, right, start_link);
	}
}
/*
 * Depth-first walk that marks, in pp->visited[], every word reachable
 * from w without crossing a link whose name matches the link set ls.
 *
 * The caller is expected to have cleared pp->visited[] beforehand; this
 * function only ever sets marks.
 */
static void connectivity_dfs(Postprocessor *pp, Sublinkage *sublinkage,
                             int w, pp_linkset *ls)
{
	List_o_links *edge;

	pp->visited[w] = TRUE;

	for (edge = pp->pp_data.word_links[w]; edge != NULL; edge = edge->next)
	{
		/* Already seen: nothing to do. */
		if (pp->visited[edge->word]) continue;

		/* Links named in ls are treated as if they were not there. */
		if (pp_linkset_match(ls, sublinkage->link[edge->link]->name)) continue;

		connectivity_dfs(pp, sublinkage, edge->word, ls);
	}
}
/**
 * Depth-first walk that marks, in pp->pp_data.visited[], every word
 * reachable from w without crossing a link whose name matches ls.
 *
 * The caller is expected to have cleared the visited[] marks beforehand;
 * this function only ever sets them.
 *
 * @param pp          postprocessor owning the graph and the visited[] marks
 * @param sublinkage  linkage supplying the link names
 * @param w           word to start from; must be a valid word index
 * @param ls          links whose names match this set are not followed
 */
static void connectivity_dfs(Postprocessor *pp, Linkage sublinkage,
                             int w, pp_linkset *ls)
{
	List_o_links *lol;
	/* The graph and the visited[] marks live in the embedded PP_data. */
	PP_data *pp_data = &pp->pp_data;

	/* w is a signed int in this interface: reject negatives explicitly
	 * rather than relying on the implicit conversion to size_t. */
	assert((w >= 0) && ((size_t) w < pp_data->num_words), "Bad word index");
	pp_data->visited[w] = true;

	for (lol = pp_data->word_links[w]; lol != NULL; lol = lol->next)
	{
		assert(lol->word < pp_data->num_words, "Bad word index");

		/* A Linkage keeps its links in link_array[], with the name in
		 * link_name. */
		if (!pp_data->visited[lol->word] &&
		    !pp_linkset_match(ls, sublinkage->link_array[lol->link].link_name))
		{
			connectivity_dfs(pp, sublinkage, (int) lol->word, ls);
		}
	}
}
/** * fill in the pp->pp_data.word_links array with a list of words * neighboring each word (actually a list of links). This is an * undirected graph. */ static void build_graph(Postprocessor *pp, Linkage sublinkage) { size_t link; List_o_links * lol; PP_data *pp_data = &pp->pp_data; /* Get more size, if needed */ if (pp_data->wowlen <= pp_data->num_words) { size_t newsz; pp_data->wowlen += pp_data->num_words; newsz = pp_data->wowlen * sizeof(List_o_links *); pp_data->word_links = (List_o_links **) realloc( pp_data->word_links, newsz); } memset(pp_data->word_links, 0, pp_data->wowlen * sizeof(List_o_links *)); for (link = 0; link < sublinkage->num_links; link++) { assert (sublinkage->link_array[link].lw != SIZE_MAX); if (NULL == sublinkage->link_array[link].link_name) continue; if (pp_linkset_match(pp->knowledge->ignore_these_links, sublinkage->link_array[link].link_name)) { lol = (List_o_links *) malloc(sizeof(List_o_links)); lol->next = pp_data->links_to_ignore; pp_data->links_to_ignore = lol; lol->link = link; lol->word = sublinkage->link_array[link].rw; continue; } lol = (List_o_links *) malloc(sizeof(List_o_links)); lol->next = pp_data->word_links[sublinkage->link_array[link].lw]; pp_data->word_links[sublinkage->link_array[link].lw] = lol; lol->link = link; lol->word = sublinkage->link_array[link].rw; lol = (List_o_links *) malloc(sizeof(List_o_links)); lol->next = pp_data->word_links[sublinkage->link_array[link].rw]; pp_data->word_links[sublinkage->link_array[link].rw] = lol; lol->link = link; lol->word = sublinkage->link_array[link].lw; } }
/*
 * Recursive graph walk used for domains started by a urfl_domain_starter
 * link.
 *
 * On each visited word w other than root, every link from w to a
 * lower-numbered word, except the starter link itself, is added to the
 * domain.  The walk then recurses into unvisited neighbours, except:
 *   - from root, neighbours to the left of root;
 *   - neighbours that lie before both root and w and are reached through
 *     a link matching pp->knowledge->restricted_links.
 *
 * pp          postprocessor owning the graph and the visited[] marks
 * sublinkage  supplies the link names
 * w           word being visited
 * root        word the domain is rooted at
 * start_link  index of the domain's starter link
 */
static void bad_depth_first_search(Postprocessor *pp, Sublinkage *sublinkage,
                                   int w, int root, int start_link)
{
	List_o_links *nbr;

	pp->visited[w] = TRUE;

	/* Collect the leftward links of this word; the root contributes none. */
	if (w != root)
	{
		for (nbr = pp->pp_data.word_links[w]; nbr != NULL; nbr = nbr->next)
		{
			if (nbr->word < w && nbr->link != start_link)
			{
				add_link_to_domain(pp, nbr->link);
			}
		}
	}

	/* Recurse.  The guards are tested in the same order as a single
	 * short-circuit condition would test them. */
	for (nbr = pp->pp_data.word_links[w]; nbr != NULL; nbr = nbr->next)
	{
		if (pp->visited[nbr->word]) continue;

		/* Never leave the root leftwards. */
		if (w == root && nbr->word < w) continue;

		/* Do not go left past the root through a restricted link. */
		if (nbr->word < root && nbr->word < w &&
		    pp_linkset_match(pp->knowledge->restricted_links,
		                     sublinkage->link[nbr->link]->name))
		{
			continue;
		}

		bad_depth_first_search(pp, sublinkage, nbr->word, root, start_link);
	}
}
/*
 * Fill in pp->pp_data.word_links[] with, for each word, the list of its
 * neighbouring words (stored as a list of links).  The dir fields are
 * not set, since this (after fat-link extraction) is an undirected
 * graph: every link is entered under both of its words.
 *
 * Links whose left end is -1 are skipped.  Links whose name matches
 * pp->knowledge->ignore_these_links are kept out of the graph and pushed
 * onto pp->pp_data.links_to_ignore instead, recorded with their right
 * word.
 */
static void build_graph(Postprocessor *pp, Sublinkage *sublinkage)
{
	List_o_links *node;
	int word, link;

	/* Start from an empty list for every word. */
	for (word = 0; word < pp->pp_data.length; word++)
	{
		pp->pp_data.word_links[word] = NULL;
	}

	for (link = 0; link < sublinkage->num_links; link++)
	{
		if (sublinkage->link[link]->l == -1) continue;

		if (pp_linkset_match(pp->knowledge->ignore_these_links,
		                     sublinkage->link[link]->name))
		{
			/* Ignored link: remember it, but keep it out of the graph. */
			node = (List_o_links *) xalloc(sizeof(List_o_links));
			node->next = pp->pp_data.links_to_ignore;
			pp->pp_data.links_to_ignore = node;
			node->link = link;
			node->word = sublinkage->link[link]->r;
		}
		else
		{
			/* Ordinary link: left word sees the right word ... */
			node = (List_o_links *) xalloc(sizeof(List_o_links));
			node->next = pp->pp_data.word_links[sublinkage->link[link]->l];
			pp->pp_data.word_links[sublinkage->link[link]->l] = node;
			node->link = link;
			node->word = sublinkage->link[link]->r;

			/* ... and the right word sees the left word. */
			node = (List_o_links *) xalloc(sizeof(List_o_links));
			node->next = pp->pp_data.word_links[sublinkage->link[link]->r];
			pp->pp_data.word_links[sublinkage->link[link]->r] = node;
			node->link = link;
			node->word = sublinkage->link[link]->l;
		}
	}
}
/**
 * Build pp->pp_data.domain_array[] for the given linkage.
 *
 * Every link that has a name and is not in ignore_these_links is tested,
 * in this order, against the four starter sets in pp->knowledge.  The
 * first set that matches decides how the domain is started:
 *
 *   domain_starter_links            starter link added only if it is also
 *                                   in domain_contains_links;
 *                                   depth_first_search()
 *   urfl_domain_starter_links       starter link always added;
 *                                   bad_depth_first_search()
 *   urfl_only_domain_starter_links  starter link not added;
 *                                   d_depth_first_search()
 *   left_domain_starter_links       starter link not added;
 *                                   left_depth_first_search()
 *
 * Afterwards the domains are sorted with domain_compare() and each one is
 * given the type returned by find_domain_name() for its string; an error
 * is reported through prt_error() when that lookup yields SIZE_MAX.
 */
static void build_domains(Postprocessor *pp, Linkage sublinkage)
{
	PP_data *pp_data = &pp->pp_data;
	size_t link, i, d;
	const char *s;

	pp_data->N_domains = 0;

	for (link = 0; link < sublinkage->num_links; link++)
	{
		assert (sublinkage->link_array[link].lw != SIZE_MAX);

		s = sublinkage->link_array[link].link_name;
		if (NULL == s) continue;
		if (pp_linkset_match(pp->knowledge->ignore_these_links, s)) continue;

		if (pp_linkset_match(pp->knowledge->domain_starter_links, s))
		{
			setup_domain_array(pp, s, link);
			if (pp_linkset_match(pp->knowledge->domain_contains_links, s))
			{
				add_link_to_domain(pp_data, link);
			}
			clear_visited(pp_data);
			depth_first_search(pp, sublinkage,
			                   sublinkage->link_array[link].rw,
			                   sublinkage->link_array[link].lw, link);
			continue;
		}

		if (pp_linkset_match(pp->knowledge->urfl_domain_starter_links, s))
		{
			setup_domain_array(pp, s, link);
			/* always add the starter link to its urfl domain */
			add_link_to_domain(pp_data, link);
			clear_visited(pp_data);
			bad_depth_first_search(pp, sublinkage,
			                       sublinkage->link_array[link].rw,
			                       sublinkage->link_array[link].lw, link);
			continue;
		}

		if (pp_linkset_match(pp->knowledge->urfl_only_domain_starter_links, s))
		{
			setup_domain_array(pp, s, link);
			/* do not add the starter link to its urfl_only domain */
			clear_visited(pp_data);
			d_depth_first_search(pp, sublinkage,
			                     sublinkage->link_array[link].lw,
			                     sublinkage->link_array[link].lw,
			                     sublinkage->link_array[link].rw, link);
			continue;
		}

		if (pp_linkset_match(pp->knowledge->left_domain_starter_links, s))
		{
			setup_domain_array(pp, s, link);
			/* do not add the starter link to a left domain */
			clear_visited(pp_data);
			left_depth_first_search(pp, sublinkage,
			                        sublinkage->link_array[link].lw,
			                        sublinkage->link_array[link].rw, link);
		}
	}

	/* sort the domains by size */
	qsort((void *) pp_data->domain_array, pp_data->N_domains, sizeof(Domain),
	      (int (*)(const void *, const void *)) domain_compare);

	/* sanity check: all links in all domains have a legal domain name */
	for (d = 0; d < pp_data->N_domains; d++)
	{
		i = find_domain_name(pp, pp_data->domain_array[d].string);
		if (i == SIZE_MAX)
		{
			prt_error("Error: post_process(): Need an entry for %s in LINK_TYPE_TABLE",
			          pp_data->domain_array[d].string);
		}
		/* The type is stored even when the lookup failed. */
		pp_data->domain_array[d].type = i;
	}
}
/*
 * Build pp->pp_data.domain_array[] for the given sublinkage.
 *
 * Every link whose left end is not -1 and whose name is not in
 * ignore_these_links is tested, in this order, against the four starter
 * sets in pp->knowledge.  The first set that matches decides how the
 * domain is started:
 *
 *   domain_starter_links            starter link added only if it is also
 *                                   in domain_contains_links;
 *                                   depth_first_search()
 *   urfl_domain_starter_links       starter link always added;
 *                                   bad_depth_first_search()
 *   urfl_only_domain_starter_links  starter link not added;
 *                                   d_depth_first_search()
 *   left_domain_starter_links       starter link not added;
 *                                   left_depth_first_search()
 *
 * N_domains is bumped after each domain is built and asserted to stay
 * below PP_MAX_DOMAINS.  Afterwards the domains are sorted with
 * domain_compare() and each one is given the type returned by
 * find_domain_name() for its string; error() is called when that lookup
 * yields -1.
 */
static void build_domains(Postprocessor *pp, Sublinkage *sublinkage)
{
	int link, i, d;
	char *s;

	pp->pp_data.N_domains = 0;

	for (link = 0; link < sublinkage->num_links; link++)
	{
		if (sublinkage->link[link]->l == -1) continue;

		s = sublinkage->link[link]->name;
		if (pp_linkset_match(pp->knowledge->ignore_these_links, s)) continue;

		if (pp_linkset_match(pp->knowledge->domain_starter_links, s))
		{
			setup_domain_array(pp, pp->pp_data.N_domains, s, link);
			if (pp_linkset_match(pp->knowledge->domain_contains_links, s))
			{
				add_link_to_domain(pp, link);
			}
			depth_first_search(pp, sublinkage,
			                   sublinkage->link[link]->r,
			                   sublinkage->link[link]->l, link);
			pp->pp_data.N_domains++;
			assert(pp->pp_data.N_domains<PP_MAX_DOMAINS, "raise value of PP_MAX_DOMAINS");
			continue;
		}

		if (pp_linkset_match(pp->knowledge->urfl_domain_starter_links, s))
		{
			setup_domain_array(pp, pp->pp_data.N_domains, s, link);
			/* always add the starter link to its urfl domain */
			add_link_to_domain(pp, link);
			bad_depth_first_search(pp, sublinkage,
			                       sublinkage->link[link]->r,
			                       sublinkage->link[link]->l, link);
			pp->pp_data.N_domains++;
			assert(pp->pp_data.N_domains<PP_MAX_DOMAINS,"raise PP_MAX_DOMAINS value");
			continue;
		}

		if (pp_linkset_match(pp->knowledge->urfl_only_domain_starter_links, s))
		{
			setup_domain_array(pp, pp->pp_data.N_domains, s, link);
			/* do not add the starter link to its urfl_only domain */
			d_depth_first_search(pp, sublinkage,
			                     sublinkage->link[link]->l,
			                     sublinkage->link[link]->l,
			                     sublinkage->link[link]->r, link);
			pp->pp_data.N_domains++;
			assert(pp->pp_data.N_domains<PP_MAX_DOMAINS,"raise PP_MAX_DOMAINS value");
			continue;
		}

		if (pp_linkset_match(pp->knowledge->left_domain_starter_links, s))
		{
			setup_domain_array(pp, pp->pp_data.N_domains, s, link);
			/* do not add the starter link to a left domain */
			left_depth_first_search(pp, sublinkage,
			                        sublinkage->link[link]->l,
			                        sublinkage->link[link]->r, link);
			pp->pp_data.N_domains++;
			assert(pp->pp_data.N_domains<PP_MAX_DOMAINS,"raise PP_MAX_DOMAINS value");
		}
	}

	/* sort the domains by size */
	qsort((void *) pp->pp_data.domain_array, pp->pp_data.N_domains, sizeof(Domain),
	      (int (*)(const void *, const void *)) domain_compare);

	/* sanity check: all links in all domains have a legal domain name */
	for (d = 0; d < pp->pp_data.N_domains; d++)
	{
		i = find_domain_name(pp, pp->pp_data.domain_array[d].string);
		if (i == -1)
		{
			error("\tpost_process: Need an entry for %s in LINK_TYPE_TABLE",
			      pp->pp_data.domain_array[d].string);
		}
		pp->pp_data.domain_array[d].type = i;
	}
}