// skipping unreached tails: cost_type recompute_cost(ED e) { cost_type c = get(ec, e); SHOWIF2(TUHG, 3, "recompute_cost:base", c, TUHG_PRINT(e, g)); Tailr tailr = tails(e, g); for (Ti i = boost::begin(tailr), ei = boost::end(tailr); i != ei; ++i) { VD t = tail(*i, e, g); // possibly multiple instances of tail t cost_type tc = get(mu, t); SHOWIF3(TUHG, 6, "recompute_cost:c'=c*tc", c, tc, t); assert(tc != PT::unreachable()); // FIXME: maybe we want to allow this (used to be "if")? if we // don't, then you can only reach with non-unreachable cost. PT::extendBy(tc, c); } SHOWIF2(TUHG, 3, "recompute_cost:final", c, TUHG_PRINT(e, g)); return c; }
/// Registers hyperarc `ed` with the adjacency list of each of its tails.
///
/// - An arc without tails is appended to `terminal_arcs` and nothing else
///   happens.
/// - Otherwise a Tail(ed) entry is added to `adj[t]` for each tail `t`, unless
///   the entry most recently added to `adj[t]` is already `ed` (i.e. `t`
///   repeats within this arc). The number of entries actually added is stored
///   in `unique_tails_pmap` for `ed`.
void operator()(ED ed) {
  Tailr tailr = tails(ed, g);
  Ti cur = boost::begin(tailr), end = boost::end(tailr);

  if (cur == end) {
    terminal_arcs.push_back(ed);
    return;
  }

  unsigned distinct_tails = 0;
  for (; cur != end; ++cur) {
    Adj& a = adj[tail(*cur, ed, g)];

    // If the last hyperarc recorded for this tail is ed itself, this is a
    // repeated tail of the same hyperarc: do not add it again.
    const bool repeated_tail = a.size() && last_added(a) == ed;
    if (repeated_tail)
      continue;

    add(a, Tail(ed));  // default multiplicity=1
    ++distinct_tails;
  }

  put(unique_tails_pmap, ed, distinct_tails);
}
/// Binarizes every edge of `source` that has more than two tails and writes
/// the result to `target`.
///
/// `target` starts as a copy of `source`. For each source edge with more than
/// two tails, the non-terminals of its rule are laid out in a chart; for every
/// proper sub-span of two or more non-terminals an intermediate node (shared
/// through `node_map`) and label (shared through `label_map`) are created, and
/// binary edges are added for every split point. The full span is attached to
/// the original head node and those edges inherit the original features and
/// attributes. The original edge is flagged in `removed` and filtered out by
/// the final topologically_sort call.
///
/// @param source hypergraph to binarize; if !source.is_valid(), target is just
///               a copy of it
/// @param target receives the binarized hypergraph
/// @throws std::runtime_error if a rule's non-terminals do not match the
///         edge's tails (count mismatch or non-terminal index out of range)
void operator()(const hypergraph_type& source, hypergraph_type& target)
{
  // first, copy...
  target = source;

  if (! source.is_valid()) return;

  // scratch buffers reused for every new binary edge
  phrase_type binarized(2);
  hypergraph_type::edge_type::node_set_type tails(2);

  // we will traverse source-side in order to avoid confusion with newly created nodes...
  removed_type removed(source.edges.size(), false);

  position_set_type positions;
  node_chart_type   node_chart;
  label_chart_type  label_chart;

  label_map.clear();
  node_map.clear();

  hypergraph_type::node_set_type::const_iterator niter_end = source.nodes.end();
  for (hypergraph_type::node_set_type::const_iterator niter = source.nodes.begin(); niter != niter_end; ++ niter) {
    const hypergraph_type::node_type& node_source = *niter;

    hypergraph_type::node_type::edge_set_type::const_iterator eiter_end = node_source.edges.end();
    for (hypergraph_type::node_type::edge_set_type::const_iterator eiter = node_source.edges.begin(); eiter != eiter_end; ++ eiter) {
      const hypergraph_type::edge_type& edge_source = source.edges[*eiter];

      // unary / binary (and terminal) edges are kept untouched
      if (edge_source.tails.size() <= 2) continue;

      removed[edge_source.id] = true;

      // we will create nodes in a chart structure, and exhaustively enumerate edges
      symbol_set_type rhs_sorted(edge_source.rule->rhs);
      tail_set_type   tails_sorted(edge_source.tails);

      // first, compute non-terminal spans...
      // positions[k] is the rhs index of the k-th non-terminal; tails_sorted[k]
      // is the tail that non-terminal refers to.
      positions.clear();
      int pos = 0;
      for (size_t i = 0; i != rhs_sorted.size(); ++ i)
        if (rhs_sorted[i].is_non_terminal()) {
          // Validate BEFORE indexing: a malformed rule must raise an error
          // instead of reading/writing past the end of the tail sets.
          if (static_cast<size_t>(pos) >= tails_sorted.size())
            throw std::runtime_error("invalid edge: # of non-terminals and tails size do not match");

          const int non_terminal_index = rhs_sorted[i].non_terminal_index();

          // index 0 selects the tail at the current position, otherwise the
          // (1-based) index selects the tail explicitly
          const int tail_index = utils::bithack::branch(non_terminal_index == 0, pos, non_terminal_index - 1);

          if (tail_index < 0 || static_cast<size_t>(tail_index) >= edge_source.tails.size())
            throw std::runtime_error("invalid edge: non-terminal index out of range");

          tails_sorted[pos] = edge_source.tails[tail_index];

          rhs_sorted[i] = rhs_sorted[i].non_terminal();

          positions.push_back(i);

          ++ pos;
        }

      if (positions.size() != edge_source.tails.size())
        throw std::runtime_error("invalid edge: # of non-terminals and tails size do not match");

      // second, enumerate chart to compute node and edges...
      node_chart.clear();
      node_chart.resize(positions.size() + 1, hypergraph_type::invalid);

      label_chart.clear();
      label_chart.resize(positions.size() + 1);

      // width-1 cells: the tails themselves
      for (size_t i = 0; i != positions.size(); ++ i) {
        node_chart(i, i + 1)  = tails_sorted[i];
        label_chart(i, i + 1) = rhs_sorted[positions[i]];
      }

      // proper sub-spans only (length < # of non-terminals); the full span is
      // handled separately below because it maps onto the original head
      for (size_t length = 2; length < positions.size(); ++ length)
        for (size_t first = 0; first + length <= positions.size(); ++ first) {
          const size_t last = first + length;

          const symbol_set_type subrhs(rhs_sorted.begin() + positions[first], rhs_sorted.begin() + positions[last - 1] + 1);
          const tail_set_type   subtails(tails_sorted.begin() + first, tails_sorted.begin() + last);

          // the label is keyed by the covered tails only
          std::pair<label_map_type::iterator, bool> result_label = label_map.insert(std::make_pair(subtails, symbol_type()));
          if (result_label.second) {
            const symbol_type::piece_type left  = label_chart(first, last - 1).non_terminal_strip();
            const symbol_type::piece_type right = label_chart(last - 1, last).non_terminal_strip();

            // for length > 2 the left label is itself a generated one; its
            // last character is dropped (presumably the trailing '^' -- TODO confirm)
            if (length > 2)
              result_label.first->second = '[' + std::string(left.begin(), left.end() - 1) + '+' + right + "^]";
            else
              result_label.first->second = '[' + std::string(left) + '+' + right + "^]";
          }

          // the node is keyed by covered tails AND the covered rhs symbols
          std::pair<node_map_type::iterator, bool> result_node = node_map.insert(std::make_pair(tail_symbol_pair_type(subtails, subrhs), 0));
          if (result_node.second)
            result_node.first->second = target.add_node().id;

          const symbol_type lhs = result_label.first->second;
          const hypergraph_type::id_type head = result_node.first->second;

          node_chart(first, last)  = head;
          label_chart(first, last) = lhs;

          // if newly created, then, create edges
          if (result_node.second)
            for (size_t middle = first + 1; middle != last; ++ middle) {
              // [first, middle) and [middle, last)
              tails.front() = node_chart(first, middle);
              tails.back()  = node_chart(middle, last);

              // rhs symbols strictly between the two sub-spans
              const size_t middle_first = positions[middle - 1] + 1;
              const size_t middle_last  = positions[middle];

              binarized.clear();
              binarized.push_back(label_chart(first, middle));
              binarized.insert(binarized.end(), rhs_sorted.begin() + middle_first, rhs_sorted.begin() + middle_last);
              binarized.push_back(label_chart(middle, last));

              hypergraph_type::edge_type& edge_new = target.add_edge(tails.begin(), tails.end());
              edge_new.rule = rule_type::create(rule_type(lhs, binarized.begin(), binarized.end()));

              target.connect_edge(edge_new.id, head);
            }
        }

      // root...
      // the full span keeps the original head and lhs; rhs symbols before the
      // first and after the last non-terminal are carried along, and the
      // original features / attributes are copied onto every root edge
      {
        const size_t first = 0;
        const size_t last  = positions.size();

        const hypergraph_type::id_type head = node_source.id;
        const symbol_type& lhs = edge_source.rule->lhs;

        node_chart(first, last)  = head;
        label_chart(first, last) = lhs;

        for (size_t middle = first + 1; middle != last; ++ middle) {
          // [first, middle) and [middle, last)
          tails.front() = node_chart(first, middle);
          tails.back()  = node_chart(middle, last);

          binarized.clear();

          const size_t prefix_first = 0;
          const size_t prefix_last  = positions[first];

          binarized.insert(binarized.end(), rhs_sorted.begin() + prefix_first, rhs_sorted.begin() + prefix_last);
          binarized.push_back(label_chart(first, middle));

          const size_t middle_first = positions[middle - 1] + 1;
          const size_t middle_last  = positions[middle];

          binarized.insert(binarized.end(), rhs_sorted.begin() + middle_first, rhs_sorted.begin() + middle_last);
          binarized.push_back(label_chart(middle, last));

          const size_t suffix_first = positions[last - 1] + 1;
          const size_t suffix_last  = rhs_sorted.size();

          binarized.insert(binarized.end(), rhs_sorted.begin() + suffix_first, rhs_sorted.begin() + suffix_last);

          hypergraph_type::edge_type& edge_new = target.add_edge(tails.begin(), tails.end());
          edge_new.rule = rule_type::create(rule_type(lhs, binarized.begin(), binarized.end()));
          edge_new.features   = edge_source.features;
          edge_new.attributes = edge_source.attributes;

          target.connect_edge(edge_new.id, head);
        }
      }
    }
  }

  // further resize...
  // edges added to target are new, hence not removed
  removed.resize(target.edges.size(), false);

  // presumably drops the edges flagged in `removed` while sorting -- confirm
  // against the filter / topologically_sort definitions
  hypergraph_type graph_removed;
  topologically_sort(target, graph_removed, filter(removed));
  target.swap(graph_removed);
}
/// Builds a dependency-parse forest over a sentential lattice.
///
/// Chart positions are shifted by one: the cell (pos + 1, pos + 2) holds the
/// word at lattice position `pos`, and position 0 is left for the [0, 1) cell
/// (never assigned here, so it keeps the invalid ids it was initialised with).
/// Every chart cell stores a pair of node ids (`first`, `second`).
///
/// @param lattice input; each position must hold exactly one arc of distance 1
/// @param graph   cleared, then filled; graph.goal is the `first` node of the
///                cell covering the whole chart
/// @throws std::runtime_error if the lattice is not sentential
void operator()(const lattice_type& lattice, hypergraph_type& graph)
{
  graph.clear();

  actives.clear();
  actives.resize(lattice.size() + 2, std::make_pair(hypergraph_type::invalid, hypergraph_type::invalid));

  // Axioms: one terminal edge + node per word; both halves of the cell point
  // at that node.
  for (size_t pos = 0; pos != lattice.size(); ++ pos) {
    if (lattice[pos].size() != 1)
      throw std::runtime_error("this is not a sentential lattice!");

    lattice_type::arc_set_type::const_iterator arc     = lattice[pos].begin();
    lattice_type::arc_set_type::const_iterator arc_end = lattice[pos].end();
    for (/**/; arc != arc_end; ++ arc) {
      if (arc->distance != 1)
        throw std::runtime_error("this is not a sentential lattice");

      hypergraph_type::edge_type& axiom = graph.add_edge();
      axiom.rule = rule_type::create(rule_type(vocab_type::X, rule_type::symbol_set_type(1, arc->label)));
      axiom.features = arc->features;
      axiom.attributes[attr_dependency_pos] = attribute_set_type::int_type(pos + 1);

      const hypergraph_type::id_type word_node = graph.add_node().id;
      graph.connect_edge(axiom.id, word_node);

      id_pair_type& word_cell = actives(pos + 1, pos + arc->distance + 1);
      word_cell.first  = word_node;
      word_cell.second = word_node;
    }
  }

  hypergraph_type::edge_type::node_set_type children(2);

  const int chart_end = lattice.size() + 1;

  // Spans are visited by increasing right boundary, and for each right
  // boundary from the narrowest (width 2) to the widest (starting at 0).
  for (int last = 2; last <= chart_end; ++ last)
    for (int first = last - 2; first >= 0; -- first) {
      id_pair_type& span = actives(first, last);

      span.first  = graph.add_node().id;
      span.second = graph.add_node().id;

      // attachments whose head is `last` are only built below the full span
      const bool head_on_right_allowed = last < chart_end;

      for (int middle = first + 1; middle < last; ++ middle) {
        const id_pair_type& left  = actives(first, middle);
        const id_pair_type& right = actives(middle, last);

        if (first != 0 || middle != 1) {
          // general split: enumerate the 4 combinations of (left, right) halves
          if (head_on_right_allowed) {
            // left attachment, left.first + right.first -> span.first
            children.front() = left.first;
            children.back()  = right.first;

            hypergraph_type::edge_type& edge_ff = graph.add_edge(children.begin(), children.end());
            edge_ff.rule = rule_reduce2;
            edge_ff.attributes[attr_dependency_head]      = attribute_set_type::int_type(last);
            edge_ff.attributes[attr_dependency_dependent] = attribute_set_type::int_type(middle);
            graph.connect_edge(edge_ff.id, span.first);

            // left attachment, left.second + right.first -> span.second
            children.front() = left.second;
            children.back()  = right.first;

            hypergraph_type::edge_type& edge_sf = graph.add_edge(children.begin(), children.end());
            edge_sf.rule = rule_reduce2;
            edge_sf.attributes[attr_dependency_head]      = attribute_set_type::int_type(last);
            edge_sf.attributes[attr_dependency_dependent] = attribute_set_type::int_type(middle);
            graph.connect_edge(edge_sf.id, span.second);
          }

          // right attachment, left.first + right.second -> span.first
          children.front() = left.first;
          children.back()  = right.second;

          hypergraph_type::edge_type& edge_fs = graph.add_edge(children.begin(), children.end());
          edge_fs.rule = rule_reduce2;
          edge_fs.attributes[attr_dependency_head]      = attribute_set_type::int_type(first);
          edge_fs.attributes[attr_dependency_dependent] = attribute_set_type::int_type(middle);
          graph.connect_edge(edge_fs.id, span.first);

          // right attachment, left.second + right.second -> span.second
          children.front() = left.second;
          children.back()  = right.second;

          hypergraph_type::edge_type& edge_ss = graph.add_edge(children.begin(), children.end());
          edge_ss.rule = rule_reduce2;
          edge_ss.attributes[attr_dependency_head]      = attribute_set_type::int_type(first);
          edge_ss.attributes[attr_dependency_dependent] = attribute_set_type::int_type(middle);
          graph.connect_edge(edge_ss.id, span.second);
        } else {
          // split at [0, 1): since we have [0^0, 1], only two cases are
          // enumerated, and each edge is unary -- children.front() is written
          // but only [begin() + 1, end()) is handed to add_edge.
          if (head_on_right_allowed) {
            children.front() = left.first;
            children.back()  = right.first;

            hypergraph_type::edge_type& edge_f = graph.add_edge(children.begin() + 1, children.end());
            edge_f.rule = rule_reduce1;
            edge_f.attributes[attr_dependency_head]      = attribute_set_type::int_type(last);
            edge_f.attributes[attr_dependency_dependent] = attribute_set_type::int_type(middle);
            graph.connect_edge(edge_f.id, span.first);
          }

          children.front() = left.first;
          children.back()  = right.second;

          hypergraph_type::edge_type& edge_s = graph.add_edge(children.begin() + 1, children.end());
          edge_s.rule = rule_reduce1;
          edge_s.attributes[attr_dependency_head]      = attribute_set_type::int_type(first);
          edge_s.attributes[attr_dependency_dependent] = attribute_set_type::int_type(middle);
          graph.connect_edge(edge_s.id, span.first);
        }
      }
    }

  // final...
  graph.goal = actives(0, chart_end).first;
  graph.topologically_sort();
}