// decomposes a rule (A -> B C D E ...) into the rules (A -> B#C#D E), (B#C#D -> B#C D), (B#C -> B C), ... respectively. // might expand 'nonterminals' // might rewrite 'rule_to_decompose' void decompose(rule &rule_to_decompose, set<string> &nonterminals, rules &decomposed_rules) { string temp; if (rule_to_decompose.size() == 2) { if (is_in(rule_to_decompose[1], nonterminals)) { cerr << " ERROR : irregular occurence of chain rule (i.e. A -> B)! - ignored " << '\n'; return; } else decomposed_rules.insert(rule_to_decompose); } else { for (size_t i = 1; i<rule_to_decompose.size();++i) if (!is_in(rule_to_decompose[i], nonterminals)) { temp = "#" + rule_to_decompose[i]; nonterminals.insert(temp); decomposed_rules.insert(rule{ temp, rule_to_decompose[i] }); replace(rule_to_decompose.begin() + i, rule_to_decompose.end(), rule_to_decompose[i], temp); } while(rule_to_decompose.size() > 3) { temp = rule_to_decompose[1] + "#" + rule_to_decompose[2]; nonterminals.insert(temp); decomposed_rules.insert(rule{ temp, rule_to_decompose[1], rule_to_decompose[2] }); rule_to_decompose[1] = temp; rule_to_decompose.erase(rule_to_decompose.begin() + 2); } decomposed_rules.insert(rule_to_decompose); } }
// Resets the object: zeroes the row and column counts and empties the
// table, rule and capture containers.
void clear()
{
    _rows = 0;
    _columns = 0;
    _table.clear();
    _rules.clear();
    _captures.clear();
}
int processData(attrtable& at, datatable& dt, rules& out_rules, int& out_attr_idx) { // Rules int max_idx = -1; int max_correct = -1; // For each attribute for (int i = 0; i < (int)at.size() - 1; ++i) { // For checking unique options for each attribute std::set<std::string> unique; ruleboard errorboard; // For each value of that attribute // Count how often each class appears for (auto value : dt[i]) { // C++11 23.4.4.3 map element access [map.access] // If no key found, the int value is zero initalized ++errorboard[dt[dt.size() - 1][value.first]][value.second]; unique.insert(value.second); } // Find the most frequent class rules cur_rule; int total_correct= 0; for (auto value : unique) { int max = -1; std::string classification; for (auto e : errorboard) { if (e.second[value] > max) { max = e.second[value]; classification = e.first; } } total_correct += max; // Make the rule assign that to this attribute value cur_rule.emplace(std::string(value), std::string(classification)); } // Choose the rules with largest correct rate (same as smallest error rate) if (max_correct < total_correct) { max_idx = i; max_correct = total_correct; out_rules.clear(); out_rules = rules(cur_rule); } } // We found the best attribute that has largest correct rate, // return the count of correct out_attr_idx = max_idx; return max_correct; }
static void dump(const char_state_machine &csm_, rules &rules_, ostream &stream_) { for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) { lexer_state(stream_); stream_ << rules_.state(dfa_) << std::endl << std::endl; dump_ex(csm_._sm_deque[dfa_], stream_); } }
// Registers `rule` by appending it to rules_ via push_back.
void add_rule(rule_type const& rule) { rules_.push_back(rule); }
// Internal function actually performing the work of dumping the // state machine in DOT. static void dump_ex ( id_type dfa_id_, const typename char_state_machine::dfa &dfa_, rules &rules_, ostream &stream_) { const std::size_t states_ = dfa_._states.size (); typename dfa_state::id_type_string_token_map::const_iterator iter_; typename dfa_state::id_type_string_token_map::const_iterator end_; stream_ << std::endl; for (std::size_t i_ = 0; i_ < states_; ++i_) { const dfa_state &state_ = dfa_._states[i_]; const string name = node_name(dfa_id_, i_); if (i_ == 0) { stream_ << " " << name << " [shape = doublecircle, xlabel=\"" << rules_.state(dfa_id_) << "\"];" << std::endl; } else if (state_._end_state) { stream_ << " " << name << " [shape = doublecircle, xlabel=\"id =" << static_cast<std::size_t>(state_._id) << "\"];" << std::endl; } else { stream_ << " " << name << " [shape = circle];" << std::endl; } } stream_ << std::endl; for (std::size_t i_ = 0; i_ < states_; ++i_) { const dfa_state &state_ = dfa_._states[i_]; iter_ = state_._transitions.begin (); end_ = state_._transitions.end (); const string src_name = node_name(dfa_id_, i_); for (; iter_ != end_; ++iter_) { const string dst_name = node_name(dfa_id_, iter_->first); stream_ << " " << src_name << " -> " << dst_name << " [label = \""; string_token token_ = iter_->second; open_bracket (stream_); if (!iter_->second.any () && iter_->second.negatable ()) { token_.negate (); negated (stream_); } string chars_; typename string_token::range_vector::const_iterator ranges_iter_ = token_._ranges.begin (); typename string_token::range_vector::const_iterator ranges_end_ = token_._ranges.end (); for (; ranges_iter_ != ranges_end_; ++ranges_iter_) { if (ranges_iter_->first == '^' || ranges_iter_->first == ']') { stream_ << "\\\\"; } chars_ = double_escape_char(ranges_iter_->first); if (ranges_iter_->first != ranges_iter_->second) { if (ranges_iter_->first + 1 < ranges_iter_->second) { chars_ += '-'; } if (ranges_iter_->second 
== '^' || ranges_iter_->second == ']') { stream_ << "\\\\"; } chars_ += double_escape_char(ranges_iter_->second); } stream_ << chars_; } close_bracket (stream_); stream_ << "\"];" << std::endl; } if (state_._end_state) { const string dst_name = node_name(state_._next_dfa, 0); stream_ << " " << src_name << " -> " << dst_name << " [style = \"dashed\"];" << std::endl; } } }
// Registers `rule` by appending it to rules_ via push_back.
void add_rule(const rule_type& rule) { rules_.push_back(rule); }