Пример #1
0
 // Returns the object to its empty state: empties the table, the rule list
 // and the capture list, and sets both the row and the column count to zero.
 void clear()
 {
     _table.clear();

     // Zero the dimensions (rows first, then columns).
     _rows = 0;
     _columns = _rows;

     _rules.clear();
     _captures.clear();
 }
Пример #2
0
/**
 * Selects the single attribute whose per-value "majority class" rules
 * classify the most rows correctly.
 *
 * For every attribute except the last entry of `at`, the function counts how
 * often each class (taken from the last column of `dt`) co-occurs with each
 * attribute value, maps every value to its most frequent class, and sums the
 * matching counts. The attribute with the strictly largest sum wins; on ties
 * the attribute with the lowest index is kept.
 *
 * NOTE(review): this assumes `dt` holds one column per entry of `at`, with the
 * class column last, and that columns are keyed by row id — confirm against
 * the caller.
 *
 * @param at            Attribute table; only its size is used here.
 * @param dt            Data table, indexed by column and then by row key.
 * @param out_rules     Receives the value -> class rules of the winning
 *                      attribute. Left untouched when no attribute is
 *                      examined (fewer than two entries in `at`).
 * @param out_attr_idx  Receives the index of the winning attribute, or -1
 *                      when no attribute was examined.
 * @return Number of rows the winning rules classify correctly, or -1 when no
 *         attribute was examined.
 */
int processData(attrtable& at, datatable& dt, rules& out_rules, int& out_attr_idx)
{
  int max_idx = -1;
  int max_correct = -1;

  // The last entry is skipped: it is the one the class labels are read from.
  const int attr_count = static_cast<int>(at.size()) - 1;

  for (int i = 0; i < attr_count; ++i)
  {
    // Distinct values seen for this attribute.
    std::set<std::string> unique;

    // errorboard[class][attribute value] -> number of rows with that pair.
    ruleboard errorboard;

    // Looked up once per attribute instead of once per row.
    auto&& class_column = dt[dt.size() - 1];

    // Count how often each class appears together with each attribute value.
    for (const auto& value : dt[i])
    {
      // C++11 23.4.4.3 map element access [map.access]:
      // a missing key is inserted with a zero-initialized int.
      ++errorboard[class_column[value.first]][value.second];
      unique.insert(value.second);
    }

    // Build the rule set: every attribute value predicts its most frequent class.
    rules cur_rule;
    int total_correct = 0;
    for (const auto& value : unique)
    {
      int best_count = -1;
      std::string classification;

      // Iterate by reference: a by-value loop variable would copy the whole
      // per-class count map on every step. operator[] may add a zero entry
      // for a class that never saw this value, which does not affect results.
      for (auto&& entry : errorboard)
      {
        const auto count = entry.second[value];
        if (count > best_count)
        {
          best_count = count;
          classification = entry.first;
        }
      }
      total_correct += best_count;
      cur_rule.emplace(value, classification);
    }

    // Keep the rules with the largest correct count (same as smallest error
    // rate). Strict comparison keeps the earliest attribute on ties.
    if (max_correct < total_correct)
    {
      max_idx = i;
      max_correct = total_correct;
      out_rules = cur_rule;
    }
  }

  // Report the best attribute and how many rows its rules get right.
  out_attr_idx = max_idx;
  return max_correct;
}