/**
 * Scores a column subset by the negated inconsistency rate of the binned data.
 *
 * Every row of _bins is placed into a group keyed by its bin vector; the hash
 * map uses BinHashFunctor, which is configured through the static
 * BinHashFunctor::_columns assigned at the top of this function. For each
 * group a per-class instance count is kept, and _calculateInconsistentCount()
 * is summed over all groups.
 *
 * @param columns the columns to evaluate; copied into BinHashFunctor::_columns.
 * @param (unnamed TgsProgress*) progress handle; not used by this function.
 * @return -(inconsistent count / number of rows in _bins), or 0.0 when _bins
 *         is empty (the rate would otherwise be 0/0, i.e. NaN).
 */
double ConsistencySubsetEvaluator::evaluateSubset(const std::vector<int>& columns, 
    TgsProgress*) 
  { 
    // NOTE: this writes a static member, so the hash functor configuration is
    // shared by every BinHashFunctor instance.
    BinHashFunctor::_columns = columns;

    // With no rows there is nothing to be inconsistent about; bail out before
    // dividing by zero below.
    if (_bins.size() == 0)
    {
      return 0.0;
    }

    const DataFrame& df = _dataFrame;
    // hash<bin key, hash<class enumeration, instance count> >
    typedef HashMap< 
      std::vector<int>, // bin key
      InconsistentInstancesMap, // map of class enumerations to instance count
      BinHashFunctor> // fancy hash function and comparison that only looks at 'columns'
      BinMap;
    BinMap binMap;
    
    for (unsigned int i = 0; i < _bins.size(); i++)
    {
      // operator[] creates an empty per-class counter the first time a bin key is seen.
      InconsistentInstancesMap& eim = binMap[_bins[i]];
      int classEnum = _enumMap[df.getTrainingLabel(i)];
      if (eim.size() == 0)
      {
        // Lazily size the counter to one zeroed slot per known class.
        eim.resize(_enumMap.size(), 0);
      }
      eim[classEnum]++;
    }

    int inconsistencyCount = 0;
    for (BinMap::const_iterator it = binMap.begin(); it != binMap.end(); ++it)
    {
      inconsistencyCount += _calculateInconsistentCount(it->second);
    }

    const double inconsistencyRate =
      static_cast<double>(inconsistencyCount) / static_cast<double>(_bins.size());
    // Negated so that a lower inconsistency rate yields a larger return value.
    return -inconsistencyRate;
  }
Ejemplo n.º 2
0
      /**
       * Writes the bin table to a stream.
       *
       * Emits one line per entry of `bins` in the form "lo-hi:   count",
       * followed by a summary line spanning the lower bound of the first bin
       * to the upper bound of the last bin together with `total`.
       * Nothing is written when `bins` is empty.
       *
       * @param s output stream that receives the table.
       */
      inline void dump(std::ostream& s) const
      {
         // The summary line dereferences begin() and rbegin(); on an empty
         // container that is undefined behaviour, so stop here.
         if (bins.begin() == bins.end())
         {
            return;
         }

         for (BinMap::const_iterator bmi = bins.begin(); bmi != bins.end(); ++bmi)
         {
            const BinRange& br = bmi->first;
            s << std::right << std::setw(3) << br.first
              << "-" << std::left  << std::setw(3) << br.second
              << ":   " << std::right <<  bmi->second
              << std::endl;
         }

         // Summary: overall range (first bin's lower bound to last bin's
         // upper bound) and the running total.
         s << std::right << std::setw(3) << bins.begin()->first.first
           << "-" << std::left  << std::setw(3) << bins.rbegin()->first.second
           << ":   " << std::right <<  total
           << std::endl;
      }
Ejemplo n.º 3
0
 inline void addValue(double v)
 {
    BinMap::iterator bm_itr;
    for (bm_itr=bins.begin(); bm_itr != bins.end(); bm_itr++)
    {
       const BinRange& range = bm_itr->first;
       if (range.first < v && v <= range.second)
       {
          bm_itr->second++;
          total++;
          break;
       }
    }
 }