Пример #1
0
int
ME_Model::make_feature_bag(const int cutoff)
{
  int max_num_features = 0;

  // count the occurrences of features
#ifdef USE_HASH_MAP
#if __cplusplus < 201103L
  typedef std::tr1::unordered_map<unsigned int, int> map_type;
#else
  typedef std::unordered_map<unsigned int, int> map_type;
#endif
#else    
  typedef std::map<unsigned int, int> map_type;
#endif
  map_type count;
  if (cutoff > 0) {
    for (std::vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++) {
      for (std::vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) {
        count[ME_Feature(i->label, *j).body()]++;
      }
      for (std::vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) {
        count[ME_Feature(i->label, j->first).body()]++;
      }
    }
  }

  int n = 0; 
  for (std::vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++, n++) {
    max_num_features = max(max_num_features, (int)(i->positive_features.size()));
    for (std::vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) {
      const ME_Feature feature(i->label, *j);
      //      if (cutoff > 0 && count[feature.body()] < cutoff) continue;
      if (cutoff > 0 && count[feature.body()] <= cutoff) continue;
      _fb.Put(feature);
      //      cout << i->label << "\t" << *j << "\t" << id << endl;
      //      feature2sample[id].push_back(n);
    }
    for (std::vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) {
      const ME_Feature feature(i->label, j->first);
      //      if (cutoff > 0 && count[feature.body()] < cutoff) continue;
      if (cutoff > 0 && count[feature.body()] <= cutoff) continue;
      _fb.Put(feature);
    }
  }
  count.clear();
  
  //  cerr << "num_classes = " << _num_classes << endl;
  //  cerr << "max_num_features = " << max_num_features << endl;

  init_feature2mef();
  
  return max_num_features;
}
Пример #2
0
int
ME_Model::make_feature_bag(const int cutoff)
{
  int max_label = 0;
  int max_num_features = 0;
  for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++) {
    max_label = max(max_label, i->label);
  }
  _num_classes = max_label + 1;

  //  map< int, list<int> > feature2sample;

  // count the occurrences of features
#ifdef USE_HASH_MAP
  typedef __gnu_cxx::hash_map<unsigned int, int> map_type;
#else
  typedef std::map<unsigned int, int> map_type;
#endif
  map_type count;
  if (cutoff > 0) {
    for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++) {
      for (std::list<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) {
        count[ME_Feature(i->label, *j).body()]++;
      }
    }
  }

  int n = 0;
  for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++, n++) {
    max_num_features = max(max_num_features, (int)(i->positive_features.size()));
    for (std::list<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) {
      const ME_Feature feature(i->label, *j);
      if (cutoff > 0 && count[feature.body()] < cutoff) continue;
      //int id = _fb.Put(feature);
      //      cout << i->label << "\t" << *j << "\t" << id << endl;
      //      feature2sample[id].push_back(n);
    }
  }
  count.clear();

  //  cerr << "num_classes = " << _num_classes << endl;
  //  cerr << "max_num_features = " << max_num_features << endl;

  int c = 0;

  _sample2feature.clear();
  _sample2feature.resize(_train.size());

  n = 0;
  for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++) {
    //    _sample2feature[n].resize(_num_classes);
    for (std::list<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++){
      for (int k = 0; k < _num_classes; k++) {
        int id = _fb.Id(ME_Feature(k, *j));
        if (id >= 0) {
          _sample2feature[n].push_back(id);
          c++;
        }
      }
    }
    n++;
  }

  //  cerr << "c = " << c << endl;

  return max_num_features;
}