int ME_Model::make_feature_bag(const int cutoff) { int max_num_features = 0; // count the occurrences of features #ifdef USE_HASH_MAP #if __cplusplus < 201103L typedef std::tr1::unordered_map<unsigned int, int> map_type; #else typedef std::unordered_map<unsigned int, int> map_type; #endif #else typedef std::map<unsigned int, int> map_type; #endif map_type count; if (cutoff > 0) { for (std::vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++) { for (std::vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) { count[ME_Feature(i->label, *j).body()]++; } for (std::vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) { count[ME_Feature(i->label, j->first).body()]++; } } } int n = 0; for (std::vector<Sample>::const_iterator i = _vs.begin(); i != _vs.end(); i++, n++) { max_num_features = max(max_num_features, (int)(i->positive_features.size())); for (std::vector<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) { const ME_Feature feature(i->label, *j); // if (cutoff > 0 && count[feature.body()] < cutoff) continue; if (cutoff > 0 && count[feature.body()] <= cutoff) continue; _fb.Put(feature); // cout << i->label << "\t" << *j << "\t" << id << endl; // feature2sample[id].push_back(n); } for (std::vector<pair<int, double> >::const_iterator j = i->rvfeatures.begin(); j != i->rvfeatures.end(); j++) { const ME_Feature feature(i->label, j->first); // if (cutoff > 0 && count[feature.body()] < cutoff) continue; if (cutoff > 0 && count[feature.body()] <= cutoff) continue; _fb.Put(feature); } } count.clear(); // cerr << "num_classes = " << _num_classes << endl; // cerr << "max_num_features = " << max_num_features << endl; init_feature2mef(); return max_num_features; }
int ME_Model::make_feature_bag(const int cutoff) { int max_label = 0; int max_num_features = 0; for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++) { max_label = max(max_label, i->label); } _num_classes = max_label + 1; // map< int, list<int> > feature2sample; // count the occurrences of features #ifdef USE_HASH_MAP typedef __gnu_cxx::hash_map<unsigned int, int> map_type; #else typedef std::map<unsigned int, int> map_type; #endif map_type count; if (cutoff > 0) { for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++) { for (std::list<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) { count[ME_Feature(i->label, *j).body()]++; } } } int n = 0; for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++, n++) { max_num_features = max(max_num_features, (int)(i->positive_features.size())); for (std::list<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++) { const ME_Feature feature(i->label, *j); if (cutoff > 0 && count[feature.body()] < cutoff) continue; //int id = _fb.Put(feature); // cout << i->label << "\t" << *j << "\t" << id << endl; // feature2sample[id].push_back(n); } } count.clear(); // cerr << "num_classes = " << _num_classes << endl; // cerr << "max_num_features = " << max_num_features << endl; int c = 0; _sample2feature.clear(); _sample2feature.resize(_train.size()); n = 0; for (std::vector<Sample>::const_iterator i = _train.begin(); i != _train.end(); i++) { // _sample2feature[n].resize(_num_classes); for (std::list<int>::const_iterator j = i->positive_features.begin(); j != i->positive_features.end(); j++){ for (int k = 0; k < _num_classes; k++) { int id = _fb.Id(ME_Feature(k, *j)); if (id >= 0) { _sample2feature[n].push_back(id); c++; } } } n++; } // cerr << "c = " << c << endl; return max_num_features; }