char* DecisionTreeClassifier::findMajorityClass(const std::vector<attr_set*>* dec_set) { char* major_class = new char[256]; int freq = 0; attr_set* attset; for(std::vector<attr_set*>::const_iterator it1=dec_set->begin(); it1 != dec_set->end(); it1++) { attset = (*it1); string str_class("Class"); attr_set::iterator attr_it = attset->find(str_class); if(attr_it != attset->end()) { string str_attr(attr_it->second); freq = class_freq_map[str_attr]; class_freq_map[str_attr] = freq++; } } for(std::tr1::unordered_map<string,int>::iterator it2=class_freq_map.begin();it2 != class_freq_map.end(); it2++) { int max = 0; if(it2->second > max) { strcpy(major_class, (it2->first).c_str()); max = it2->second; } } return major_class; }
double DecisionTreeClassifier::impurityEval1(const std::vector<attr_set*>* dec_set) { double entropy = 0.0; double max = 0.0; double total = 0.0; double prob = 0.0; class_freq_map.clear(); for(std::vector<attr_set*>::const_iterator it1=dec_set->begin(); it1 != dec_set->end(); it1++) { string str_class("Class"); attr_set::iterator attr_it = (*it1)->find(str_class); if(attr_it != (*it1)->end()) { string str_attr(attr_it->second); int freq =class_freq_map[str_attr]; class_freq_map[str_attr] = ++freq; } } for(std::tr1::unordered_map<string, int>::iterator it2=class_freq_map.begin();it2 != class_freq_map.end(); it2++) { total += it2->second; } for(std::tr1::unordered_map<string, int>::iterator it3=class_freq_map.begin(); it3 != class_freq_map.end(); it3++) { prob = (double) it3->second / total; entropy -= ( prob * log2(prob)); } return entropy; }
/*
 * Debug-output helper for a storage-class style enum value.
 *
 * Passes the result of str_class(c) to dbg_printf with the format "%11s ".
 * Assuming dbg_printf follows printf formatting rules (TODO confirm), this
 * prints the text right-justified in a field at least 11 characters wide,
 * followed by a single space and no newline.
 *
 * c: the enum SC value to print.
 */
void WRclass(enum SC c) { dbg_printf("%11s ",str_class(c)); }