// $\chi^2$ just for one dimension void chi_chi_dim_analysis(int dimIndex) { // Sort and initialize one interval per unique attribute value sort(g_data.begin(), g_data.end(), tuple_less_than<Tuple>(dimIndex)); TupleVec::iterator tit = g_data.begin(), tend = g_data.end(); cout << "[sort]" << endl; int index = 0; for ( ; tit != tend; ++tit, ++index) { cout << index << ":"; copy(tit->first.begin(), tit->first.end(), ostream_iterator<float>(cout, ", ")); cout << tit->second << endl; } initialize_intervals(dimIndex); // Count instances of all classes count_classes(); while ((int)g_intervals.size() > g_max_intervals) { // Find adjacent intervals with smallest $\chi^2$ IntervalList::iterator min_lit = find_min_chi_chi(); assert(min_lit != g_intervals.end()); IntervalList::iterator min_lit_next = min_lit; ++min_lit_next; cout << "[before merge] "; print_all_intervals(); // Merge IntervalSet& interval_1 = *min_lit; IntervalSet& interval_2 = *min_lit_next; interval_1.insert(interval_2.begin(), interval_2.end()); g_intervals.erase(min_lit_next); cout << "[after merge] "; print_all_intervals(); } // Debugging print_interval_summary(cout, dimIndex); // Logged output print_interval_summary(olog, dimIndex); }