Beispiel #1
0
// Locate the pair of neighbouring intervals with the smallest $\chi^2$.
//
// Walks g_intervals once, calling compute_chisquared() on every interval
// and its immediate successor. Each left-hand interval and its value are
// echoed to cout as debug output, followed by the overall minimum.
//
// Returns an iterator to the first interval of the winning pair, or
// g_intervals.end() when there is no adjacent pair to compare. On ties the
// earliest pair wins, since only a strictly smaller value replaces the
// current best.
IntervalList::iterator find_min_chi_chi()
{
	const IntervalList::iterator stop = g_intervals.end();
	IntervalList::iterator best = stop;

	// Seed value; it is only ever reported when no pair was compared at all.
	float best_chi = 1e6;

	IntervalList::iterator left = g_intervals.begin();
	while (left != stop) {
		IntervalList::iterator right = left;
		++right;

		// The last interval has no right-hand neighbour to pair with.
		if (right == stop) {
			break;
		}

		const float chi = compute_chisquared(*left, *right);

		// Debug
		print_interval_set(*left);
		cout << "\\chi^2 = " << chi << endl;

		// best == stop means no candidate has been recorded yet, so the
		// very first pair is accepted unconditionally.
		if (best == stop || chi < best_chi) {
			best = left;
			best_chi = chi;
		}

		++left;
	}

	cout << "min_chisquared = " << best_chi << endl;

	return best;
}
Beispiel #2
0
// Write a summary of the current contents of g_intervals to 'os'.
//
// os:       destination stream.
// dimIndex: dimension forwarded to get_range(); shown to the reader as
//           "Feature <dimIndex+1>".
//
// Output consists of a heading, one "[low, high] " entry per interval, and
// then the split points. A split point is the midpoint between the upper
// bound of an interval and the lower bound of the interval that follows it.
void print_interval_summary(ostream& os, int dimIndex)
{
	os << "\n" "Feature " << (dimIndex+1) << ":" << endl;
	os << "Ranges: ";

	vector<DataType> boundaries;
	const IntervalList::iterator stop = g_intervals.end();

	for (IntervalList::iterator cur = g_intervals.begin(); cur != stop; ++cur) {
		const pair<DataType, DataType> cur_range = get_range(*cur, dimIndex);

		// Every interval except the last contributes one split point.
		IntervalList::iterator succ = cur;
		++succ;
		if (succ != stop) {
			const pair<DataType, DataType> succ_range = get_range(*succ, dimIndex);

			// The midpoint is computed in float and then stored as DataType.
			const float midpoint = (cur_range.second + succ_range.first) * 0.5f;
			boundaries.push_back(midpoint);
		}

		os << "[" << cur_range.first << ", " << cur_range.second << "] ";
	}

	os << endl;

	// Each value is followed by ", ", including the last one.
	os << "Split points: ";
	copy(boundaries.begin(), boundaries.end(),
		ostream_iterator<DataType>(os, ", "));
	os << endl;
}
Beispiel #3
0
// Debugging -- print all intervals
void print_all_intervals()
{
	IntervalList::iterator lit = g_intervals.begin(),
		lend = g_intervals.end();

	cout << "[intervals]" << endl;

	for ( ; lit != lend; ++lit) {
		IntervalSet& indices = *lit;
		print_interval_set(indices);
	}
}
Beispiel #4
0
// $\chi^2$ interval merging for a single dimension.
//
// dimIndex: the dimension to analyse; it is forwarded to the sort
//           comparator, initialize_intervals() and print_interval_summary().
//
// Steps:
//   1. Sort g_data with tuple_less_than<Tuple>(dimIndex) and dump the sorted
//      tuples to cout.
//   2. Build the initial intervals and count the classes.
//   3. Repeatedly merge the adjacent pair reported by find_min_chi_chi()
//      until at most g_max_intervals intervals remain.
//   4. Print the resulting summary to cout and to olog.
//
// Merging also stops early if no adjacent pair is left to merge, which can
// only happen when g_max_intervals is smaller than 1.
void chi_chi_dim_analysis(int dimIndex)
{
	// Sort and initialize one interval per unique attribute value

	sort(g_data.begin(), g_data.end(), tuple_less_than<Tuple>(dimIndex));

	TupleVec::iterator tit = g_data.begin(), tend = g_data.end();

	cout << "[sort]" << endl;

	// Debug dump: "<row index>:<attribute values>, <second member of the tuple>"
	int index = 0;
	for ( ; tit != tend; ++tit, ++index) {
		cout << index << ":";
		copy(tit->first.begin(), tit->first.end(), ostream_iterator<float>(cout, ", "));
		cout << tit->second << endl;
	}

	initialize_intervals(dimIndex);

	// Count instances of all classes
	count_classes();

	while (static_cast<int>(g_intervals.size()) > g_max_intervals) {
		// Find adjacent intervals with smallest $\chi^2$
		IntervalList::iterator min_lit = find_min_chi_chi();
		assert(min_lit != g_intervals.end());

		// assert() vanishes under NDEBUG. Without this check a release build
		// would go on to increment and dereference end() whenever fewer than
		// two intervals remain (possible only if g_max_intervals < 1).
		if (min_lit == g_intervals.end()) {
			break;
		}

		IntervalList::iterator min_lit_next = min_lit;
		++min_lit_next;

		cout << "[before merge] ";
		print_all_intervals();

		// Merge: fold the right-hand interval into the left-hand one, then
		// drop the now-redundant right-hand interval from the list.
		IntervalSet& interval_1 = *min_lit;
		IntervalSet& interval_2 = *min_lit_next;

		interval_1.insert(interval_2.begin(), interval_2.end());

		g_intervals.erase(min_lit_next);

		cout << "[after merge] ";
		print_all_intervals();
	}

	// Debugging
	print_interval_summary(cout, dimIndex);

	// Logged output
	print_interval_summary(olog, dimIndex);
}