예제 #1
0
파일: main.cpp 프로젝트: litaoshao/waffles
// Compares two attributes (columns) of a dataset and prints a report of
// basic statistics and paired significance tests to stdout.
//
// Arguments consumed from args, in order:
//   <dataset>   string handed to loadData()
//   <attr1>     zero-based index of the first attribute
//   <attr2>     zero-based index of the second attribute
//   [-tol <t>]  optional tolerance (default 0.001); two values whose absolute
//               difference is below it are counted as "same", and it is also
//               forwarded to the Wilcoxon signed ranks test
//
// Reports an error through ThrowError() if an unrecognized option is given or
// if either attribute index does not refer to a column of the dataset.
//
// Side effect: sets the output precision of cout to 8 digits.
void significance(GArgReader& args)
{
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData); // presumably releases pData when this scope exits -- confirm against Holder
	int attr1 = args.pop_uint();
	int attr2 = args.pop_uint();

	// Parse options
	double tolerance = 0.001;
	while(args.size() > 0)
	{
		if(args.if_pop("-tol"))
			tolerance = args.pop_double();
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Validate the attribute indexes before they are used to index raw rows
	// below; an out-of-range index would otherwise read past the end of a row.
	const size_t colCount = pData->cols();
	if(attr1 < 0 || static_cast<size_t>(attr1) >= colCount)
		ThrowError("The first attribute index is out of range");
	if(attr2 < 0 || static_cast<size_t>(attr2) >= colCount)
		ThrowError("The second attribute index is out of range");

	// Print some basic stats
	cout.precision(8);
	{
		cout << "### Some basic stats\n";
		cout << "Medians = " << pData->median(attr1) << ", " << pData->median(attr2) << "\n";
		double mean1 = pData->mean(attr1);
		double mean2 = pData->mean(attr2);
		cout << "Means = " << mean1 << ", " << mean2 << "\n";
		double var1 = pData->variance(attr1, mean1);
		double var2 = pData->variance(attr2, mean2);
		cout << "Standard deviations = " << sqrt(var1) << ", " << sqrt(var2) << "\n";

		// Count the rows in which attr1 is less than, (approximately) equal
		// to, or greater than attr2.
		int less = 0;
		int eq = 0;
		int more = 0;
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			if(std::abs(pRow[attr1] - pRow[attr2]) < tolerance)
				eq++;
			else if(pRow[attr1] < pRow[attr2])
				less++;
			else
				more++;
		}
		cout << less << " less, " << eq << " same, " << more << " greater\n";
	}

	// Perform the significance tests
	{
		cout << "\n### Paired T-test\n";
		size_t v;
		double t;
		pData->pairedTTest(&v, &t, attr1, attr2, false);
		double p = GMath::tTestAlphaValue(v, t);
		cout << "v=" << v << ", t=" << t << ", p=" << p << "\n";
	}
	{
		cout << "\n### Paired T-test with normalized values\n";
		size_t v;
		double t;
		pData->pairedTTest(&v, &t, attr1, attr2, true);
		double p = GMath::tTestAlphaValue(v, t);
		cout << "v=" << v << ", t=" << t << ", p=" << p << "\n";
	}
	{
		// The header ends with a newline so that the first result line does
		// not run on from the section title.
		cout << "\n### Wilcoxon Signed Ranks Test\n";
		int num;
		double wMinus, wPlus;
		pData->wilcoxonSignedRanksTest(attr1, attr2, tolerance, &num, &wMinus, &wPlus);
		cout << "Number of signed ranks: " << num << "\n";
		double w_min = std::min(wMinus, wPlus);
		double w_sum = wPlus - wMinus;
		cout << "W- = " << wMinus << ", W+ = " << wPlus << ", W_min = " << w_min << ", W_sum = " << w_sum << "\n";

		// NOTE(review): the 0.5 factors here and in the one-tailed line below
		// depend on what GMath::wilcoxonPValue returns -- confirm the scaling
		// against its implementation.
		double p_min = 0.5 * GMath::wilcoxonPValue(num, w_min);
		if(num < 10)
			cout << "Because the number of signed ranks is small, you should use a lookup table, rather than rely on the normal approximation for the P-value.\n";
		cout << "One-tailed P-value (for directional comparisons) computed with a normal approximation using W_min = " << 0.5 * p_min << "\n";
		cout << "Two-tailed P-value (for non-directional comparisons) computed with a normal approximation using W_min = " << p_min << "\n";
		cout << "To show that something is \"better\" than something else, use the one-tailed P-value.\n";
		cout << "Commonly, a P-value less than 0.05 is considered to be significant.\n";
/*
			double p_sum = GMath::wilcoxonPValue(num, w_sum);
			cout << "Directional (one-tailed) P-value computed with W_sum = " << p_sum << "\n";
*/
	}
}