void testGeneratedData20() { DataFrame df; generatedData20(df); GreedyStepwiseSearch uut; shared_ptr<ConsistencySubsetEvaluator> evaluator(new ConsistencySubsetEvaluator()); uut.setEvaluator(evaluator); // this has been validated against weka vector<string> result = uut.findSubset(df); CPPUNIT_ASSERT_EQUAL((int)result.size(), 3); // order isn't important here, but easy CPPUNIT_ASSERT_EQUAL(result[0], string("age")); CPPUNIT_ASSERT_EQUAL(result[1], string("elevel")); CPPUNIT_ASSERT_EQUAL(result[2], string("car")); generatedData1000(df); // these results have been validated against weka result = uut.findSubset(df); CPPUNIT_ASSERT_EQUAL((int)result.size(), 5); // order isn't important here, but easy CPPUNIT_ASSERT_EQUAL(result[0], string("salary")); CPPUNIT_ASSERT_EQUAL(result[1], string("commission")); CPPUNIT_ASSERT_EQUAL(result[2], string("elevel")); CPPUNIT_ASSERT_EQUAL(result[3], string("car")); CPPUNIT_ASSERT_EQUAL(result[4], string("zipcode")); }
void testGeneratedData1000() { DataFrame df; generatedData1000(df); GreedyStepwiseSearch uut; shared_ptr<CfsSubsetEvaluator> evaluator(new CfsSubsetEvaluator()); uut.setEvaluator(evaluator); // this has been validated against weka vector<string> result = uut.findSubset(df); // for (unsigned int i = 0; i < result.size(); i++) // { // cout << result[i] << endl; // } CPPUNIT_ASSERT_EQUAL((int)result.size(), 1); // order isn't important here, but easy CPPUNIT_ASSERT_EQUAL(result[0], string("salary")); }
void testGeneratedData20() { DataFrame df; generatedData20(df); GreedyStepwiseSearch uut; shared_ptr<CfsSubsetEvaluator> evaluator(new CfsSubsetEvaluator()); uut.setEvaluator(evaluator); // this has been validated against weka vector<string> result = uut.findSubset(df); CPPUNIT_ASSERT_EQUAL((int)result.size(), 1); // This is slightly different from the Weka output. Weka lists both age and car, but also // says the highest score is .619, the score for age alone, not the score for car and age. // I wonder if they have some extra logic in there apart from what is in the paper. By // removing car from the list Weka puts in the next highest correlated variable that is not // zero. It may be a bug in my code, but I can't find it. It is reassuring that the 1000 // test is spot on. CPPUNIT_ASSERT_EQUAL(result[0], string("age")); }