// Runs GreedyStepwiseSearch with a ConsistencySubsetEvaluator over the
// generatedData20 fixture and then the generatedData1000 fixture, checking the
// selected feature subset each time. The expected subsets were validated
// against Weka.
void testGeneratedData20()
    {
      DataFrame df;
      generatedData20(df);

      GreedyStepwiseSearch uut;
      shared_ptr<ConsistencySubsetEvaluator> evaluator(new ConsistencySubsetEvaluator());
      uut.setEvaluator(evaluator);

      // CPPUNIT_ASSERT_EQUAL takes (expected, actual); keeping that order makes the
      // failure message label the two values correctly.
      vector<string> result = uut.findSubset(df);
      CPPUNIT_ASSERT_EQUAL(3, static_cast<int>(result.size()));
      // Only membership matters, but checking by position is the simplest way to
      // pin the result. The size assertion above guards the indexing below.
      CPPUNIT_ASSERT_EQUAL(string("age"), result[0]);
      CPPUNIT_ASSERT_EQUAL(string("elevel"), result[1]);
      CPPUNIT_ASSERT_EQUAL(string("car"), result[2]);

      // NOTE(review): the same DataFrame is reused here; presumably
      // generatedData1000 replaces its previous contents -- confirm.
      generatedData1000(df);
      result = uut.findSubset(df);
      CPPUNIT_ASSERT_EQUAL(5, static_cast<int>(result.size()));
      // As above, position is asserted only for convenience.
      CPPUNIT_ASSERT_EQUAL(string("salary"), result[0]);
      CPPUNIT_ASSERT_EQUAL(string("commission"), result[1]);
      CPPUNIT_ASSERT_EQUAL(string("elevel"), result[2]);
      CPPUNIT_ASSERT_EQUAL(string("car"), result[3]);
      CPPUNIT_ASSERT_EQUAL(string("zipcode"), result[4]);
    }
Ejemplo n.º 2
0
    // Runs GreedyStepwiseSearch with a CfsSubsetEvaluator over the
    // generatedData1000 fixture and checks that exactly one feature, "salary",
    // is selected. The expected subset was validated against Weka.
    void testGeneratedData1000()
    {
      DataFrame df;
      generatedData1000(df);

      GreedyStepwiseSearch uut;
      shared_ptr<CfsSubsetEvaluator> evaluator(new CfsSubsetEvaluator());
      uut.setEvaluator(evaluator);

      // CPPUNIT_ASSERT_EQUAL takes (expected, actual); keeping that order makes the
      // failure message label the two values correctly.
      vector<string> result = uut.findSubset(df);
      CPPUNIT_ASSERT_EQUAL(1, static_cast<int>(result.size()));
      // The size assertion above guards the indexing below.
      CPPUNIT_ASSERT_EQUAL(string("salary"), result[0]);
    }
Ejemplo n.º 3
0
    // Runs GreedyStepwiseSearch with a CfsSubsetEvaluator over the
    // generatedData20 fixture and checks that exactly one feature, "age", is
    // selected.
    void testGeneratedData20()
    {
      DataFrame df;
      generatedData20(df);

      GreedyStepwiseSearch uut;
      shared_ptr<CfsSubsetEvaluator> evaluator(new CfsSubsetEvaluator());
      uut.setEvaluator(evaluator);

      // CPPUNIT_ASSERT_EQUAL takes (expected, actual); keeping that order makes the
      // failure message label the two values correctly.
      vector<string> result = uut.findSubset(df);
      CPPUNIT_ASSERT_EQUAL(1, static_cast<int>(result.size()));
      // This differs slightly from the Weka output. Weka lists both age and car, but
      // also reports the highest score as .619, which is the score for age alone, not
      // the score for car and age together. Weka may have extra logic beyond what is
      // in the paper: when car is removed from the list, Weka puts in the next highest
      // correlated variable that is not zero. It may be a bug in this code, but none
      // has been found. It is reassuring that the 1000-record test matches exactly.
      CPPUNIT_ASSERT_EQUAL(string("age"), result[0]);
    }