void test_recommend_fillmissingvalues() { // Make some input files TempFileMaker tempFile1("a.arff", "@RELATION test\n" "@ATTRIBUTE a1 { a, b, c }\n" "@ATTRIBUTE a2 continuous\n" "@ATTRIBUTE a3 { d, e, f }\n" "@ATTRIBUTE a4 { g, h, i }\n" "@DATA\n" "a, ?, f, i\n" "?, 2, ?, i\n" "b, ?, d, ?\n" "?, 4, ?, ?\n" "?, ?, e, g\n" "?, ?, e, ?\n" "a, ?, ?, h\n" "\n" ); // Execute the command GPipe pipeStdOut; if(sysExec("waffles_recommend", "fillmissingvalues a.arff baseline", &pipeStdOut) != 0) throw Ex("exit status indicates failure"); char buf[512]; size_t len = pipeStdOut.read(buf, 512); if(len == 512) throw Ex("need a bigger buffer"); buf[len] = '\0'; // Check the results GMatrix M; M.parseArff(buf, strlen(buf)); if(M.rows() != 7 || M.cols() != 4) throw Ex("failed"); if(M[0][0] != 0) throw Ex("failed"); if(M[0][1] != 3) throw Ex("failed"); if(M[1][1] != 2) throw Ex("failed"); if(M[2][1] != 3) throw Ex("failed"); if(M[3][3] != 2) throw Ex("failed"); if(M[4][0] != 0) throw Ex("failed"); if(M[5][1] != 3) throw Ex("failed"); if(M[6][2] != 1) throw Ex("failed"); if(M[6][3] != 1) throw Ex("failed"); }
void test_parsearff_quoting(){ const char* inputArff= "@relation 'squares of numbers'\n" "\n" "@attribute 'the number' real\n" "\n" "@attribute 'the square of the number' real\n" "\n" "@attribute exact {'is exact', inexact,is\\\\\\ exact}\n" "\n" "@data\n" "1,1,'is exact'\n" "2,4,is\\ exact\n" "1.414,2,inexact\n" "3,9,\"is exact\"\n" "4,16,\"is\\ exact\"\n" ; GMatrix M; M.parseArff(inputArff, strlen(inputArff)); double expected_data[5][3]={{1,1,0},{2,4,0},{1.414,2,1},{3,9,0},{4,16,2}}; const GArffRelation* pRel = (const GArffRelation*)&M.relation(); const GArffRelation& R = *pRel; TestEqual(R.size(), (std::size_t)3, "Incorrect number of attributes"); for(unsigned row = 0; row < 5; ++row){ for(unsigned col = 0; col < 3; ++col){ std::stringstream errdescr; errdescr << "Incorrect matrix entry [" << row << "][" << col << "]"; TestEqual(M[row][col], expected_data[row][col], errdescr.str()); } } TestEqual(true, R.areContinuous(0,2), "First or second attribute is not continuous"); TestEqual(true, R.areNominal(2,1), "Third attribute is not nominal"); std::stringstream val0, val1, val2; R.printAttrValue(val0, 2, 0); R.printAttrValue(val1, 2, 1); R.printAttrValue(val2, 2, 2); TestEqual("'is exact'",val0.str(), "First value of third attribute incorrect name"); TestEqual("inexact",val1.str(), "Second value of third attribute incorrect name"); TestEqual("is\\ exact",val2.str(), "Third value of third attribute incorrect name"); TestEqual("'the number'",R.attrName(0),"First attribute incorrect name"); TestEqual("'the square of the number'",R.attrName(1), "Second attribute incorrect name"); TestEqual("exact",R.attrName(2),"Third attribute incorrect name"); }
void GNaiveBayes_testMath() { const char* trainFile = "@RELATION test\n" "@ATTRIBUTE a {t,f}\n" "@ATTRIBUTE b {r,g,b}\n" "@ATTRIBUTE c {y,n}\n" "@DATA\n" "t,r,y\n" "f,r,n\n" "t,g,y\n" "f,g,y\n" "f,g,n\n" "t,r,n\n" "t,r,y\n" "t,b,y\n" "f,r,y\n" "f,g,n\n" "f,b,y\n" "t,r,n\n"; GMatrix train; train.parseArff(trainFile, strlen(trainFile)); GMatrix* pFeatures = train.cloneSub(0, 0, train.rows(), 2); std::unique_ptr<GMatrix> hFeatures(pFeatures); GMatrix* pLabels = train.cloneSub(0, 2, train.rows(), 1); std::unique_ptr<GMatrix> hLabels(pLabels); GNaiveBayes nb; nb.setEquivalentSampleSize(0.0); nb.train(*pFeatures, *pLabels); GPrediction out; GVec pat(2); pat[0] = 0; pat[1] = 0; nb.predictDistribution(pat, &out); GNaiveBayes_CheckResults(7.0/12.0, 4.0/7.0*3.0/7.0, 5.0/12.0, 2.0/5.0*3.0/5.0, &out); pat[0] = 0; pat[1] = 1; nb.predictDistribution(pat, &out); GNaiveBayes_CheckResults(7.0/12.0, 4.0/7.0*2.0/7.0, 5.0/12.0, 2.0/5.0*2.0/5.0, &out); pat[0] = 0; pat[1] = 2; nb.predictDistribution(pat, &out); GNaiveBayes_CheckResults(7.0/12.0, 4.0/7.0*2.0/7.0, 5.0/12.0, 2.0/5.0*0.0/5.0, &out); pat[0] = 1; pat[1] = 0; nb.predictDistribution(pat, &out); GNaiveBayes_CheckResults(7.0/12.0, 3.0/7.0*3.0/7.0, 5.0/12.0, 3.0/5.0*3.0/5.0, &out); pat[0] = 1; pat[1] = 1; nb.predictDistribution(pat, &out); GNaiveBayes_CheckResults(7.0/12.0, 3.0/7.0*2.0/7.0, 5.0/12.0, 3.0/5.0*2.0/5.0, &out); pat[0] = 1; pat[1] = 2; nb.predictDistribution(pat, &out); GNaiveBayes_CheckResults(7.0/12.0, 3.0/7.0*2.0/7.0, 5.0/12.0, 3.0/5.0*0.0/5.0, &out); }