void GNaiveBayes_testMath()
{
	const char* trainFile =
		"@RELATION test\n"
		"@ATTRIBUTE a {t,f}\n"
		"@ATTRIBUTE b {r,g,b}\n"
		"@ATTRIBUTE c {y,n}\n"
		"@DATA\n"
		"t,r,y\n"
		"f,r,n\n"
		"t,g,y\n"
		"f,g,y\n"
		"f,g,n\n"
		"t,r,n\n"
		"t,r,y\n"
		"t,b,y\n"
		"f,r,y\n"
		"f,g,n\n"
		"f,b,y\n"
		"t,r,n\n";
	GMatrix train;
	train.parseArff(trainFile, strlen(trainFile));

	// Split into features (columns a,b) and labels (column c)
	GMatrix* pFeatures = train.cloneSub(0, 0, train.rows(), 2);
	std::unique_ptr<GMatrix> hFeatures(pFeatures);
	GMatrix* pLabels = train.cloneSub(0, 2, train.rows(), 1);
	std::unique_ptr<GMatrix> hLabels(pLabels);

	// Train with no Laplace smoothing, so the learned probabilities
	// are exact count ratios
	GNaiveBayes nb;
	nb.setEquivalentSampleSize(0.0);
	nb.train(*pFeatures, *pLabels);

	// Check the predicted distribution for every feature pattern against
	// hand-computed counts: 7 of 12 rows have c=y, so P(y)=7/12 and
	// P(n)=5/12; of the 7 "y" rows, 4 have a=t and 3 have b=r, giving
	// P(a=t|y)=4/7 and P(b=r|y)=3/7; the other factors follow likewise.
	GPrediction out;
	GVec pat(2);
	pat[0] = 0; pat[1] = 0; // a=t, b=r
	nb.predictDistribution(pat, &out);
	GNaiveBayes_CheckResults(7.0/12.0, 4.0/7.0*3.0/7.0, 5.0/12.0, 2.0/5.0*3.0/5.0, &out);
	pat[0] = 0; pat[1] = 1; // a=t, b=g
	nb.predictDistribution(pat, &out);
	GNaiveBayes_CheckResults(7.0/12.0, 4.0/7.0*2.0/7.0, 5.0/12.0, 2.0/5.0*2.0/5.0, &out);
	pat[0] = 0; pat[1] = 2; // a=t, b=b
	nb.predictDistribution(pat, &out);
	GNaiveBayes_CheckResults(7.0/12.0, 4.0/7.0*2.0/7.0, 5.0/12.0, 2.0/5.0*0.0/5.0, &out);
	pat[0] = 1; pat[1] = 0; // a=f, b=r
	nb.predictDistribution(pat, &out);
	GNaiveBayes_CheckResults(7.0/12.0, 3.0/7.0*3.0/7.0, 5.0/12.0, 3.0/5.0*3.0/5.0, &out);
	pat[0] = 1; pat[1] = 1; // a=f, b=g
	nb.predictDistribution(pat, &out);
	GNaiveBayes_CheckResults(7.0/12.0, 3.0/7.0*2.0/7.0, 5.0/12.0, 3.0/5.0*2.0/5.0, &out);
	pat[0] = 1; pat[1] = 2; // a=f, b=b
	nb.predictDistribution(pat, &out);
	GNaiveBayes_CheckResults(7.0/12.0, 3.0/7.0*2.0/7.0, 5.0/12.0, 3.0/5.0*0.0/5.0, &out);
}
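// A minimal sketch of the checker the test above depends on. The real
// GNaiveBayes_CheckResults is defined elsewhere in the library; this
// hypothetical stand-in (note the _sketch suffix) assumes GPrediction
// exposes the label distribution via asCategorical(), that class index 0
// is "y" and index 1 is "n", and that <cmath> is included for std::abs.
// It verifies that naive Bayes reports the normalized product of the
// class prior and the per-attribute conditionals.
void GNaiveBayes_CheckResults_sketch(double yPrior, double yLikelihood, double nPrior, double nLikelihood, GPrediction* pOut)
{
	GCategoricalDistribution* pCat = pOut->asCategorical();
	double y = yPrior * yLikelihood; // unnormalized posterior for "y"
	double n = nPrior * nLikelihood; // unnormalized posterior for "n"
	double sum = y + n;
	if(std::abs(pCat->likelihood(0) - y / sum) > 1e-8)
		throw Ex("Unexpected posterior for class y");
	if(std::abs(pCat->likelihood(1) - n / sum) > 1e-8)
		throw Ex("Unexpected posterior for class n");
}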
void LoadData(GArgReader& args, std::unique_ptr<GMatrix>& hOutput)
{
	// Determine the file type
	if(args.size() < 1)
		throw Ex("Expected the filename of a dataset. (Found end of arguments.)");
	const char* szFilename = args.pop_string();
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	GMatrix data;
	const char* input_type;
	if(args.next_is_flag() && args.if_pop("-input_type"))
		input_type = args.pop_string();
	else
	{
		// Deduce the type from the extension (if any)
		input_type = szFilename + pd.extStart;
		if(*input_type != '.') // no extension, so assume ARFF
			input_type = "arff";
		else
			input_type++;
	}

	// Now load the data
	if(_stricmp(input_type, "arff") == 0)
		data.loadArff(szFilename);
	else if(_stricmp(input_type, "csv") == 0)
	{
		GCSVParser parser;
		parser.parse(data, szFilename);
		cerr << "\nParsing Report:\n";
		for(size_t i = 0; i < data.cols(); i++)
			cerr << to_str(i) << ") " << parser.report(i) << "\n";
	}
	else if(_stricmp(input_type, "dat") == 0)
	{
		GCSVParser parser;
		parser.setSeparator('\0'); // whitespace-separated values
		parser.parse(data, szFilename);
		cerr << "\nParsing Report:\n";
		for(size_t i = 0; i < data.cols(); i++)
			cerr << to_str(i) << ") " << parser.report(i) << "\n";
	}
	else
		throw Ex("Unsupported file format: ", szFilename + pd.extStart);

	// Hand a copy of the whole dataset to the caller. (Splitting it into
	// feature and label matrices, when needed, is the caller's job; see
	// Train below.)
	GMatrix* pData = data.cloneSub(0, 0, data.rows(), data.cols());
	hOutput.reset(pData);
}
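// A hedged usage sketch showing how a tool's entry point might drive
// LoadData. It assumes GArgReader is constructed from argc/argv as in
// the other command-line tools in this codebase; main_sketch and the
// example command lines in the comments are illustrative assumptions,
// not the tool's actual main().
int main_sketch(int argc, char* argv[])
{
	GArgReader args(argc, argv);
	args.pop_string(); // discard the executable name
	std::unique_ptr<GMatrix> hData;
	// e.g. "mytool series.arff" or "mytool series.txt -input_type csv"
	// (note that LoadData reads -input_type after popping the filename)
	LoadData(args, hData);
	cerr << "Loaded " << hData->rows() << " rows, " << hData->cols() << " cols\n";
	return 0;
}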
void Train(GArgReader& args)
{
	// Load the series from the file
	std::unique_ptr<GMatrix> hSeries, hFeatures;
	LoadData(args, hSeries);
	GMatrix* pSeries = hSeries.get();

	// Split into features and labels. With two columns, the first is the
	// time feature and the second is the series value; with one column,
	// a time feature is generated below.
	if(pSeries->cols() == 2)
	{
		GMatrix* pFeatures = pSeries->cloneSub(0, 0, pSeries->rows(), 1);
		GMatrix* pLabels = pSeries->cloneSub(0, 1, pSeries->rows(), 1);
		hFeatures.reset(pFeatures);
		hSeries.reset(pLabels); // frees the original two-column matrix
		pSeries = pLabels;
	}
	else if(pSeries->cols() > 2)
		throw Ex("Expected one column (series values) or two columns (time, series values).");

	// Parse options
	GNeuralDecomposition nd;
	while(args.next_is_flag())
	{
		if(args.if_pop("-regularization"))
			nd.setRegularization(args.pop_double());
		else if(args.if_pop("-learningRate"))
			nd.setLearningRate(args.pop_double());
		else if(args.if_pop("-linearUnits"))
			nd.setLinearUnits(args.pop_uint());
		else if(args.if_pop("-softplusUnits"))
			nd.setSoftplusUnits(args.pop_uint());
		else if(args.if_pop("-sigmoidUnits"))
			nd.setSigmoidUnits(args.pop_uint());
		else if(args.if_pop("-epochs"))
			nd.setEpochs(args.pop_uint());
		else if(args.if_pop("-features"))
			LoadData(args, hFeatures);
		else if(args.if_pop("-filterLogarithm"))
			nd.setFilterLogarithm(true);
		else
			throw Ex("Invalid option: ", args.peek());
	}

	if(hFeatures.get() == NULL)
	{
		// No feature matrix was provided, so generate a time feature
		// normalized to [0, 1) over the length of the series
		GMatrix* pFeatures = new GMatrix(pSeries->rows(), 1);
		for(size_t i = 0; i < pSeries->rows(); i++)
			pFeatures->row(i)[0] = i / (double)pSeries->rows();
		hFeatures.reset(pFeatures);
	}

	// Train
	GMatrix* pFeatures = hFeatures.get();
	nd.train(*pFeatures, *pSeries);

	// Output the trained model as JSON
	GDom doc;
	doc.setRoot(nd.serialize(&doc));
	doc.writeJson(cout);
}
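// A hedged follow-on sketch: once trained, the decomposition can be
// evaluated at any point on its time axis, including t >= 1, which
// extrapolates beyond the observed series. Extrapolate_sketch is a
// hypothetical helper, not part of the tool; it assumes the GVec-based
// predict() interface used elsewhere in this codebase and mirrors the
// i / rows time normalization from Train above.
void Extrapolate_sketch(GNeuralDecomposition& nd, size_t trainRows, size_t horizon)
{
	GVec in(1);
	GVec out(1);
	for(size_t i = 0; i < horizon; i++)
	{
		// continue the normalized time axis past the end of the series
		in[0] = (double)(trainRows + i) / (double)trainRows;
		nd.predict(in, out);
		cout << in[0] << "," << out[0] << "\n";
	}
}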