GSparseMatrix* loadSparseData(const char* szFilename)
{
	// Load the dataset by extension
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	if(_stricmp(szFilename + pd.extStart, ".arff") == 0)
	{
		// Convert a 3-column dense ARFF file to a sparse matrix
		GMatrix* pData = GMatrix::loadArff(szFilename);
		Holder<GMatrix> hData(pData); // take ownership so the dense copy is not leaked
		if(pData->cols() != 3)
			ThrowError("Expected 3 columns: 0) user or row-index, 1) item or col-index, 2) value or rating");
		double m0, r0, m1, r1;
		pData->minAndRange(0, &m0, &r0);
		pData->minAndRange(1, &m1, &r1);
		if(m0 < 0 || m0 > 1e10 || r0 < 2 || r0 > 1e10)
			ThrowError("Invalid row indexes");
		if(m1 < 0 || m1 > 1e10 || r1 < 2 || r1 > 1e10)
			ThrowError("Invalid col indexes");
		GSparseMatrix* pMatrix = new GSparseMatrix(size_t(m0 + r0) + 1, size_t(m1 + r1) + 1, UNKNOWN_REAL_VALUE);
		Holder<GSparseMatrix> hMatrix(pMatrix);
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			pMatrix->set(size_t(pRow[0]), size_t(pRow[1]), pRow[2]);
		}
		return hMatrix.release();
	}
	else if(_stricmp(szFilename + pd.extStart, ".sparse") == 0)
	{
		GDom doc;
		doc.loadJson(szFilename);
		return new GSparseMatrix(doc.root());
	}
	ThrowError("Unsupported file format: ", szFilename + pd.extStart);
	return NULL;
}
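// Example usage (a minimal sketch, not part of the tool; "ratings.arff" is a
// hypothetical file in the 3-column form described above): load the data and
// report the dimensions of the resulting sparse matrix.
void exampleLoadSparse()
{
	GSparseMatrix* pMatrix = loadSparseData("ratings.arff");
	Holder<GSparseMatrix> hMatrix(pMatrix); // ensure the matrix is freed
	cout << pMatrix->rows() << " rows, " << pMatrix->cols() << " cols\n";
}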
std::string to_str(const GDom& doc)
{
	std::ostringstream os;
	doc.writeJsonPretty(os);
	return os.str();
}
void Extrapolate(GArgReader &args)
{
	// Load the model
	if(args.size() < 1)
	{
		throw Ex("Model not specified.");
	}
	GDom doc;
	doc.loadJson(args.pop_string());
	GLearnerLoader ll(true);
	GSupervisedLearner *pLearner = ll.loadLearner(doc.root());
	std::unique_ptr<GSupervisedLearner> hLearner(pLearner);

	// Parse options
	double start = 1.0;
	double length = 1.0;
	double step = 0.0002;
	bool useFeatures = false;
	bool outputFeatures = true;

	GNeuralDecomposition *nd = (GNeuralDecomposition *) pLearner;
	std::unique_ptr<GMatrix> hFeatures;

	while(args.next_is_flag())
	{
		if(args.if_pop("-start"))
		{
			start = args.pop_double();
		}
		else if(args.if_pop("-length"))
		{
			length = args.pop_double();
		}
		else if(args.if_pop("-step"))
		{
			step = args.pop_double();
		}
		else if(args.if_pop("-features"))
		{
			LoadData(args, hFeatures);
			useFeatures = true;
		}
		else if(args.if_pop("-outputFeatures"))
		{
			outputFeatures = true;
		}
		else
		{
			throw Ex("Invalid option: ", args.peek());
		}
	}

	// Extrapolate
	GMatrix *pOutput;
	if(useFeatures)
		pOutput = nd->extrapolate(*hFeatures.get());
	else
		pOutput = nd->extrapolate(start, length, step, outputFeatures);
	std::unique_ptr<GMatrix> hOutput(pOutput);

	// Output predictions
	pOutput->print(cout);
}
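// Example driver (a minimal sketch, not part of the tool): extrapolates from a
// previously trained model. The file name "model.json" and the flag values are
// made up, and this assumes GArgReader can be constructed from an (argc, argv)
// pair the way the tool's main() builds it.
void exampleExtrapolate()
{
	const char* argv[] = {"model.json", "-start", "1.0", "-length", "2.0", "-step", "0.001"};
	GArgReader args(7, (char**)argv);
	Extrapolate(args); // prints the extrapolated series to stdout
}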
void Train(GArgReader &args)
{
	// Load series from file
	std::unique_ptr<GMatrix> hSeries, hFeatures;
	LoadData(args, hSeries);
	GMatrix *pSeries = hSeries.get();

	// Split features/labels
	if(pSeries->cols() == 2)
	{
		GMatrix *pFeatures = pSeries->cloneSub(0, 0, pSeries->rows(), 1);
		GMatrix *pLabels = pSeries->cloneSub(0, 1, pSeries->rows(), 1);
		hFeatures.reset(pFeatures);
		hSeries.reset(pLabels);
		pSeries = pLabels;
	}
	else if(pSeries->cols() > 2)
	{
		throw Ex("Too many columns!");
	}

	// Parse options
	GNeuralDecomposition *nd = new GNeuralDecomposition();
	std::unique_ptr<GNeuralDecomposition> hND(nd); // own the model so it is not leaked
	while(args.next_is_flag())
	{
		if(args.if_pop("-regularization"))
			nd->setRegularization(args.pop_double());
		else if(args.if_pop("-learningRate"))
			nd->setLearningRate(args.pop_double());
		else if(args.if_pop("-linearUnits"))
			nd->setLinearUnits(args.pop_uint());
		else if(args.if_pop("-softplusUnits"))
			nd->setSoftplusUnits(args.pop_uint());
		else if(args.if_pop("-sigmoidUnits"))
			nd->setSigmoidUnits(args.pop_uint());
		else if(args.if_pop("-epochs"))
			nd->setEpochs(args.pop_uint());
		else if(args.if_pop("-features"))
			LoadData(args, hFeatures);
		else if(args.if_pop("-filterLogarithm"))
			nd->setFilterLogarithm(true);
		else
			throw Ex("Invalid option: ", args.peek());
	}

	if(hFeatures.get() == NULL)
	{
		// Generate features
		GMatrix *pFeatures = new GMatrix(pSeries->rows(), 1);
		for(size_t i = 0; i < pSeries->rows(); i++)
		{
			pFeatures->row(i)[0] = i / (double) pSeries->rows();
		}
		hFeatures.reset(pFeatures);
	}

	// Train
	GMatrix *pFeatures = hFeatures.get();
	nd->train(*pFeatures, *pSeries);

	// Output the trained model
	GDom doc;
	doc.setRoot(nd->serialize(&doc));
	doc.writeJson(cout);
}
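// Example driver (a minimal sketch, not part of the tool): trains a neural
// decomposition model on a hypothetical single-column series "series.arff" and
// writes the serialized model JSON to stdout. As above, the GArgReader
// construction and all file names and flag values are assumptions.
void exampleTrain()
{
	const char* argv[] = {"series.arff", "-epochs", "1000", "-learningRate", "0.01"};
	GArgReader args(5, (char**)argv);
	Train(args); // redirect stdout to a file (e.g. a model JSON) to keep the result
}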
void selfOrganizingMap(GArgReader& args)
{
	// Load the file
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);

	// Parse arguments
	std::vector<double> netDims;
	unsigned numNodes = 1;
	while(args.next_is_uint())
	{
		unsigned dim = args.pop_uint();
		netDims.push_back(dim);
		numNodes *= dim;
	}
	if(netDims.size() < 1)
	{
		throw Ex("No dimensions specified for self organizing map. ",
			"A map must be at least 1 dimensional.");
	}
	Holder<SOM::ReporterChain> reporters(new SOM::ReporterChain);
	Holder<SOM::TrainingAlgorithm> alg(NULL);
	Holder<GDistanceMetric> weightDist(new GRowDistance);
	Holder<GDistanceMetric> nodeDist(new GRowDistance);
	Holder<SOM::NodeLocationInitialization> topology(new SOM::GridTopology);
	Holder<SOM::NodeWeightInitialization> weightInit
		(new SOM::NodeWeightInitializationTrainingSetSample(NULL));
	Holder<SOM::NeighborhoodWindowFunction> windowFunc(new SOM::GaussianWindowFunction());

	// Loading and saving
	string loadFrom = "";
	string saveTo = "";

	// Parameters for different training algorithms
	string algoName = "batch";
	double startWidth = -1;   // Start width - set later if still negative
	double endWidth = -1;     // End width - set later if still negative
	double startRate = -1;    // Start learning rate
	double endRate = -1;      // End learning rate
	unsigned numIter = 100;   // Total iterations
	unsigned numConverge = 1; // #steps for batch to converge

	while(args.next_is_flag())
	{
		if(args.if_pop("-tofile"))
		{
			saveTo = args.pop_string();
		}
		else if(args.if_pop("-fromfile"))
		{
			loadFrom = args.pop_string();
		}
		else if(args.if_pop("-seed"))
		{
			GRand::global().setSeed(args.pop_uint());
		}
		else if(args.if_pop("-neighborhood"))
		{
			string name = args.pop_string();
			if(name == "gaussian")
			{
				windowFunc.reset(new SOM::GaussianWindowFunction());
			}
			else if(name == "uniform")
			{
				windowFunc.reset(new SOM::UniformWindowFunction());
			}
			else
			{
				throw Ex("Only gaussian and uniform are acceptable ",
					"neighborhood types");
			}
		}
		else if(args.if_pop("-printMeshEvery"))
		{
			using namespace SOM;
			unsigned interval = args.pop_uint();
			string baseFilename = args.pop_string();
			unsigned xDim = args.pop_uint();
			unsigned yDim = args.pop_uint();
			bool showTrain = false;
			if(args.if_pop("showTrain") || args.if_pop("showtrain"))
			{
				showTrain = true;
			}
			smart_ptr<Reporter> weightReporter
				(new SVG2DWeightReporter(baseFilename, xDim, yDim, showTrain));
			Holder<IterationIntervalReporter> intervalReporter
				(new IterationIntervalReporter(weightReporter, interval));
			reporters->add(intervalReporter.release());
		}
		else if(args.if_pop("-batchTrain"))
		{
			algoName = "batch";
			startWidth = args.pop_double();
			endWidth = args.pop_double();
			numIter = args.pop_uint();
			numConverge = args.pop_uint();
		}
		else if(args.if_pop("-stdTrain"))
		{
			algoName = "standard";
			startWidth = args.pop_double();
			endWidth = args.pop_double();
			startRate = args.pop_double();
			endRate = args.pop_double();
			numIter = args.pop_uint();
		}
		else
		{
			throw Ex("Invalid option: ", args.peek());
		}
	}

	// Create the training algorithm
	Holder<SOM::TrainingAlgorithm> algo;
	if(algoName == "batch")
	{
		double netRadius = *std::max_element(netDims.begin(), netDims.end());
		if(startWidth < 0)
			startWidth = 2 * netRadius;
		if(endWidth < 0)
			endWidth = 1;
		algo.reset(new SOM::BatchTraining
			(startWidth, endWidth, numIter, numConverge,
			weightInit.release(), windowFunc.release(),
			reporters.release()));
	}
	else if(algoName == "standard")
	{
		algo.reset(new SOM::TraditionalTraining
			(startWidth, endWidth, startRate, endRate, numIter,
			weightInit.release(), windowFunc.release(),
			reporters.release()));
	}
	else
	{
		throw Ex("Unknown type of training algorithm: \"", algoName, "\"");
	}

	// Create the network & transform the data
	Holder<GSelfOrganizingMap> som;
	Holder<GMatrix> out;
	if(loadFrom == "")
	{
		// Create map from arguments given
		som.reset(new GSelfOrganizingMap
			(netDims, numNodes, topology.release(), algo.release(),
			weightDist.release(), nodeDist.release()));
		// Train the network and transform the data in place
		out.reset(som->doit(*pData));
	}
	else
	{
		// Create map from file
		GDom source;
		source.loadJson(loadFrom.c_str());
		som.reset(new GSelfOrganizingMap(source.root()));
		// Transform using the loaded network
		out.reset(som->transformBatch(*pData));
	}

	// Save the trained network
	if(saveTo != "")
	{
		GDom serialized;
		GDomNode* root = som->serialize(&serialized);
		serialized.setRoot(root);
		serialized.saveJson(saveTo.c_str());
	}

	// Print the result
	out->print(cout);
}
void principalComponentAnalysis(GArgReader& args)
{
	// Load the file
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	int nTargetDims = args.pop_uint();

	// Parse options
	string roundTrip;
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	string eigenvalues;
	string components;
	string modelIn;
	string modelOut;
	bool aboutOrigin = false;
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-roundtrip"))
			roundTrip = args.pop_string();
		else if(args.if_pop("-eigenvalues"))
			eigenvalues = args.pop_string();
		else if(args.if_pop("-components"))
			components = args.pop_string();
		else if(args.if_pop("-aboutorigin"))
			aboutOrigin = true;
		else if(args.if_pop("-modelin"))
			modelIn = args.pop_string();
		else if(args.if_pop("-modelout"))
			modelOut = args.pop_string();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Transform the data
	GRand prng(seed);
	GPCA* pTransform = NULL;
	if(modelIn.length() > 0)
	{
		GDom doc;
		doc.loadJson(modelIn.c_str());
		GLearnerLoader ll(prng);
		pTransform = new GPCA(doc.root(), ll);
	}
	else
	{
		pTransform = new GPCA(nTargetDims, &prng);
		if(aboutOrigin)
			pTransform->aboutOrigin();
		if(eigenvalues.length() > 0)
			pTransform->computeEigVals();
		pTransform->train(*pData);
	}
	Holder<GPCA> hTransform(pTransform);
	GMatrix* pDataAfter = pTransform->transformBatch(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);

	// Save the eigenvalues
	if(eigenvalues.length() > 0)
	{
		GArffRelation* pRelation = new GArffRelation();
		pRelation->addAttribute("eigenvalues", 0, NULL);
		sp_relation pRel = pRelation;
		GMatrix dataEigenvalues(pRel);
		dataEigenvalues.newRows(nTargetDims);
		double* pEigVals = pTransform->eigVals();
		for(int i = 0; i < nTargetDims; i++)
			dataEigenvalues[i][0] = pEigVals[i];
		dataEigenvalues.saveArff(eigenvalues.c_str());
	}

	// Save the components
	if(components.length() > 0)
		pTransform->components()->saveArff(components.c_str());

	// Do the round-trip
	if(roundTrip.size() > 0)
	{
		GMatrix roundTripped(pData->rows(), pData->cols());
		for(size_t i = 0; i < pData->rows(); i++)
			pTransform->untransform(pDataAfter->row(i), roundTripped.row(i));
		roundTripped.saveArff(roundTrip.c_str());
	}

	// Save the model (if requested)
	if(modelOut.length() > 0)
	{
		GDom doc;
		doc.setRoot(pTransform->serialize(&doc));
		doc.saveJson(modelOut.c_str());
	}
	pDataAfter->print(cout);
}
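// Example driver (a minimal sketch, not part of the tool): projects a
// hypothetical "data.arff" down to 2 principal components and saves the
// eigenvalues. File names, flag values, and the GArgReader construction are
// assumptions.
void examplePca()
{
	const char* argv[] = {"data.arff", "2", "-eigenvalues", "eigvals.arff"};
	GArgReader args(4, (char**)argv);
	principalComponentAnalysis(args); // prints the projected data to stdout
}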
void unsupervisedBackProp(GArgReader& args)
{
	// Load the file and params
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	int targetDims = args.pop_uint();

	// Parse Options
	unsigned int nSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(nSeed);
	GUnsupervisedBackProp* pUBP = new GUnsupervisedBackProp(targetDims, &prng);
	Holder<GUnsupervisedBackProp> hUBP(pUBP);
	vector<size_t> paramRanges;
	string sModelOut;
	string sProgress;
	bool inputBias = true;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
			prng.setSeed(args.pop_uint());
		else if(args.if_pop("-addlayer"))
			pUBP->neuralNet()->addLayer(args.pop_uint());
		else if(args.if_pop("-params"))
		{
			if(pUBP->jitterer())
				throw Ex("You can't change the params after you add an image jitterer");
			size_t paramDims = args.pop_uint();
			for(size_t i = 0; i < paramDims; i++)
				paramRanges.push_back(args.pop_uint());
		}
		else if(args.if_pop("-modelin"))
		{
			GDom doc;
			doc.loadJson(args.pop_string());
			GLearnerLoader ll(prng);
			pUBP = new GUnsupervisedBackProp(doc.root(), ll);
			hUBP.reset(pUBP);
		}
		else if(args.if_pop("-modelout"))
			sModelOut = args.pop_string();
		else if(args.if_pop("-intrinsicin"))
		{
			GMatrix* pInt = new GMatrix();
			pInt->loadArff(args.pop_string());
			pUBP->setIntrinsic(pInt);
		}
		else if(args.if_pop("-jitter"))
		{
			if(paramRanges.size() != 2)
				throw Ex("The params must be set to 2 before a tweaker is set");
			size_t channels = args.pop_uint();
			double rot = args.pop_double();
			double trans = args.pop_double();
			double zoom = args.pop_double();
			GImageJitterer* pJitterer = new GImageJitterer(paramRanges[0], paramRanges[1], channels, rot, trans, zoom);
			pUBP->setJitterer(pJitterer);
		}
		else if(args.if_pop("-noinputbias"))
			inputBias = false;
		else if(args.if_pop("-progress"))
		{
			sProgress = args.pop_string();
			pUBP->trackProgress();
		}
		else if(args.if_pop("-onepass"))
			pUBP->onePass();
		else
			throw Ex("Invalid option: ", args.peek());
	}
	pUBP->setParams(paramRanges);
	pUBP->setUseInputBias(inputBias);

	// Transform the data
	GMatrix* pDataAfter = pUBP->doit(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);
	pDataAfter->print(cout);

	// Save the model (if requested)
	if(sModelOut.length() > 0)
	{
		GDom doc;
		doc.setRoot(pUBP->serialize(&doc));
		doc.saveJson(sModelOut.c_str());
	}
	if(sProgress.length() > 0)
		pUBP->progress().saveArff(sProgress.c_str());
}
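// Example driver (a minimal sketch, not part of the tool): reduces a
// hypothetical "images.arff" to 3 intrinsic dimensions with one extra hidden
// layer and saves the trained model. As above, every file name, flag value,
// and the GArgReader construction are assumptions.
void exampleUnsupervisedBackProp()
{
	const char* argv[] = {"images.arff", "3", "-addlayer", "64", "-modelout", "ubp.json"};
	GArgReader args(6, (char**)argv);
	unsupervisedBackProp(args); // prints the low-dimensional embedding to stdout
}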