Beispiel #1
0
GSparseMatrix* loadSparseData(const char* szFilename)
{
	// Load the dataset by extension
	PathData pd;
	GFile::parsePath(szFilename, &pd);
	if(_stricmp(szFilename + pd.extStart, ".arff") == 0)
	{
		// Convert a 3-column dense ARFF file to a sparse matrix
		GMatrix* pData = GMatrix::loadArff(szFilename);
		if(pData->cols() != 3)
			ThrowError("Expected 3 columns: 0) user or row-index, 1) item or col-index, 2) value or rating");
		double m0, r0, m1, r1;
		pData->minAndRange(0, &m0, &r0);
		pData->minAndRange(1, &m1, &r1);
		if(m0 < 0 || m0 > 1e10 || r0 < 2 || r0 > 1e10)
			ThrowError("Invalid row indexes");
		if(m1 < 0 || m1 > 1e10 || r1 < 2 || r1 > 1e10)
			ThrowError("Invalid col indexes");
		GSparseMatrix* pMatrix = new GSparseMatrix(size_t(m0 + r0) + 1, size_t(m1 + r1) + 1, UNKNOWN_REAL_VALUE);
		Holder<GSparseMatrix> hMatrix(pMatrix);
		for(size_t i = 0; i < pData->rows(); i++)
		{
			double* pRow = pData->row(i);
			pMatrix->set(size_t(pRow[0]), size_t(pRow[1]), pRow[2]);
		}
		return hMatrix.release();
	}
	else if(_stricmp(szFilename + pd.extStart, ".sparse") == 0)
	{
		GDom doc;
		doc.loadJson(szFilename);
		return new GSparseMatrix(doc.root());
	}
	ThrowError("Unsupported file format: ", szFilename + pd.extStart);
	return NULL;
}
Beispiel #2
0
std::string to_str(const GDom& doc)
{
	std::ostringstream os;
	doc.writeJsonPretty(os);
	return os.str();
}
Beispiel #3
0
void Extrapolate(GArgReader &args)
{
	// Load the model
	if(args.size() < 1)
	{
		throw Ex("Model not specified.");
	}
	GDom doc;
	doc.loadJson(args.pop_string());
	GLearnerLoader ll(true);
	GSupervisedLearner *pLearner = ll.loadLearner(doc.root());
	std::unique_ptr<GSupervisedLearner> hLearner(pLearner);
	
	// Parse options
	
	double start = 1.0;
	double length = 1.0;
	double step = 0.0002;
	bool useFeatures = false;
	bool outputFeatures = true;
	
	GNeuralDecomposition *nd = (GNeuralDecomposition *) pLearner;
	std::unique_ptr<GMatrix> hFeatures;
	
	while(args.next_is_flag())
	{
		if(args.if_pop("-start"))
		{
			start = args.pop_double();
		}
		else if(args.if_pop("-length"))
		{
			length = args.pop_double();
		}
		else if(args.if_pop("-step"))
		{
			step = args.pop_double();
		}
		else if(args.if_pop("-features"))
		{
			LoadData(args, hFeatures);
			useFeatures = true;
		}
		else if(args.if_pop("-outputFeatures"))
		{
			outputFeatures = true;
		}
		else
		{
			throw Ex("Invalid option: ", args.peek());
		}
	}
	
	// Extrapolate
	GMatrix *pOutput;
	if(useFeatures)
		pOutput = nd->extrapolate(*hFeatures.get());
	else
		pOutput = nd->extrapolate(start, length, step, outputFeatures);
	std::unique_ptr<GMatrix> hOutput(pOutput);
	
	// Output predictions
	pOutput->print(cout);
}
Beispiel #4
0
void Train(GArgReader &args)
{
	// Load series from file
	std::unique_ptr<GMatrix> hSeries, hFeatures;
	LoadData(args, hSeries);
	GMatrix *pSeries = hSeries.get();
	
	// Split features/labels
	if(pSeries->cols() == 2)
	{
		GMatrix *pFeatures = pSeries->cloneSub(0, 0, pSeries->rows(), 1);
		GMatrix *pLabels = pSeries->cloneSub(0, 1, pSeries->rows(), 1);
		hFeatures.reset(pFeatures);
		hSeries.reset(pLabels);
		pSeries = pLabels;
	}
	else if(pSeries->cols() > 2)
	{
		throw Ex("Too many columns!");
	}
	
	// Parse options
	GNeuralDecomposition *nd = new GNeuralDecomposition();
	while(args.next_is_flag())
	{
		if(args.if_pop("-regularization"))
			nd->setRegularization(args.pop_double());
		else if(args.if_pop("-learningRate"))
			nd->setLearningRate(args.pop_double());
		else if(args.if_pop("-linearUnits"))
			nd->setLinearUnits(args.pop_uint());
		else if(args.if_pop("-softplusUnits"))
			nd->setSoftplusUnits(args.pop_uint());
		else if(args.if_pop("-sigmoidUnits"))
			nd->setSigmoidUnits(args.pop_uint());
		else if(args.if_pop("-epochs"))
			nd->setEpochs(args.pop_uint());
		else if(args.if_pop("-features"))
			LoadData(args, hFeatures);
		else if(args.if_pop("-filterLogarithm"))
			nd->setFilterLogarithm(true);
		else
			throw Ex("Invalid option: ", args.peek());
	}
	
	if(hFeatures.get() == NULL)
	{
		// Generate features
		GMatrix *pFeatures = new GMatrix(pSeries->rows(), 1);
		for(size_t i = 0; i < pSeries->rows(); i++)
		{
			pFeatures->row(i)[0] = i / (double) pSeries->rows();
		}
		hFeatures.reset(pFeatures);
	}
	
	// Train
	GMatrix *pFeatures = hFeatures.get();
	nd->train(*pFeatures, *pSeries);
	
	// Output the trained model
	GDom doc;
	doc.setRoot(nd->serialize(&doc));
	doc.writeJson(cout);
}
Beispiel #5
0
void selfOrganizingMap(GArgReader& args){
  // Load the file
  GMatrix* pData = loadData(args.pop_string());
  Holder<GMatrix> hData(pData);

  // Parse arguments
  std::vector<double> netDims;
  unsigned numNodes = 1;
  while(args.next_is_uint()){
    unsigned dim = args.pop_uint();
    netDims.push_back(dim);
    numNodes *= dim;
  }
  if(netDims.size() < 1){
    throw Ex("No dimensions specified for self organizing map.  ",
	       "A map must be at least 1 dimensional.");
  }

  Holder<SOM::ReporterChain> reporters(new SOM::ReporterChain);
  Holder<SOM::TrainingAlgorithm> alg(NULL);
  Holder<GDistanceMetric> weightDist(new GRowDistance);
  Holder<GDistanceMetric> nodeDist(new GRowDistance);
  Holder<SOM::NodeLocationInitialization> topology(new SOM::GridTopology);
  Holder<SOM::NodeWeightInitialization> weightInit
    (new SOM::NodeWeightInitializationTrainingSetSample(NULL));
  Holder<SOM::NeighborhoodWindowFunction> 
    windowFunc(new SOM::GaussianWindowFunction());

  //Loading and saving
  string loadFrom = "";
  string saveTo = "";

  //Parameters for different training algorithms
  string algoName = "batch";
  double startWidth = -1;//Start width - set later if still negative
  double endWidth   = -1;//End width   - set later if still negative
  double startRate = -1;//Start learning rate
  double endRate   = -1;//End learning rate
  unsigned numIter     = 100;//Total iterations
  unsigned numConverge = 1;//#steps for batch to converge

  while(args.next_is_flag()){
    if(args.if_pop("-tofile")){
      saveTo = args.pop_string();
    }else if(args.if_pop("-fromfile")){
      loadFrom = args.pop_string();
    }else if(args.if_pop("-seed")){
      GRand::global().setSeed(args.pop_uint());
    }else if(args.if_pop("-neighborhood")){
      string name = args.pop_string();
      if(name == "gaussian"){
	windowFunc.reset(new SOM::GaussianWindowFunction());
      }else if(name == "uniform"){
	windowFunc.reset(new SOM::UniformWindowFunction());
      }else{
	throw Ex("Only gaussian and uniform are acceptible ",
		   "neighborhood types");
      }
    }else if(args.if_pop("-printMeshEvery")){
      using namespace SOM;
      unsigned interval = args.pop_uint();
      string baseFilename = args.pop_string();
      unsigned xDim = args.pop_uint();
      unsigned yDim = args.pop_uint();
      bool showTrain = false;
      if(args.if_pop("showTrain") || args.if_pop("showtrain")){
	showTrain = true;
      }
      smart_ptr<Reporter> weightReporter
	(new SVG2DWeightReporter(baseFilename, xDim, yDim, showTrain));
      Holder<IterationIntervalReporter> intervalReporter
	(new IterationIntervalReporter(weightReporter, interval));
      reporters->add(intervalReporter.release());
    }else if(args.if_pop("-batchTrain")){
      algoName = "batch";
      startWidth = args.pop_double();
      endWidth = args.pop_double();
      numIter = args.pop_uint();
      numConverge = args.pop_uint();
    }else if(args.if_pop("-stdTrain")){
      algoName = "standard";
      startWidth = args.pop_double();
      endWidth = args.pop_double();
      startRate = args.pop_double();
      endRate = args.pop_double();
      numIter = args.pop_uint();
    }else{
      throw Ex("Invalid option: ", args.peek());
    }
  }

  //Create the training algorithm
  Holder<SOM::TrainingAlgorithm> algo;
  if(algoName == "batch"){
    double netRadius = *std::max_element(netDims.begin(), netDims.end());
    if(startWidth < 0){ startWidth = 2*netRadius; }
    if(endWidth < 0){ endWidth = 1; }
    algo.reset( new SOM::BatchTraining
      (startWidth, endWidth, numIter, numConverge,
       weightInit.release(), windowFunc.release(),
       reporters.release()));
  }else if(algoName == "standard"){
    algo.reset( new SOM::TraditionalTraining
      (startWidth, endWidth, startRate, endRate, numIter,
       weightInit.release(), windowFunc.release(),
       reporters.release()));
  }else{
    throw Ex("Unknown type of training algorithm: \"",
	       algoName, "\"");
  }

  //Create the network & transform the data
  Holder<GSelfOrganizingMap> som;
  Holder<GMatrix> out;
  
  if(loadFrom == ""){
    //Create map from arguments given
    som.reset(new GSelfOrganizingMap
      (netDims, numNodes, topology.release(), algo.release(), 
       weightDist.release(), nodeDist.release()));
    //Train the network and transform the data in place
    out.reset(som->doit(*pData));
  }else{
    //Create map from file
    GDom source;
    source.loadJson(loadFrom.c_str());
    som.reset(new GSelfOrganizingMap(source.root()));
    //Transform using the loaded network
    out.reset(som->transformBatch(*pData));
  }

  //Save the trained network
  if(saveTo != ""){
    GDom serialized;
    GDomNode* root = som->serialize(&serialized);
    serialized.setRoot(root);
    serialized.saveJson(saveTo.c_str());
  }

  //Print the result
  out->print(cout);
}
Beispiel #6
0
void principalComponentAnalysis(GArgReader& args)
{
	// Load the file
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	int nTargetDims = args.pop_uint();

	// Parse options
	string roundTrip;
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	string eigenvalues;
	string components;
	string modelIn;
	string modelOut;
	bool aboutOrigin = false;
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-roundtrip"))
			roundTrip = args.pop_string();
		else if(args.if_pop("-eigenvalues"))
			eigenvalues = args.pop_string();
		else if(args.if_pop("-components"))
			components = args.pop_string();
		else if(args.if_pop("-aboutorigin"))
			aboutOrigin = true;
		else if(args.if_pop("-modelin"))
			modelIn = args.pop_string();
		else if(args.if_pop("-modelout"))
			modelOut = args.pop_string();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Transform the data
	GRand prng(seed);
	GPCA* pTransform = NULL;
	if(modelIn.length() > 0)
	{
		GDom doc;
		doc.loadJson(modelIn.c_str());
		GLearnerLoader ll(prng);
		pTransform = new GPCA(doc.root(), ll);
	}
	else
	{
		pTransform = new GPCA(nTargetDims, &prng);
		if(aboutOrigin)
			pTransform->aboutOrigin();
		if(eigenvalues.length() > 0)
			pTransform->computeEigVals();
		pTransform->train(*pData);
	}
	Holder<GPCA> hTransform(pTransform);

	GMatrix* pDataAfter = pTransform->transformBatch(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);

	// Save the eigenvalues
	if(eigenvalues.length() > 0)
	{
		GArffRelation* pRelation = new GArffRelation();
		pRelation->addAttribute("eigenvalues", 0, NULL);
		sp_relation pRel = pRelation;
		GMatrix dataEigenvalues(pRel);
		dataEigenvalues.newRows(nTargetDims);
		double* pEigVals = pTransform->eigVals();
		for(int i = 0; i < nTargetDims; i++)
			dataEigenvalues[i][0] = pEigVals[i];
		dataEigenvalues.saveArff(eigenvalues.c_str());
	}

	// Save the components
	if(components.length() > 0)
		pTransform->components()->saveArff(components.c_str());

	// Do the round-trip
	if(roundTrip.size() > 0)
	{
		GMatrix roundTripped(pData->rows(), pData->cols());
		for(size_t i = 0; i < pData->rows(); i++)
			pTransform->untransform(pDataAfter->row(i), roundTripped.row(i));
		roundTripped.saveArff(roundTrip.c_str());
	}

	if(modelOut.length() > 0)
	{
		GDom doc;
		doc.setRoot(pTransform->serialize(&doc));
		doc.saveJson(modelOut.c_str());
	}

	pDataAfter->print(cout);
}
Beispiel #7
0
void unsupervisedBackProp(GArgReader& args)
{
	// Load the file and params
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	int targetDims = args.pop_uint();

	// Parse Options
	unsigned int nSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(nSeed);
	GUnsupervisedBackProp* pUBP = new GUnsupervisedBackProp(targetDims, &prng);
	Holder<GUnsupervisedBackProp> hUBP(pUBP);
	vector<size_t> paramRanges;
	string sModelOut;
	string sProgress;
	bool inputBias = true;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
			prng.setSeed(args.pop_uint());
		else if(args.if_pop("-addlayer"))
			pUBP->neuralNet()->addLayer(args.pop_uint());
		else if(args.if_pop("-params"))
		{
			if(pUBP->jitterer())
				throw Ex("You can't change the params after you add an image jitterer");
			size_t paramDims = args.pop_uint();
			for(size_t i = 0; i < paramDims; i++)
				paramRanges.push_back(args.pop_uint());
		}
		else if(args.if_pop("-modelin"))
		{
			GDom doc;
			doc.loadJson(args.pop_string());
			GLearnerLoader ll(prng);
			pUBP = new GUnsupervisedBackProp(doc.root(), ll);
			hUBP.reset(pUBP);
		}
		else if(args.if_pop("-modelout"))
			sModelOut = args.pop_string();
		else if(args.if_pop("-intrinsicin"))
		{
			GMatrix* pInt = new GMatrix();
			pInt->loadArff(args.pop_string());
			pUBP->setIntrinsic(pInt);
		}
		else if(args.if_pop("-jitter"))
		{
			if(paramRanges.size() != 2)
				throw Ex("The params must be set to 2 before a tweaker is set");
			size_t channels = args.pop_uint();
			double rot = args.pop_double();
			double trans = args.pop_double();
			double zoom = args.pop_double();
			GImageJitterer* pJitterer = new GImageJitterer(paramRanges[0], paramRanges[1], channels, rot, trans, zoom);
			pUBP->setJitterer(pJitterer);
		}
		else if(args.if_pop("-noinputbias"))
			inputBias = false;
		else if(args.if_pop("-progress"))
		{
			sProgress = args.pop_string();
			pUBP->trackProgress();
		}
		else if(args.if_pop("-onepass"))
			pUBP->onePass();
		else
			throw Ex("Invalid option: ", args.peek());
	}
	pUBP->setParams(paramRanges);
	pUBP->setUseInputBias(inputBias);

	// Transform the data
	GMatrix* pDataAfter = pUBP->doit(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);
	pDataAfter->print(cout);

	// Save the model (if requested)
	if(sModelOut.length() > 0)
	{
		GDom doc;
		doc.setRoot(pUBP->serialize(&doc));
		doc.saveJson(sModelOut.c_str());
	}
	if(sProgress.length() > 0)
		pUBP->progress().saveArff(sProgress.c_str());
}