コード例 #1
0
ファイル: main.cpp プロジェクト: litaoshao/waffles
// Command handler: runs a singular value decomposition on a dataset and
// writes the resulting factors out.
//
// Consumes from args: the input dataset filename, followed by any of
//   -ufilename [f]      where to save U (default "u.arff")
//   -sigmafilename [f]  where to save the diagonal as a full matrix
//   -vfilename [f]      where to save V (default "v.arff")
//   -maxiters [n]       iteration cap handed to the decomposition (default 100)
// Any other remaining argument is reported through ThrowError.
//
// When no sigma filename is given, the diagonal values are printed to
// stdout instead of being saved.
void singularValueDecomposition(GArgReader& args)
{
	// Read the input matrix; the holder owns it for the rest of the function
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> hInput(pInput);

	// Defaults, each overridable by a flag
	string uPath = "u.arff";
	string sigmaPath;
	string vPath = "v.arff";
	int iterLimit = 100;
	while(args.size() > 0)
	{
		if(args.if_pop("-ufilename"))
		{
			uPath = args.pop_string();
			continue;
		}
		if(args.if_pop("-sigmafilename"))
		{
			sigmaPath = args.pop_string();
			continue;
		}
		if(args.if_pop("-vfilename"))
		{
			vPath = args.pop_string();
			continue;
		}
		if(args.if_pop("-maxiters"))
		{
			iterLimit = args.pop_uint();
			continue;
		}
		ThrowError("Invalid option: ", args.peek());
	}

	// Decompose. The three outputs are handed back through out-pointers and
	// placed in holders immediately afterwards.
	// NOTE(review): the meaning of the 'false' argument is not visible here -
	// confirm against GMatrix::singularValueDecomposition.
	GMatrix* pLeft;
	double* pSingular;
	GMatrix* pRight;
	pInput->singularValueDecomposition(&pLeft, &pSingular, &pRight, false, iterLimit);
	Holder<GMatrix> hLeft(pLeft);
	ArrayHolder<double> hSingular(pSingular);
	Holder<GMatrix> hRight(pRight);

	// U and V are always written
	pLeft->saveArff(uPath.c_str());
	pRight->saveArff(vPath.c_str());

	// Without a sigma file, just print the diagonal and finish
	if(sigmaPath.empty())
	{
		GVec::print(cout, 14, pSingular, std::min(pLeft->rows(), pRight->rows()));
		cout << "\n";
		return;
	}

	// Otherwise expand the diagonal into a zero-filled matrix with as many
	// rows as U and as many columns as V has rows, then save it
	GMatrix sigma(pLeft->rows(), pRight->rows());
	sigma.setAll(0.0);
	const size_t diagLen = std::min(sigma.rows(), (size_t)sigma.cols());
	for(size_t k = 0; k < diagLen; ++k)
		sigma.row(k)[k] = pSingular[k];
	sigma.saveArff(sigmaPath.c_str());
}
コード例 #2
0
ファイル: main.cpp プロジェクト: litaoshao/waffles
// Command handler: splits a dataset into two ARFF files by row count.
//
// Consumes from args, in order: the input dataset filename, a row count,
// the first output filename, and the second output filename. Optional
// flags that may follow:
//   -shuffle    shuffle the rows before splitting
//   -seed [n]   seed for the random number generator used by the shuffle
// Any other remaining argument is reported through ThrowError, as is a row
// count larger than the number of rows in the dataset.
void split(GArgReader& args)
{
	// Load
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);

	// Validate the requested count against the row count BEFORE subtracting.
	// Both values are unsigned, so subtracting first would wrap around on an
	// over-large request instead of going negative, and narrowing the row
	// count to int would truncate very large datasets.
	size_t requestedRows = args.pop_uint();
	size_t totalRows = pData->rows();
	if(requestedRows > totalRows)
		ThrowError("out of range. The data only has ", to_str(pData->rows()), " rows.");
	size_t pats = totalRows - requestedRows;
	const char* szFilename1 = args.pop_string();
	const char* szFilename2 = args.pop_string();

	// Default seed varies per process and per second unless -seed overrides it
	unsigned int nSeed = getpid() * (unsigned int)time(NULL);
	bool shouldShuffle = false;
	while(args.size() > 0){
		if(args.if_pop("-shuffle")){
			shouldShuffle = true;
		}else if(args.if_pop("-seed")){
			nSeed = args.pop_uint();
		}else
			ThrowError("Invalid option: ", args.peek());
	}

	// Shuffle if necessary
	GRand rng(nSeed);
	if(shouldShuffle){
		pData->shuffle(rng);
	}

	// Split
	// NOTE(review): 'pats' (total minus requested) is the size passed for
	// 'other'; presumably the first file therefore keeps the requested
	// number of rows - confirm against GMatrix::splitBySize.
	GMatrix other(pData->relation());
	pData->splitBySize(&other, pats);
	pData->saveArff(szFilename1);
	other.saveArff(szFilename2);
}
コード例 #3
0
ファイル: loader.cpp プロジェクト: AntonOrnatskyi/waffles
// Builds train/test matrices from "data/air_passengers.arff".
//
// The four output matrices are resized here: 72 training rows and 72 test
// rows, one column each. For every source row t (0..143):
//   feature = t / 72  (so test features continue past the training range)
//   label   = log10(10 * value) - 2, where value is column 0 of source row t
// The base-10 logarithm is obtained by dividing the natural log by log(10).
//
// As a side effect the two label matrices are written to "out/train.arff"
// and "out/test.arff".
//
// NOTE(review): nothing here checks that the loaded file has at least 144
// rows - confirm the data file guarantees it.
void Loader::loadAirPassengerData(GMatrix &trainFeat, GMatrix &trainLab, GMatrix &testFeat, GMatrix &testLab)
{
	GMatrix series;
	series.loadArff("data/air_passengers.arff");

	// Layout of the split
	const size_t labelDims = 1;
	const size_t firstRow = 0;
	const size_t trainCount = 72;
	const size_t testCount = 72;

	trainFeat.resize(trainCount, 1);
	trainLab.resize(trainCount, labelDims);
	testFeat.resize(testCount, 1);
	testLab.resize(testCount, labelDims);

	// Constants of the label transform
	const double lnTen = log(10);
	const double shift = 2;
	const double gain = 10;

	const size_t total = trainCount + testCount;
	for(size_t t = 0; t < total; t++)
	{
		// Pick the destination rows: training first, then test
		const bool inTrain = t < trainCount;
		const size_t r = inTrain ? t : t - trainCount;
		double* pFeat = inTrain ? trainFeat[r] : testFeat[r];
		double* pLab = inTrain ? trainLab[r] : testLab[r];

		pFeat[0] = double(t) / trainCount;
		pLab[0] = ((log(gain * series[firstRow + t][0]) / lnTen) - shift);
	}

	trainLab.saveArff("out/train.arff");
	testLab.saveArff("out/test.arff");
}
コード例 #4
0
ファイル: loader.cpp プロジェクト: AntonOrnatskyi/waffles
// Builds train/test matrices from "data/mhsets_monthly-ozone.arff".
//
// The four output matrices are resized here: 108 training rows and 44 test
// rows, one column each. For every source row t (0..151):
//   feature = t / 108  (so test features continue past the training range)
//   label   = log10(value), where value is column 0 of source row t,
//             computed as natural log divided by log(10)
//
// As a side effect the two label matrices are written to "out/train.arff"
// and "out/test.arff".
//
// NOTE(review): nothing here checks that the loaded file has at least 152
// rows - confirm the data file guarantees it.
void Loader::loadOzoneData(GMatrix &trainFeat, GMatrix &trainLab, GMatrix &testFeat, GMatrix &testLab)
{
	GMatrix series;
	series.loadArff("data/mhsets_monthly-ozone.arff");

	// Layout of the split
	const size_t labelDims = 1;
	const size_t firstRow = 0;
	const size_t trainCount = 108;
	const size_t testCount = 44;

	trainFeat.resize(trainCount, 1);
	trainLab.resize(trainCount, labelDims);
	testFeat.resize(testCount, 1);
	testLab.resize(testCount, labelDims);

	const size_t total = trainCount + testCount;
	for(size_t t = 0; t < total; t++)
	{
		// Start by assuming a training row, then redirect to the test
		// matrices once the training rows are exhausted
		GMatrix* pFeatMat = &trainFeat;
		GMatrix* pLabMat = &trainLab;
		size_t r = t;
		if(t >= trainCount)
		{
			pFeatMat = &testFeat;
			pLabMat = &testLab;
			r = t - trainCount;
		}
		double* pFeat = (*pFeatMat)[r];
		double* pLab = (*pLabMat)[r];

		*pFeat = double(t) / trainCount;
		*pLab = log(series[firstRow + t][0]) / log(10);
	}

	trainLab.saveArff("out/train.arff");
	testLab.saveArff("out/test.arff");
}
コード例 #5
0
ファイル: main.cpp プロジェクト: kslazarev/waffles
// Command handler: trains a GAttributeSelector on a dataset and prints the
// resulting attribute ranking.
//
// The dataset (and its leading switches) is read from args by
// loadDataWithSwitches, which also supplies the label dimension count and
// the originalIndices table used when printing. Optional flags that follow:
//   -seed [n]        seed for the random number generator
//   -out [n] [file]  target feature count, and a file to which the
//                    transformed data is saved
// Any other flag throws Ex.
//
// Output on stdout: a confirmation line if reduced data was saved, then one
// line per ranked attribute showing its entry in originalIndices and its
// name.
void attributeSelector(GArgReader& args)
{
	// Load the data
	size_t labelDims;
	std::vector<size_t> originalIndices;
	GMatrix data;
	loadDataWithSwitches(data, args, labelDims, originalIndices);

	// Parse the options
	unsigned int seed = getpid() * (unsigned int)time(NULL);
	int targetFeatures = 1;
	string outFilename = "";
	while(args.next_is_flag())
	{
		if(args.if_pop("-seed"))
			seed = args.pop_uint();
		else if(args.if_pop("-out"))
		{
			targetFeatures = args.pop_uint();
			outFilename = args.pop_string();
		}
		else
			throw Ex("Invalid attribute selector option: ", args.peek());
	}

	// Do the attribute selection. Training is needed whether or not the
	// reduced data is being saved, so it is done once up front.
	GRand prng(seed);
	GAttributeSelector as(labelDims, targetFeatures, &prng);
	as.train(data);
	if(outFilename.length() > 0)
	{
		GMatrix* pDataOut = as.transformBatch(data);
		Holder<GMatrix> hDataOut(pDataOut);
		// Report success only after the save has actually been attempted
		pDataOut->saveArff(outFilename.c_str());
		cout << "Reduced data saved to " << outFilename.c_str() << ".\n";
	}

	// Print the ranking
	// NOTE(review): the cast assumes the data's relation is a GArffRelation -
	// confirm that loadDataWithSwitches always produces one.
	cout << "\nAttribute rankings from most salient to least salient. (Attributes are zero-indexed.)\n";
	GArffRelation* pRel = (GArffRelation*)data.relation().get();
	for(size_t i = 0; i < as.ranks().size(); i++)
		cout << originalIndices.at(as.ranks()[i]) << " " << pRel->attrName(as.ranks()[i]) << "\n";
}