예제 #1
0
파일: main.cpp 프로젝트: kslazarev/waffles
// Command-line handler that runs the GLLE transform on a dataset and prints
// the result to cout.
//
// Arguments are consumed from args in this order:
//   1. the filename handed to loadData
//   2. whatever instantiateNeighborFinder pops to build the neighbor finder
//   3. the number of target dimensions
//   4. zero or more options; only "-seed <uint>" is recognized
// Any other trailing argument raises Ex("Invalid option: ", ...).
void lle(GArgReader& args)
{
	// Input dataset
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> inputHolder(pInput);

	// Default seed mixes the process id with the current time; "-seed"
	// below may replace it.
	unsigned int initialSeed = getpid() * (unsigned int)time(NULL);
	GRand rng(initialSeed);

	// The neighbor finder is handed the address of rng, and so is the
	// transform further down.
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &rng, args);
	Holder<GNeighborFinder> finderHolder(pFinder);
	int dimsOut = args.pop_uint();

	// Everything that remains must be a recognized option
	while(args.size() > 0)
	{
		if(!args.if_pop("-seed"))
			throw Ex("Invalid option: ", args.peek());
		rng.setSeed(args.pop_uint());
	}

	// Reduce and print
	GLLE reducer(pFinder->neighborCount(), dimsOut, &rng);
	reducer.setNeighborFinder(pFinder);
	GMatrix* pReduced = reducer.doit(*pInput);
	Holder<GMatrix> reducedHolder(pReduced);
	pReduced->print(cout);
}
예제 #2
0
파일: main.cpp 프로젝트: b2020b/waffles
// Command-line handler that runs the GIsomap transform on a dataset and
// prints the result to cout.
//
// Arguments are consumed from args in this order:
//   1. the filename handed to loadData
//   2. whatever instantiateNeighborFinder pops to build the neighbor finder
//   3. the number of target dimensions
//   4. zero or more options:
//        -seed <uint>   reseed the random number generator
//        -tolerant      call dropDisconnectedPoints() on the transform
// Any other trailing argument raises Ex("Invalid option: ", ...).
void isomap(GArgReader& args)
{
	// Input dataset
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> inputHolder(pInput);

	// Default seed mixes the process id with the current time
	unsigned int initialSeed = getpid() * (unsigned int)time(NULL);
	GRand rng(initialSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &rng, args);
	Holder<GNeighborFinder> finderHolder(pFinder);
	int dimsOut = args.pop_uint();

	// Everything that remains must be a recognized option
	bool dropDisconnected = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			rng.setSeed(args.pop_uint());
			continue;
		}
		if(args.if_pop("-tolerant"))
		{
			dropDisconnected = true;
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// Reduce and print
	GIsomap reducer(pFinder->neighborCount(), dimsOut, &rng);
	reducer.setNeighborFinder(pFinder);
	if(dropDisconnected)
		reducer.dropDisconnectedPoints();
	GMatrix* pReduced = reducer.reduce(*pInput);
	Holder<GMatrix> reducedHolder(pReduced);
	pReduced->print(cout);
}
예제 #3
0
파일: main.cpp 프로젝트: b2020b/waffles
// Command-line handler that runs the GBreadthFirstUnfolding transform on a
// dataset and prints the result to cout.
//
// Arguments are consumed from args in this order:
//   1. the filename handed to loadData
//   2. whatever instantiateNeighborFinder pops to build the neighbor finder
//   3. the number of target dimensions
//   4. zero or more options:
//        -seed <uint>   seed used for both random number generators
//        -reps <uint>   repetition count passed to the transform (default 1)
// Any other trailing argument raises Ex("Invalid option: ", ...).
void breadthFirstUnfolding(GArgReader& args)
{
	// Load the file and params
	GMatrix* pData = loadData(args.pop_string());
	Holder<GMatrix> hData(pData);
	size_t nSeed = getpid() * (unsigned int)time(NULL);
	GRand prng(nSeed);
	GNeighborFinder* pNF = instantiateNeighborFinder(pData, &prng, args);
	Holder<GNeighborFinder> hNF(pNF);
	int targetDims = args.pop_uint();

	// Parse Options
	size_t reps = 1;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			nSeed = args.pop_uint();
			// The neighbor finder was given &prng, so reseed it as well.
			// Otherwise it keeps the pid*time seed and a run with an
			// explicit -seed is not reproducible. This also matches the
			// default path, where prng and the transform's generator both
			// start from nSeed.
			prng.setSeed(nSeed);
		}
		else if(args.if_pop("-reps"))
			reps = args.pop_uint();
		else
			throw Ex("Invalid option: ", args.peek());
	}

	// Transform the data
	GBreadthFirstUnfolding transform(reps, pNF->neighborCount(), targetDims);
	transform.rand().setSeed(nSeed);
	transform.setNeighborFinder(pNF);
	GMatrix* pDataAfter = transform.reduce(*pData);
	Holder<GMatrix> hDataAfter(pDataAfter);
	pDataAfter->print(cout);
}
예제 #4
0
파일: main.cpp 프로젝트: kslazarev/waffles
// Command-line handler that runs the GManifoldSculpting transform on a
// dataset and prints the result to cout.
//
// Arguments are consumed from args in this order:
//   1. the filename handed to loadData
//   2. whatever instantiateNeighborFinder pops to build the neighbor finder
//   3. the number of target dimensions
//   4. zero or more options:
//        -seed <uint>        reseed the random number generator
//        -continue <file>    load hint data handed to setPreprocessedData
//        -scalerate <double> value handed to setSquishingRate (default 0.999)
// Any other trailing argument raises Ex("Invalid option: ", ...). Ex is also
// thrown when the hint data's column or row count does not match.
void ManifoldSculpting(GArgReader& args)
{
	// Input dataset
	GMatrix* pInput = loadData(args.pop_string());
	Holder<GMatrix> inputHolder(pInput);

	// Default seed mixes the process id with the current time
	unsigned int initialSeed = getpid() * (unsigned int)time(NULL);
	GRand rng(initialSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pInput, &rng, args);
	Holder<GNeighborFinder> finderHolder(pFinder);
	size_t dimsOut = args.pop_uint();

	// Everything that remains must be a recognized option
	const char* szHintFile = NULL;
	double squishRate = 0.999;
	while(args.size() > 0)
	{
		if(args.if_pop("-seed"))
		{
			rng.setSeed(args.pop_uint());
			continue;
		}
		if(args.if_pop("-continue"))
		{
			szHintFile = args.pop_string();
			continue;
		}
		if(args.if_pop("-scalerate"))
		{
			squishRate = args.pop_double();
			continue;
		}
		throw Ex("Invalid option: ", args.peek());
	}

	// Optional hint data: it must have one column per target dimension and
	// one row per input row.
	GMatrix* pHint = NULL;
	Holder<GMatrix> hintHolder(NULL);
	if(szHintFile != NULL)
	{
		pHint = loadData(szHintFile);
		hintHolder.reset(pHint);
		if(pHint->relation()->size() != dimsOut)
			throw Ex("Wrong number of dims in the hint data");
		if(pHint->rows() != pInput->rows())
			throw Ex("Wrong number of patterns in the hint data");
	}

	// Reduce and print
	GManifoldSculpting sculptor(pFinder->neighborCount(), dimsOut, &rng);
	sculptor.setSquishingRate(squishRate);
	if(pHint != NULL)
	{
		// The holder gives up the matrix here, so from this point it is
		// the transform's responsibility (presumably it takes ownership).
		sculptor.setPreprocessedData(hintHolder.release());
	}
	sculptor.setNeighborFinder(pFinder);
	GMatrix* pReduced = sculptor.doit(*pInput);
	Holder<GMatrix> reducedHolder(pReduced);
	pReduced->print(cout);
}
예제 #5
0
파일: main.cpp 프로젝트: kslazarev/waffles
// Command-line handler that blends two embeddings of the same dataset with
// GManifold::blendEmbeddings and prints the result to cout.
//
// Arguments are consumed from args in this order:
//   1. filename of the original data
//   2. whatever instantiateNeighborFinder pops to build the neighbor finder
//   3. filename of embedding A
//   4. filename of embedding B
//   5. zero or more options; only "-seed <uint>" is recognized
// Throws Ex when A or B has a different row count than the original data,
// when A and B have different column counts, or on an unrecognized option.
void blendEmbeddings(GArgReader& args)
{
	// Original data and the neighbor finder built over it
	GMatrix* pOriginal = loadData(args.pop_string());
	Holder<GMatrix> originalHolder(pOriginal);
	unsigned int initialSeed = getpid() * (unsigned int)time(NULL);
	GRand rng(initialSeed);
	GNeighborFinder* pFinder = instantiateNeighborFinder(pOriginal, &rng, args);
	Holder<GNeighborFinder> finderHolder(pFinder);

	// The two embeddings to blend
	GMatrix* pEmbedA = loadData(args.pop_string());
	Holder<GMatrix> embedAHolder(pEmbedA);
	GMatrix* pEmbedB = loadData(args.pop_string());
	Holder<GMatrix> embedBHolder(pEmbedB);
	if(pEmbedA->rows() != pOriginal->rows() || pEmbedB->rows() != pOriginal->rows())
		throw Ex("mismatching number of rows");
	if(pEmbedA->cols() != pEmbedB->cols())
		throw Ex("mismatching number of cols");

	// Everything that remains must be a recognized option
	while(args.size() > 0)
	{
		if(!args.if_pop("-seed"))
			throw Ex("Invalid option: ", args.peek());
		rng.setSeed(args.pop_uint());
	}

	// Obtain a cache wrapper around the neighbor finder. A finder that
	// reports isCached() is treated as already being a
	// GNeighborFinderCacheWrapper; otherwise the existing finder is moved
	// into a new wrapper, which the holder then tracks instead.
	GNeighborFinderCacheWrapper* pCache;
	if(pFinder->isCached())
		pCache = (GNeighborFinderCacheWrapper*)pFinder;
	else
	{
		pCache = new GNeighborFinderCacheWrapper(finderHolder.release(), true);
		finderHolder.reset(pCache);
		pFinder = pCache;
	}
	pCache->fillCache();
	size_t* pNeighborTable = pCache->cache();

	// Blend starting from a randomly drawn row, weighting every row 0.5
	size_t startPoint = (size_t)rng.next(pEmbedA->rows());
	double* pWeights = new double[pEmbedA->rows()];
	ArrayHolder<double> weightsHolder(pWeights);
	GVec::setAll(pWeights, 0.5, pEmbedA->rows());
	GMatrix* pBlended = GManifold::blendEmbeddings(pEmbedA, pWeights, pEmbedB, pFinder->neighborCount(), pNeighborTable, startPoint);
	Holder<GMatrix> blendedHolder(pBlended);
	pBlended->print(cout);
}
예제 #6
0
// virtual
// Assigns a label to every row of pDataUnlabeled by repeated neighbor voting.
//
// The label is the last column of both datasets (labelDims must be 1); all
// preceding columns are features. Labels are written in place into column
// featureDims of pDataUnlabeled's rows.
//
// Calls ThrowError when:
//   - labelDims is not 1
//   - the last attribute of pDataLabeled's relation is not nominal
//   - any preceding attribute is not continuous
//   - the two datasets have different column counts
void GNeighborTransducer::transduce(GData* pDataLabeled, GData* pDataUnlabeled, int labelDims)
{
	if(labelDims != 1)
		ThrowError("Only 1 nominal label is supported");
	if(!pDataLabeled->relation()->areNominal(pDataLabeled->relation()->size() - 1, 1))
		ThrowError("Only nominal labels are supported");
	if(!pDataLabeled->relation()->areContinuous(0, pDataLabeled->relation()->size() - 1))
		ThrowError("Only continuous features are supported");
	if(pDataLabeled->cols() != pDataUnlabeled->cols())
		ThrowError("relations don't match");

	// Make a dataset containing all rows
	// Unlabeled rows are added first, so in dataAll an index below
	// pDataUnlabeled->rows() is an unlabeled row and anything at or above it
	// is a labeled row. The voting code below relies on this layout.
	GData dataAll(pDataLabeled->relation());
	dataAll.reserve(pDataLabeled->rows() + pDataUnlabeled->rows());
	// NOTE(review): the rows added via takeRow still belong to the caller's
	// datasets; GReleaseDataHolder presumably detaches them from dataAll at
	// scope exit instead of deleting them -- confirm against its definition.
	GReleaseDataHolder hDataAll(&dataAll);
	for(size_t i = 0; i < pDataUnlabeled->rows(); i++)
		dataAll.takeRow(pDataUnlabeled->row(i));
	for(size_t i = 0; i < pDataLabeled->rows(); i++)
		dataAll.takeRow(pDataLabeled->row(i));
	int featureDims = pDataLabeled->cols() - labelDims;
	// Swap in a relation with only featureDims attributes. The rows still
	// carry the label at index featureDims, which is read directly below.
	// (Presumably this keeps the label out of the neighbor search -- confirm.)
	sp_relation pRelInputs = new GUniformRelation(featureDims, 0);
	dataAll.setRelation(pRelInputs);

	// Find friends
	// m_intrinsicDims == 0 selects a cached kd-tree; any other value selects
	// the manifold-based neighbor finder.
	GNeighborFinder* pNF;
	if(m_intrinsicDims == 0)
		pNF = new GNeighborFinderCacheWrapper(new GKdTree(&dataAll, 0, m_friendCount, NULL, true), true);
	else
		pNF = new GManifoldNeighborFinder(
			&dataAll,
			m_friendCount, // littleK
			m_friendCount * 4, // bigK
			m_intrinsicDims, // intrinsicDims
			m_alpha, // alpha
			m_beta, // beta
			false, // prune?
			m_pRand);
	Holder<GNeighborFinder> hNF(pNF);
	// Scratch buffers: one neighbor index per friend, one tally per label value
	GTEMPBUF(size_t, neighbors, m_friendCount);
	int labelValues = pDataLabeled->relation()->valueCount(featureDims);
	GTEMPBUF(double, tallys, labelValues);

	// Label the unlabeled patterns
	// labeled tracks which unlabeled rows have received a label so far.
	// labelList holds one row per still-unlabeled pattern; rows are removed
	// as labels are committed, so the outer loop ends when it is empty.
	GBitTable labeled(pDataUnlabeled->rows());
	GData labelList(3); // pattern index, most likely label, confidence
	labelList.newRows(pDataUnlabeled->rows());
	for(size_t i = 0; i < pDataUnlabeled->rows(); i++)
		labelList.row(i)[0] = i;
	while(labelList.rows() > 0)
	{
		// Compute the most likely label and the confidence for each pattern
		for(size_t i = 0; i < labelList.rows(); i++)
		{
			// Find the most common label
			double* pRow = labelList.row(i);
			size_t index = (size_t)pRow[0];
			pNF->neighbors(neighbors, index);
			GVec::setAll(tallys, 0.0, labelValues);
			for(int j = 0; j < m_friendCount; j++)
			{
				// Skip neighbor slots whose index is out of range
				if(neighbors[j] >= dataAll.rows())
					continue;
				double* pFriend = dataAll.row(neighbors[j]);
				if(neighbors[j] >= pDataUnlabeled->rows())
				{
					// Originally-labeled neighbor: full vote
					if((int)pFriend[featureDims] >= 0 && (int)pFriend[featureDims] < labelValues)
						tallys[(int)pFriend[featureDims]] += 1.0;
				}
				else if(labeled.bit(neighbors[j]))
				{
					// Neighbor labeled in an earlier pass: reduced vote
					if((int)pFriend[featureDims] >= 0 && (int)pFriend[featureDims] < labelValues)
						tallys[(int)pFriend[featureDims]] += 0.6;
				}
				// Neighbors that are still unlabeled cast no vote
			}
			// m_pRand is passed to indexOfMax (presumably for tie-breaking -- confirm)
			int label = GVec::indexOfMax(tallys, labelValues, m_pRand);
			double conf = tallys[label];

			// Penalize for dissenting votes
			// Each disagreeing neighbor scales the confidence down: by 0.5 for
			// an originally-labeled neighbor, by 0.8 for one labeled earlier.
			for(int j = 0; j < m_friendCount; j++)
			{
				if(neighbors[j] >= dataAll.rows())
					continue;
				double* pFriend = dataAll.row(neighbors[j]);
				if(neighbors[j] >= pDataUnlabeled->rows())
				{
					if((int)pFriend[featureDims] != label)
						conf *= 0.5;
				}
				else if(labeled.bit(neighbors[j]))
				{
					if((int)pFriend[featureDims] != label)
						conf *= 0.8;
				}
			}
			pRow[1] = label;
			pRow[2] = conf;
		}
		// Order by the confidence column. The loop below takes rows from the
		// end, so this presumably sorts ascending -- confirm against GData::sort.
		labelList.sort(2);

		// Assign the labels to the patterns we are most confident about
		size_t maxCount = MAX((size_t)5, pDataLabeled->rows() / 5);
		size_t count = 0;
		// Walk backward from the last row. i is unsigned, so decrementing past
		// zero wraps to a huge value and the "i < rows()" test ends the loop.
		for(size_t i = labelList.rows() - 1; i < labelList.rows(); i--)
		{
			double* pRow = labelList.row(i);
			size_t index = (size_t)pRow[0];
			int label = (int)pRow[1];
			pDataUnlabeled->row(index)[featureDims] = label;
			labeled.set(index);
			labelList.deleteRow(i);
			// The limit is tested before count is incremented, so a pass
			// commits up to maxCount + 1 labels.
			if(count >= maxCount)
				break;
			count++;
		}
	}
}