예제 #1
0
파일: main.cpp 프로젝트: litaoshao/waffles
void transition(GArgReader& args)
{
	// Load the input data
	GMatrix* pActions = loadData(args.pop_string());
	Holder<GMatrix> hActions(pActions);
	GMatrix* pState = loadData(args.pop_string());
	Holder<GMatrix> hState(pState);
	if(pState->rows() != pActions->rows())
		ThrowError("Expected the same number of rows in both datasets");

	// Parse options
	bool delta = false;
	while(args.size() > 0)
	{
		if(args.if_pop("-delta"))
			delta = true;
		else
			ThrowError("Invalid option: ", args.peek());
	}

	// Make the output data
	size_t actionDims = pActions->cols();
	size_t stateDims = pState->cols();
	GMixedRelation* pRelation = new GMixedRelation();
	sp_relation pRel = pRelation;
	pRelation->addAttrs(pActions->relation().get());
	pRelation->addAttrs(stateDims + stateDims, 0);
	GMatrix* pTransition = new GMatrix(pRel);
	pTransition->newRows(pActions->rows() - 1);
	for(size_t i = 0; i < pActions->rows() - 1; i++)
	{
		double* pOut = pTransition->row(i);
		GVec::copy(pOut, pActions->row(i), actionDims);
		GVec::copy(pOut + actionDims, pState->row(i), stateDims);
		GVec::copy(pOut + actionDims + stateDims, pState->row(i + 1), stateDims);
		if(delta)
			GVec::subtract(pOut + actionDims + stateDims, pState->row(i), stateDims);
	}
	pTransition->print(cout);
}
예제 #2
0
// virtual
GMatrix* GAgglomerativeTransducer::transduceInner(const GMatrix& features1, const GMatrix& labels1, const GMatrix& features2)
{
	// Init the metric
	if(!m_pMetric)
		setMetric(new GRowDistance(), true);
	m_pMetric->init(&features1.relation(), false);

	// Make a dataset with all featuers
	GMatrix featuresAll(features1.relation().clone());
	featuresAll.reserve(features1.rows() + features2.rows());
	GReleaseDataHolder hFeaturesAll(&featuresAll);
	for(size_t i = 0; i < features1.rows(); i++)
		featuresAll.takeRow((double*)features1[i]);
	for(size_t i = 0; i < features2.rows(); i++)
		featuresAll.takeRow((double*)features2[i]);

	// Find enough neighbors to form a connected graph
	GNeighborGraph* pNF = NULL;
	size_t neighbors = 6;
	while(true)
	{
		GKdTree* pKdTree = new GKdTree(&featuresAll, neighbors, m_pMetric, false);
		pNF = new GNeighborGraph(pKdTree, true);
		pNF->fillCache();
		if(pNF->isConnected())
			break;
		if(neighbors + 1 >= featuresAll.rows())
		{
			delete(pNF);
			throw Ex("internal problem--a graph with so many neighbors must be connected");
		}
		neighbors = std::min((neighbors * 3) / 2, featuresAll.rows() - 1);
	}

	// Sort all the neighbors by their distances
	size_t count = featuresAll.rows() * neighbors;
	vector< std::pair<double,size_t> > distNeighs;
	distNeighs.resize(count);
	double* pDistances = pNF->squaredDistanceTable();
	size_t* pRows = pNF->cache();
	size_t index = 0;
	vector< std::pair<double,size_t> >::iterator it = distNeighs.begin();
	for(size_t i = 0; i < count; i++)
	{
		if(*pRows < featuresAll.rows())
		{
			it->first = *pDistances;
			it->second = i;
			it++;
		}
		else
			index--;
		pRows++;
		pDistances++;
	}
	std::sort(distNeighs.begin(), it);

	// Transduce
	GMatrix* pOut = new GMatrix(labels1.relation().clone());
	Holder<GMatrix> hOut(pOut);
	pOut->newRows(features2.rows());
	pOut->setAll(-1);
	size_t* pSiblings = new size_t[featuresAll.rows()]; // a cyclical linked list of each row in the cluster
	ArrayHolder<size_t> hSiblings(pSiblings);
	for(size_t lab = 0; lab < labels1.cols(); lab++)
	{
		// Assign each row to its own cluster
		GIndexVec::makeIndexVec(pSiblings, featuresAll.rows()); // init such that each row is in a cluster of 1
		size_t missingLabels = features2.rows();

		// Merge until we have the desired number of clusters
		pRows = pNF->cache();
		for(vector< std::pair<double,size_t> >::iterator dn = distNeighs.begin(); dn != it; dn++)
		{
			// Get the next two closest points
			size_t a = dn->second / neighbors;
			size_t b = pRows[dn->second];
			GAssert(a != b && a < featuresAll.rows() && b < featuresAll.rows());
			int labelA = (a < features1.rows() ? (int)labels1[a][lab] : (int)pOut->row(a - features1.rows())[lab]);
			int labelB = (b < features1.rows() ? (int)labels1[b][lab] : (int)pOut->row(b - features1.rows())[lab]);

			// Merge the clusters
			if(labelA >= 0 && labelB >= 0)
				continue; // Both points are already labeled, so there is no point in merging their clusters
			if(labelA < 0 && labelB >= 0) // Make sure that if one of them has a valid label, it is point a
			{
				std::swap(a, b);
				std::swap(labelA, labelB);
			}
			if(labelA >= 0)
			{
				for(size_t i = pSiblings[b]; true; i = pSiblings[i]) // Label every row in cluster b
				{
					GAssert(i >= features1.rows());
					GAssert(pOut->row(i - features1.rows())[lab] == (double)-1);
					pOut->row(i - features1.rows())[lab] = labelA;
					missingLabels--;
					if(i == b)
						break;
				}
				if(missingLabels <= 0)
					break;
			}
			std::swap(pSiblings[a], pSiblings[b]); // This line joins the cyclical linked lists into one big cycle
		}
	}
	return hOut.release();
}
예제 #3
0
// virtual
GMatrix* GGraphCutTransducer::transduceInner(const GMatrix& features1, const GMatrix& labels1, const GMatrix& features2)
{
	// Use k-NN to compute a distance metric with good scale factors for prediction
	GKNN knn;
	knn.setNeighborCount(m_neighborCount);
	//knn.setOptimizeScaleFactors(true);
	knn.train(features1, labels1);
	GRowDistanceScaled* pMetric = knn.metric();

	// Merge the features into one dataset and build a kd-tree
	GMatrix both(features1.relation().clone());
	GReleaseDataHolder hBoth(&both);
	both.reserve(features1.rows() + features2.rows());
	for(size_t i = 0; i < features1.rows(); i++)
		both.takeRow((double*)features1[i]);
	for(size_t i = 0; i < features2.rows(); i++)
		both.takeRow((double*)features2[i]);
	GRowDistanceScaled metric2;
	GKdTree neighborFinder(&both, m_neighborCount, &metric2, false);
	GVec::copy(metric2.scaleFactors(), pMetric->scaleFactors(), features1.cols());

	// Transduce
	GMatrix* pOut = new GMatrix(labels1.relation().clone());
	Holder<GMatrix> hOut(pOut);
	pOut->newRows(features2.rows());
	pOut->setAll(0);
	for(size_t lab = 0; lab < labels1.cols(); lab++)
	{
		// Use max-flow/min-cut graph-cut to separate out each label value
		int valueCount = (int)labels1.relation().valueCount(lab);
		for(int val = 1; val < valueCount; val++)
		{
			// Add neighborhood edges
			GGraphCut gc(features1.rows() + features2.rows() + 2);
			for(size_t i = 0; i < both.rows(); i++)
			{
				neighborFinder.neighbors(m_pNeighbors, m_pDistances, i);
				for(size_t j = 0; j < m_neighborCount; j++)
				{
					if(m_pNeighbors[j] >= both.rows())
						continue;
					gc.addEdge(2 + i, 2 + m_pNeighbors[j], (float)(1.0 / std::max(sqrt(m_pDistances[j]), 1e-9))); // connect neighbors
				}
			}

			// Add source and sink edges
			for(size_t i = 0; i < features1.rows(); i++)
			{
				if((int)labels1[i][0] == val)
					gc.addEdge(0, 2 + i, 1e12f); // connect to source
				else
					gc.addEdge(1, 2 + i, 1e12f); // connect to sink
			}

			// Cut
			gc.cut(0, 1);

			// Label the unlabeled rows
			for(size_t i = 0; i < features2.rows(); i++)
			{
				if(gc.isSource(2 + features1.rows() + i))
					pOut->row(i)[lab] = (double)val;
			}
		}
	}
	return hOut.release();
}