Example No. 1
// virtual
void GBucket::trainInner(GMatrix& features, GMatrix& labels)
{
	size_t nLearnerCount = m_models.size();
	double dBestError = 1e200;
	GSupervisedLearner* pLearner;
	// Default to a random learner so m_nBestLearner is always valid,
	// even if every candidate fails validation below
	m_nBestLearner = (size_t)m_rand.next(nLearnerCount);
	double err;
	// Validate each candidate learner and remember the one with the lowest error
	for(size_t i = 0; i < nLearnerCount; i++)
	{
		pLearner = m_models[i];
		try
		{
			err = pLearner->heuristicValidate(features, labels);
		}
		catch(std::exception& e)
		{
			onError(e);
			continue;
		}
		if(err < dBestError)
		{
			dBestError = err;
			m_nBestLearner = i;
		}
		pLearner->clear(); // discard the candidate's trained state
	}
	// Train the winning learner on the full training data
	pLearner = m_models[m_nBestLearner];
	pLearner->train(features, labels);
}
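Both GBucket::trainInner variants (this one and the next) implement the same idea: score every model in the bucket, then train only the winner on all of the data. The sketch below shows how such a bucket might be assembled and used. It is a hypothetical usage sketch: the default constructor and the addLearner/train/predict signatures are assumptions inferred from the calls visible in these listings, not verified against any particular Waffles release.

#include <GClasses/GEnsemble.h>      // GBucket (assumed header location)
#include <GClasses/GDecisionTree.h>  // GDecisionTree (assumed header location)
#include <GClasses/GNaiveBayes.h>    // GNaiveBayes (assumed header location)

using namespace GClasses;

// Hypothetical usage: fill a bucket with candidate learners, train it
// (internally this runs trainInner, which validates each candidate and
// trains the best one), then predict with the winner.
void bucketDemo(GMatrix& features, GMatrix& labels)
{
	GBucket bucket;
	bucket.addLearner(new GDecisionTree()); // the bucket takes ownership
	bucket.addLearner(new GNaiveBayes());
	bucket.train(features, labels);
	GVec prediction(labels.cols());
	bucket.predict(features[0], prediction);
}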
Example No. 2
// virtual
void GBucket::trainInner(const GMatrix& features, const GMatrix& labels)
{
	size_t nLearnerCount = m_models.size();
	double dBestError = 1e200;
	GSupervisedLearner* pLearner;
	m_nBestLearner = (size_t)m_rand.next(nLearnerCount);
	double err;
	for(size_t i = 0; i < nLearnerCount; i++)
	{
		pLearner = m_models[i];
		err = pLearner->crossValidate(features, labels, 2); // 2-fold cross-validation error
		if(err < dBestError)
		{
			dBestError = err;
			m_nBestLearner = i;
		}
		pLearner->clear();
	}
	pLearner = m_models[m_nBestLearner];
	pLearner->train(features, labels);
}
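This variant replaces the heuristicValidate call and its exception handling with a plain 2-fold cross-validation. As a reminder of what that third argument buys, here is a minimal, self-contained sketch of 2-fold error estimation using a trivial predict-the-mean model; it illustrates the concept only and is not the Waffles crossValidate implementation.

#include <vector>
#include <cstddef>

// Illustration only: "train" on one half of the data (here, training is
// just computing the mean label), test on the other half, swap the
// roles, and average the squared error over both folds.
double twoFoldMSE(const std::vector<double>& labels)
{
	size_t half = labels.size() / 2;
	double sse = 0.0;
	for(int fold = 0; fold < 2; fold++)
	{
		size_t trainBeg = (fold == 0) ? 0 : half;
		size_t trainEnd = (fold == 0) ? half : labels.size();
		double mean = 0.0;
		for(size_t i = trainBeg; i < trainEnd; i++)
			mean += labels[i];
		mean /= double(trainEnd - trainBeg);
		for(size_t i = 0; i < labels.size(); i++)
		{
			if(i >= trainBeg && i < trainEnd)
				continue; // skip the training fold
			sse += (labels[i] - mean) * (labels[i] - mean);
		}
	}
	return sse / double(labels.size()); // mean squared error over both folds
}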
Example No. 3
// virtual
void GResamplingAdaBoost::trainInnerInner(const GMatrix& features, const GMatrix& labels)
{
	clear();

	// Initialize all instances with uniform weights
	GVec pDistribution(features.rows());
	pDistribution.fill(1.0 / features.rows());
	size_t drawRows = size_t(m_trainSize * features.rows());
	size_t* pDrawnIndexes = new size_t[drawRows];
	std::unique_ptr<size_t[]> hDrawnIndexes(pDrawnIndexes);

	// Train the ensemble
	size_t labelDims = labels.cols();
	double penalty = 1.0 / labelDims;
	GVec prediction(labelDims);
	for(size_t es = 0; es < m_ensembleSize; es++)
	{
		// Draw a training set from the distribution
		GCategoricalSamplerBatch csb(features.rows(), pDistribution, m_rand);
		csb.draw(drawRows, pDrawnIndexes);
		GMatrix drawnFeatures(features.relation().clone());
		GReleaseDataHolder hDrawnFeatures(&drawnFeatures);
		GMatrix drawnLabels(labels.relation().clone());
		GReleaseDataHolder hDrawnLabels(&drawnLabels);
		size_t* pIndex = pDrawnIndexes;
		for(size_t i = 0; i < drawRows; i++)
		{
			drawnFeatures.takeRow((GVec*)&features[*pIndex]);
			drawnLabels.takeRow((GVec*)&labels[*pIndex]);
			pIndex++;
		}

		// Train an instance of the model and store a clone of it
		m_pLearner->train(drawnFeatures, drawnLabels);
		GDom doc;
		GSupervisedLearner* pClone = m_pLoader->loadLearner(m_pLearner->serialize(&doc));

		// Compute model weight
		double err = 0.5; // starting above zero means err cannot average to exactly 0, so log() below stays finite
		for(size_t i = 0; i < features.rows(); i++)
		{
			pClone->predict(features[i], prediction);
			const GVec& target = labels[i];
			for(size_t j = 0; j < labelDims; j++)
			{
				if((int)target[j] != (int)prediction[j])
					err += penalty;
			}
		}
		err /= features.rows();
		if(err >= 0.5) // a model no better than chance would get a non-positive weight, so stop boosting
		{
			delete(pClone);
			break;
		}
		double weight = 0.5 * log((1.0 - err) / err);
		m_models.push_back(new GWeightedModel(weight, pClone));

		// Update the distribution to favor misclassified instances
		for(size_t i = 0; i < features.rows(); i++)
		{
			err = 0.0;
			pClone->predict(features[i], prediction);
			const GVec& target = labels[i];
			for(size_t j = 0; j < labelDims; j++)
			{
				if((int)target[j] != (int)prediction[j])
					err += penalty;
			}
			err /= labelDims;
			pDistribution[i] *= exp(weight * (err * 2.0 - 1.0));
		}
		pDistribution.sumToOne();
	}
	normalizeWeights();
}
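For reference, the two formulas driving this loop are the standard AdaBoost ones. Writing \varepsilon_t for the training error of round t's model (measured here on the full training set, since the resampling already reflects the distribution) and \varepsilon_i \in [0,1] for the fraction of label dimensions the model gets wrong on instance i:

\alpha_t = \frac{1}{2}\ln\frac{1-\varepsilon_t}{\varepsilon_t}, \qquad D_{t+1}(i) \propto D_t(i)\, e^{\alpha_t (2\varepsilon_i - 1)}

A correctly classified instance (\varepsilon_i = 0) is scaled by e^{-\alpha_t} and a fully misclassified one (\varepsilon_i = 1) by e^{+\alpha_t}, which is exactly what the exp(weight * (err * 2.0 - 1.0)) line computes before sumToOne() renormalizes the distribution.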
Example No. 4
// virtual
void GResamplingAdaBoost::trainInnerInner(GMatrix& features, GMatrix& labels)
{
	clear();

	// Initialize all instances with uniform weights
	double* pDistribution = new double[features.rows()];
	ArrayHolder<double> hDistribution(pDistribution);
	GVec::setAll(pDistribution, 1.0 / features.rows(), features.rows());
	size_t drawRows = size_t(m_trainSize * features.rows());
	size_t* pDrawnIndexes = new size_t[drawRows];
	ArrayHolder<size_t> hDrawnIndexes(pDrawnIndexes);

	// Train the ensemble
	size_t labelDims = labels.cols();
	double penalty = 1.0 / labelDims;
	GTEMPBUF(double, prediction, labelDims);
	for(size_t es = 0; es < m_ensembleSize; es++)
	{
		// Draw a training set from the distribution
		GCategoricalSamplerBatch csb(features.rows(), pDistribution, m_rand);
		csb.draw(drawRows, pDrawnIndexes);
		GMatrix drawnFeatures(features.relation());
		GReleaseDataHolder hDrawnFeatures(&drawnFeatures);
		GMatrix drawnLabels(labels.relation());
		GReleaseDataHolder hDrawnLabels(&drawnLabels);
		size_t* pIndex = pDrawnIndexes;
		for(size_t i = 0; i < drawRows; i++)
		{
			drawnFeatures.takeRow(features[*pIndex]);
			drawnLabels.takeRow(labels[*pIndex]);
			pIndex++;
		}

		// Train an instance of the model and store a clone of it
		m_pLearner->train(drawnFeatures, drawnLabels);
		GDom doc;
		GSupervisedLearner* pClone = m_pLoader->loadSupervisedLearner(m_pLearner->serialize(&doc));

		// Compute model weight
		double err = 0.0;
		for(size_t i = 0; i < features.rows(); i++)
		{
			pClone->predict(features[i], prediction);
			double* pTarget = labels[i];
			double* pPred = prediction;
			for(size_t j = 0; j < labelDims; j++)
			{
				if((int)*(pTarget++) != (int)*(pPred++))
					err += penalty;
			}
		}
		err /= features.rows();
		if(err >= 0.5)
		{
			delete(pClone);
			break;
		}
		double weight = 0.5 * log((1.0 - err) / err);
		m_models.push_back(new GWeightedModel(weight, pClone));

		// Update the distribution to favor misclassified instances
		double* pDist = pDistribution;
		for(size_t i = 0; i < features.rows(); i++)
		{
			err = 0.0;
			pClone->predict(features[i], prediction);
			double* pTarget = labels[i];
			double* pPred = prediction;
			for(size_t j = 0; j < labelDims; j++)
			{
				if((int)*(pTarget++) != (int)*(pPred++))
					err += penalty;
			}
			err /= labelDims;
			*pDist *= exp(weight * (err * 2.0 - 1.0));
			pDist++;
		}
		GVec::sumToOne(pDistribution, features.rows());
	}
	normalizeWeights();
}
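These two trainInnerInner listings are the same algorithm at different points in the library's history. The newer one (Example No. 3) uses GVec and std::unique_ptr where this older one uses raw double*/size_t* buffers with ArrayHolder and the GTEMPBUF macro, clones relations explicitly with relation().clone(), and loads the clone through loadLearner rather than loadSupervisedLearner. One behavioral difference is worth noting: the newer version initializes err to 0.5 before averaging, so a perfect model cannot drive err to exactly 0 and make log((1.0 - err) / err) infinite, whereas this version starts err at 0.0 and has no such guard.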