Code example #1
File: LinearSVM.cpp  Project: idiap/mash
void LinearSVM::distribution(InputSet& inputSet,
							 unsigned int sample,
							 scalar_t* distr) const {
	// Push the selected features if required
	if(!indices_.empty()) {
		inputSet.pushFeatures(indices_);
	}

	// Get the number of features and labels
	const unsigned int nbFeatures = inputSet.nbFeatures();
	const unsigned int nbLabels = inputSet.nbLabels();

	// Make sure that we have a model
	assert(model_);

	// Make sure that the model doesn't have more classes than there are labels
	assert(static_cast<unsigned int>(get_nr_class(model_)) <= nbLabels);

	// Create a node
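	// (this liblinear build uses a dense feature_node layout, with dim and
	// values fields instead of the stock sparse index/value pairs)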
	feature_node node;
	node.dim = nbFeatures;
	node.values = const_cast<scalar_t*>(inputSet.features(sample));

	// The decision values, one per class
	std::vector<double> predictions(nbLabels);
	predict_values(model_, &node, &predictions[0]);

	// Update the distribution according to the predictions
	for(unsigned int l = 0; l < nbLabels; ++l) {
		distr[map_[l]] = predictions[l];
	}

	// Pop the selected features if required
	if(!indices_.empty()) {
		inputSet.popFeatures();
	}
}
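
The final loop depends on a liblinear detail: predict_values fills the decision values in the model's internal class order, the same order that get_labels reports (cached in map_ by train below). Here is a minimal, self-contained sketch of that reordering step, with made-up values standing in for a real model:

#include <iostream>
#include <vector>

int main() {
	// Hypothetical internal label order, as get_labels() might report it
	std::vector<int> map_ = {2, 0, 1};

	// Decision values in the model's internal class order
	std::vector<double> predictions = {0.7, -1.2, 0.5};

	// Reorder into a distribution indexed by the original label ids,
	// mirroring the distr[map_[l]] = predictions[l] loop above
	std::vector<double> distr(map_.size());
	for(unsigned int l = 0; l < map_.size(); ++l) {
		distr[map_[l]] = predictions[l];
	}

	for(unsigned int l = 0; l < distr.size(); ++l) {
		std::cout << distr[l] << ' '; // prints: -1.2 0.5 0.7
	}
	std::cout << std::endl;
	return 0;
}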
Code example #2
File: LinearSVM.cpp  Project: idiap/mash
void LinearSVM::train(InputSet& inputSet) {
	// Sample features from every heuristic in order for the matrix of features
	// to fit in memory
	if(inputSet.nbFeatures() > NB_FEATURES_MAX / inputSet.nbSamples()) {
		inputSet.sampleFeatures(NB_FEATURES_MAX / inputSet.nbSamples(), indices_);
		inputSet.pushFeatures(indices_);
	}
	else {
		indices_.clear();
	}

	// Get the number of features, samples and labels
	const unsigned int nbFeatures = inputSet.nbFeatures();
	const unsigned int nbSamples = inputSet.nbSamples();
	const unsigned int nbLabels = inputSet.nbLabels();

	// Delete the previous model
	if(model_) {
		free_and_destroy_model(&model_);
	}

	// Create a new problem
	problem prob;

	// Copy the number of samples and features
	prob.l = nbSamples;
	prob.n = nbFeatures;

	// Copy the labels
	std::vector<int> labels(inputSet.labels(), inputSet.labels() + nbSamples);
	prob.y = &labels[0];
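	// (prob.y is an int* in this liblinear build; newer stock releases
	// declare it as double*)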

	// Take ownership of the features (we want to normalize them in place)
	std::vector<scalar_t> matrix;
	inputSet.swapFeatures(matrix);

	// Create samples as expected by liblinear
	std::vector<feature_node> samples(nbSamples);
	prob.x = &samples[0];

	// Compute the mean norm
	scalar_t meanNorm = 0;

	for(unsigned int s = 0; s < nbSamples; ++s) {
		samples[s].dim = nbFeatures;
		samples[s].values = &matrix[s * nbFeatures];

		// Add the mean norm of that sample
		meanNorm += nrm2(samples[s].dim, samples[s].values, 1);
	}

	// Divide the sum of the norms by the number of samples
	meanNorm /= nbSamples;

	std::cout << "[LinearSVM::train] mean(norm): " << meanNorm << '.'
			  << std::endl;

	// Rescale the features so that their mean norm is 1
	std::transform(matrix.begin(), matrix.end(), matrix.begin(),
				   [meanNorm](scalar_t v) { return v / meanNorm; });

	// Set the bias to the default value (liblinear doesn't seem to handle the
	// bias parameter correctly)
	prob.bias = -1;

	// A negative C means that it should be determined by cross-validation
	bool crossValidate = parameters_.C < 0;

	// Set C to a default value in order to pass the parameter check
	if(crossValidate) {
		parameters_.C = 1;
	}

	// Make sure that there is no problem with the parameters
	// (check_parameter returns NULL when they are valid)
	assert(!check_parameter(&prob, &parameters_));

	// If C is below zero, use 5-fold cross-validation to determine it
	if(crossValidate) {
		std::vector<int> target(nbSamples); // The predicted labels
		unsigned int nbErrorsMin = nbSamples + 1; // Initialize past the maximum

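		// Note: this search walks C down from 1000 by factors of 10 and stops
		// at the first value that fails to improve the error, which assumes
		// the cross-validation error is roughly unimodal in C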
		for(parameters_.C = 1000; parameters_.C >= 0.01; parameters_.C /= 10) {
			cross_validation(&prob, &parameters_, 5, &target[0]);

			// Count the number of errors
			unsigned int nbErrors = 0;

			for(unsigned int s = 0; s < nbSamples; ++s) {
				if(target[s] != labels[s]) {
					++nbErrors;
				}
			}

			std::cout << "[LinearSVM::train] 5 folds cross-validation error "
						 "for C = " << parameters_.C << ": "
					  << nbErrors * 100.0f / nbSamples << "%." << std::endl;

			// The new C is better than the previous one
			if(nbErrors < nbErrorsMin) {
				nbErrorsMin = nbErrors;
			}
			// The error got worse, so the previous C was the optimal one
			else {
				break;
			}
		}

	// Step back to the previous C (the loop either broke on a worse value or
	// divided one time too many)
		parameters_.C *= 10;

		// Print C to the log
		std::cout << "[LinearSVM::train] optimal C as determined by 5 folds "
			   		 "cross-validation: " << parameters_.C << '.' << std::endl;
	}

	// Train the svm
	model_ = ::train(&prob, &parameters_);
	assert(model_);

	// Reset C so that it will be cross-validated again
	if(crossValidate) {
		parameters_.C = -1;
	}

	// Save the liblinear label order
	map_.clear();
	map_.resize(nbLabels);
	get_labels(model_, &map_[0]);

	// Pop the selected features if required
	if(!indices_.empty()) {
		inputSet.popFeatures();
	}
}
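
For reference, here is the normalization step in isolation: compute the mean L2 norm over the samples, then divide every feature by it so the rescaled data has mean norm 1. This is a self-contained sketch with made-up data; double stands in for scalar_t, and the norm is computed by hand instead of through the BLAS nrm2 call used above:

#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main() {
	// Two samples with three features each, stored row-major like `matrix`
	std::vector<double> matrix = {3.0, 0.0, 4.0,    // L2 norm: 5
	                              0.0, 12.0, 5.0};  // L2 norm: 13
	const unsigned int nbSamples = 2;
	const unsigned int nbFeatures = 3;

	// Sum the per-sample L2 norms
	double meanNorm = 0;
	for(unsigned int s = 0; s < nbSamples; ++s) {
		double sum = 0;
		for(unsigned int f = 0; f < nbFeatures; ++f) {
			const double v = matrix[s * nbFeatures + f];
			sum += v * v;
		}
		meanNorm += std::sqrt(sum);
	}

	// Divide the sum of the norms by the number of samples
	meanNorm /= nbSamples;

	// Rescale the features so that their mean norm is 1
	std::transform(matrix.begin(), matrix.end(), matrix.begin(),
	               [meanNorm](double v) { return v / meanNorm; });

	std::cout << "mean(norm): " << meanNorm << std::endl; // prints 9
	return 0;
}

Normalizing this way keeps the overall magnitude of the features comparable across feature sets, which is presumably what lets the cross-validated C stay in a fixed range (0.01 to 1000) regardless of the heuristics sampled.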