Beispiel #1
0
Statistical::Statistical(InputSet& inputSet) : Filter(inputSet) {
	// Number of samples and features in the input set
	const unsigned int nbSamples = inputSet.nbSamples();
	const unsigned int nbFeatures = inputSet.nbFeatures();

	// First pass: accumulate, per feature, the sum (in means_) and the
	// sum of squares (in stds_) over every sample
	means_.resize(nbFeatures);
	stds_.resize(nbFeatures);

	for(unsigned int sampleIdx = 0; sampleIdx < nbSamples; ++sampleIdx) {
		// Select the current sample alone
		std::vector<unsigned int> selection(1, sampleIdx);
		inputSet.pushSamples(selection);

		const scalar_t* values = inputSet.features(0);

		for(unsigned int feature = 0; feature < nbFeatures; ++feature) {
			means_[feature] += values[feature];
			stds_[feature] += values[feature] * values[feature];
		}

		inputSet.popSamples();
	}

	// Second pass: turn the sums into mean/standard deviation, keeping
	// only the features whose sample variance is strictly positive
	std::vector<unsigned int> kept;

	for(unsigned int feature = 0; feature < nbFeatures; ++feature) {
		const scalar_t mean = means_[feature] / nbSamples;

		// Unbiased sample variance: (sum(x^2) - n*mean^2) / (n - 1);
		// means_[feature] still holds the raw sum at this point
		const scalar_t variance =
			(stds_[feature] - means_[feature] * mean) / (nbSamples - 1);

		if(variance > 0) {
			means_[feature] = mean;
			stds_[feature] = std::sqrt(variance);

			kept.push_back(feature);
		}
	}

	// At least one feature must have non-zero variance
	assert(!kept.empty());

	// Record the selection only if some features were actually filtered out
	if(kept.size() < nbFeatures) {
		featureStack_.push_back(kept);
	}
}
Beispiel #2
0
void LinearSVM::distribution(InputSet& inputSet,
							 unsigned int sample,
							 scalar_t* distr) const {
	// Restrict the input set to the selected features, if any
	const bool restricted = !indices_.empty();

	if(restricted) {
		inputSet.pushFeatures(indices_);
	}

	// Number of features and labels of the (possibly restricted) input set
	const unsigned int nbFeatures = inputSet.nbFeatures();
	const unsigned int nbLabels = inputSet.nbLabels();

	// A trained model is required
	assert(model_);

	// The model cannot predict more classes than the input set has labels
	assert(static_cast<unsigned int>(get_nr_class(model_)) <= nbLabels);

	// Wrap the sample's feature vector in a (dense) liblinear node
	feature_node node;
	node.dim = nbFeatures;
	node.values = const_cast<scalar_t*>(inputSet.features(sample));

	// Decision values predicted by the model (zero-initialized, so entries
	// beyond the model's class count stay at zero)
	std::vector<double> predictions(nbLabels);
	predict_values(model_, &node, &predictions[0]);

	// Update the distribution according to the predictions, remapping the
	// model's label order back to the input set's label order
	for(unsigned int label = 0; label < nbLabels; ++label) {
		distr[map_[label]] = predictions[label];
	}

	// Restore the original feature set
	if(restricted) {
		inputSet.popFeatures();
	}
}
Beispiel #3
0
void C45Tree::distribution(InputSet& inputSet,
						   unsigned int sample,
						   scalar_t* distr) const {
	// Feature vector of the sample to classify
	const scalar_t* features = inputSet.features(sample);

	// Walk down from this node until a leaf is reached (a leaf is a node
	// with no children)
	const C45Tree* node = this;

	while(node->children_[0]) {
		assert(node->feature_ < inputSet.nbFeatures());

		// Values below or equal to the split go left, the rest go right
		const bool goLeft = features[node->feature_] <= node->split_;
		node = node->children_[goLeft ? 0 : 1];
	}

	// Copy the leaf's label distribution into the output array
	assert(node->distr_.size() == inputSet.nbLabels());
	std::copy(node->distr_.begin(), node->distr_.end(), distr);
}