/// Builds the filter by measuring every feature of @p inputSet.
///
/// One pass over the samples accumulates, per feature, the sum of its values
/// (in means_) and the sum of their squares (in stds_). Those sums are then
/// turned into the mean and the sample standard deviation (variance divided by
/// nbSamples - 1). Only features whose variance is strictly positive get their
/// means_/stds_ entries finalised and are kept in the selection; the entries
/// of the other features keep the raw sums. If at least one feature was
/// dropped, the list of kept feature indices is appended to featureStack_.
///
/// @param inputSet The input set to analyse. It must contain at least two
///                 samples and at least one feature with positive variance
///                 (both are checked with assert).
Statistical::Statistical(InputSet& inputSet) : Filter(inputSet) {
    // Get the number of samples and features
    const unsigned int nbSamples = inputSet.nbSamples();
    const unsigned int nbFeatures = inputSet.nbFeatures();

    // The variance below divides by (nbSamples - 1). nbSamples is unsigned, so
    // 0 samples would wrap the denominator around and 1 sample would make it
    // zero; both would yield meaningless statistics. Fail early and explicitly.
    assert(nbSamples > 1);

    // Used to store the current sum and sum of squares of every feature
    means_.resize(nbFeatures);
    stds_.resize(nbFeatures);

    for(unsigned int s = 0; s < nbSamples; ++s) {
        // Select sample s only, so that it is addressed as sample 0 below
        std::vector<unsigned int> sample(1, s);
        inputSet.pushSamples(sample);

        const scalar_t* features = inputSet.features(0);

        for(unsigned int f = 0; f < nbFeatures; ++f) {
            means_[f] += features[f];
            stds_[f] += features[f] * features[f];
        }

        inputSet.popSamples();
    }

    // Indices of the features that have a strictly positive variance
    std::vector<unsigned int> featureStack;

    for(unsigned int f = 0; f < nbFeatures; ++f) {
        // means_[f] still holds the sum here, so sum * mean == n * mean^2
        scalar_t mean = means_[f] / nbSamples;
        scalar_t variance = (stds_[f] - means_[f] * mean) / (nbSamples - 1);

        // Rejects constant features, and also any non-positive or NaN result
        if(variance > 0) {
            means_[f] = mean;
            stds_[f] = std::sqrt(variance);
            featureStack.push_back(f);
        }
    }

    // There must be at least one feature
    assert(!featureStack.empty());

    // Push the selected features if needed
    if(featureStack.size() < nbFeatures) {
        featureStack_.push_back(featureStack);
    }
}
/// Scores one sample with model_ and writes the per-label values to @p distr.
///
/// When indices_ is not empty, that feature selection is pushed on
/// @p inputSet for the duration of the call and popped before returning.
///
/// @param inputSet The input set that holds the sample.
/// @param sample   Index of the sample to score.
/// @param distr    Output array; entry map_[l] receives the l-th value written
///                 by predict_values(), for every l below the number of labels.
void LinearSVM::distribution(InputSet& inputSet,
                             unsigned int sample,
                             scalar_t* distr) const {
    // Restrict the input set to the selected features, if there is a selection
    const bool useSelection = !indices_.empty();

    if(useSelection) {
        inputSet.pushFeatures(indices_);
    }

    // Dimensions of the input set, read once the selection is in place
    const unsigned int nbFeatures = inputSet.nbFeatures();
    const unsigned int nbLabels = inputSet.nbLabels();

    // A model is required, and it must not have more classes than the input
    // set has labels
    assert(model_);
    assert(static_cast<unsigned int>(get_nr_class(model_)) <= nbLabels);

    // Describe the sample as a node. The cast drops constness to fit the
    // node's field; presumably the values are only read - confirm against
    // predict_values()
    feature_node node;
    node.dim = nbFeatures;
    node.values = const_cast<scalar_t*>(inputSet.features(sample));

    // One value per label, zero-initialised before the prediction
    std::vector<double> scores(nbLabels);
    predict_values(model_, &node, &scores[0]);

    // Scatter the values into the distribution through the label map
    for(unsigned int label = 0; label < nbLabels; ++label) {
        distr[map_[label]] = scores[label];
    }

    // Undo the feature selection, if one was pushed
    if(useSelection) {
        inputSet.popFeatures();
    }
}
/// Copies into @p distr the distribution stored in the node reached by a sample.
///
/// Starting from this node, the tree is descended for as long as the current
/// node has a first child: the sample's value for the node's feature_ is
/// compared with split_, and the first child is taken when the value is less
/// than or equal to it, the second child otherwise.
///
/// @param inputSet The input set that holds the sample.
/// @param sample   Index of the sample to classify.
/// @param distr    Output array; receives distr_ of the node where the descent
///                 stops (asserted to hold one entry per label).
void C45Tree::distribution(InputSet& inputSet,
                           unsigned int sample,
                           scalar_t* distr) const {
    // Feature values of the sample to classify
    const scalar_t* values = inputSet.features(sample);

    // Descend until a node without a first child is reached
    const C45Tree* node = this;

    while(node->children_[0]) {
        assert(node->feature_ < inputSet.nbFeatures());

        // Values up to and including the split select child 0, others child 1
        const bool takeFirst = values[node->feature_] <= node->split_;
        node = node->children_[takeFirst ? 0 : 1];
    }

    // Hand back a copy of the distribution stored in that node
    assert(node->distr_.size() == inputSet.nbLabels());
    std::copy(node->distr_.begin(), node->distr_.end(), distr);
}