void LinearSVM::distribution(InputSet& inputSet, unsigned int sample, scalar_t* distr) const {
    // Computes the per-label decision values of one sample and scatters them
    // into distr, using liblinear's internal label ordering (map_) so that
    // distr is indexed by the input set's own label indices.

    // Push the selected features if some were sampled during training
    const bool featuresPushed = !indices_.empty();

    if(featuresPushed) {
        inputSet.pushFeatures(indices_);
    }

    // Number of features and labels of the (possibly restricted) input set
    const unsigned int nbFeatures = inputSet.nbFeatures();
    const unsigned int nbLabels = inputSet.nbLabels();

    // A model must have been trained beforehand
    assert(model_);

    // The model cannot know about more classes than the input set has labels
    assert(static_cast<unsigned int>(get_nr_class(model_)) <= nbLabels);

    // Wrap the dense feature vector of the sample into a liblinear node
    feature_node node;
    node.dim = nbFeatures;
    node.values = const_cast<scalar_t*>(inputSet.features(sample));

    // Query the decision value of every label
    std::vector<double> scores(nbLabels);
    predict_values(model_, &node, &scores[0]);

    // Reorder the scores according to liblinear's label mapping
    for(unsigned int label = 0; label < nbLabels; ++label) {
        distr[map_[label]] = scores[label];
    }

    // Pop the selected features if they were pushed above
    if(featuresPushed) {
        inputSet.popFeatures();
    }
}
void LinearSVM::train(InputSet& inputSet) {
    // Trains a liblinear model on the given input set. If parameters_.C is
    // negative, the optimal C is first determined by 5-folds cross-validation
    // over the grid {1000, 100, ..., 0.01}. Side effects: replaces model_,
    // fills map_ (liblinear's label ordering) and indices_ (the sampled
    // feature indices, empty when no sampling was needed).
    //
    // Fixes relative to the previous revision:
    //  - "&parameters_" had been corrupted into the mojibake "¶meters_"
    //    (HTML entity &para;) in three calls, which does not compile;
    //  - std::bind2nd (deprecated in C++11, removed in C++17) replaced by an
    //    equivalent lambda.

    // Sample features from every heuristic in order for the matrix of
    // features to fit in memory
    if(inputSet.nbFeatures() > NB_FEATURES_MAX / inputSet.nbSamples()) {
        inputSet.sampleFeatures(NB_FEATURES_MAX / inputSet.nbSamples(), indices_);
        inputSet.pushFeatures(indices_);
    }
    else {
        indices_.clear();
    }

    // Get the number of features, samples and labels
    const unsigned int nbFeatures = inputSet.nbFeatures();
    const unsigned int nbSamples = inputSet.nbSamples();
    const unsigned int nbLabels = inputSet.nbLabels();

    // Delete the previous model
    if(model_) {
        free_and_destroy_model(&model_);
    }

    // Create a new problem
    problem prob;

    // Recopy the number of samples and features
    prob.l = nbSamples;
    prob.n = nbFeatures;

    // Recopy the labels
    std::vector<int> labels(inputSet.labels(), inputSet.labels() + nbSamples);
    prob.y = &labels[0];

    // Recopy the features (as we want to normalize them)
    std::vector<scalar_t> matrix;
    inputSet.swapFeatures(matrix);

    // Create samples as expected by liblinear (dense rows into the matrix)
    std::vector<feature_node> samples(nbSamples);
    prob.x = &samples[0];

    // Compute the mean norm
    scalar_t meanNorm = 0;

    for(unsigned int s = 0; s < nbSamples; ++s) {
        samples[s].dim = nbFeatures;
        samples[s].values = &matrix[s * nbFeatures];

        // Add the norm of that sample
        meanNorm += nrm2(samples[s].dim, samples[s].values, 1);
    }

    // Divide the sum of the norms by the number of samples
    meanNorm /= nbSamples;

    std::cout << "[LinearSVM::train] mean(norm): " << meanNorm << '.'
              << std::endl;

    // Rescale the features so that their mean norm is 1.
    // NOTE(review): meanNorm is not stored, so distribution() later applies
    // the model to *unscaled* features — confirm this is intended.
    std::transform(matrix.begin(), matrix.end(), matrix.begin(),
                   [meanNorm](scalar_t value) { return value / meanNorm; });

    // Sets the bias to the default value (liblinear doesn't seem to handle
    // the bias parameter value correctly)
    prob.bias = -1;

    // Should C be determined by cross-validation?
    bool crossValidate = parameters_.C < 0;

    // Sets C to a default value in order to pass the parameter check
    if(crossValidate) {
        parameters_.C = 1;
    }

    // There is a problem with the parameters if this fires
    assert(!check_parameter(&prob, &parameters_));

    // If C was below zero, use 5-folds cross-validation to determine it
    if(crossValidate) {
        std::vector<int> target(nbSamples); // The predicted labels
        unsigned int nbErrorsMin = nbSamples + 1; // Initialize past the maximum

        // Try C = 1000, 100, ..., 0.01 and stop as soon as the error rises
        for(parameters_.C = 1000; parameters_.C >= 0.01; parameters_.C /= 10) {
            cross_validation(&prob, &parameters_, 5, &target[0]);

            // Count the number of errors
            unsigned int nbErrors = 0;

            for(unsigned int s = 0; s < nbSamples; ++s) {
                if(target[s] != labels[s]) {
                    ++nbErrors;
                }
            }

            std::cout << "[LinearSVM::train] 5 folds cross-validation error "
                         "for C = " << parameters_.C << ": "
                      << nbErrors * 100.0f / nbSamples << "%." << std::endl;

            // The new C is better than the previous one
            if(nbErrors < nbErrorsMin) {
                nbErrorsMin = nbErrors;
            }
            // The optimal C was found
            else {
                break;
            }
        }

        // C got divided one time too much
        parameters_.C *= 10;

        // Print C to the log
        std::cout << "[LinearSVM::train] optimal C as determined by 5 folds "
                     "cross-validation: " << parameters_.C << '.' << std::endl;
    }

    // Train the svm
    model_ = ::train(&prob, &parameters_);
    assert(model_);

    // Reset C so that it will be cross-validated again on the next call
    if(crossValidate) {
        parameters_.C = -1;
    }

    // Save liblinear's label ordering (used by distribution())
    map_.clear();
    map_.resize(nbLabels);
    get_labels(model_, &map_[0]);

    // Pop the selected features if required
    if(!indices_.empty()) {
        inputSet.popFeatures();
    }
}