void RunIrisSupervisedLearning() { // Load training/test dataset Table data = ParseCSVFile("data/iris.data"); // Print out the data to ensure we've loaded it correctly std::cout << "Loaded Data:" << std::endl; PrintTable(data); // Extract feature std::vectors and classes from the loaded data std::vector<SampleType> allSamples = GetFeatureVectors(data); std::vector<std::string> classes = GetClasses(data); // Construct labels compatible with SVMs using the class data // Each class is made an integer. The integers grow one number // apart, so three classes will be assigned the labels 1, 2 and // 3 respectively. std::vector<LabelType> allLabels = ConstructLabels(classes); // Randomise the samples and labels to ensure the normalisation process // does affect the performance of cross validation randomize_samples(allSamples, allLabels); // Split dataset in half - one half being the training set // and one half being the test set. // Done AFTER randomising so half of the data set isn't one class // and half is the other - would result in a very incorrect classifier! 
unsigned int numTraining = round(allSamples.size() / 2); unsigned int numTest = allSamples.size() - numTraining; std::vector<SampleType> trainingSamples; std::vector<LabelType> trainingLabels; trainingSamples.reserve(numTraining); trainingLabels.reserve(numTraining); std::vector<SampleType> testSamples; std::vector<LabelType> testLabels; testSamples.reserve(numTest); testLabels.reserve(numTest); for (unsigned int i = 0; (i < numTraining); ++i) { trainingSamples.push_back(allSamples[i]); trainingLabels.push_back(allLabels[i]); } for (unsigned int i = numTraining; (i < allSamples.size()); ++i) { testSamples.push_back(allSamples[i]); testLabels.push_back(allLabels[i]); } // Construct a trainer for the problem dlib::krr_trainer<KernelType> trainer; double bestGamma = FindBestGamma(trainer, trainingSamples, trainingLabels); trainer.set_kernel(KernelType(bestGamma)); // Actually TRAIN the classifier using the data, LEARNING the function FunctionType learnedFunction; learnedFunction = trainer.train(trainingSamples, trainingLabels); // NOTE: This should just print out 1 for our training method std::cout << "The number of support vectors in our learned function is " << learnedFunction.basis_vectors.nr() << std::endl; double accuracy = CalculateAccuracy(learnedFunction, testSamples, testLabels); std::cout << "The accuracy of this classifier is: " << (accuracy * 100) << "%." << std::endl; }
// Trains a binary relation detector from the accumulated positive and
// negative example sentences. Builds sparse feature vectors via
// extract_binary_relation, cross-validates the two SVM C parameters with
// BOBYQA (when enough data is available), then trains a linear SVM and
// packages it into a binary_relation_detector.
// Requires at least one positive and one negative example (asserted).
binary_relation_detector binary_relation_detector_trainer::
train (
) const
{
    // Refuse to train with an empty class on either side.
    DLIB_CASSERT(num_positive_examples() > 0, "Not enough training data given.");
    DLIB_CASSERT(num_negative_examples() > 0, "Not enough training data given.");

    // Turn every stored example into a sparse feature vector with a
    // +1 label for positive relation instances and -1 for negatives.
    std::vector<sparse_vector_type> samples;
    std::vector<double> labels;
    for (unsigned long i = 0; i < pos_sentences.size(); ++i)
    {
        samples.push_back(extract_binary_relation(pos_sentences[i], pos_arg1s[i], pos_arg2s[i], tfe).feats);
        labels.push_back(+1);
    }
    for (unsigned long i = 0; i < neg_sentences.size(); ++i)
    {
        samples.push_back(extract_binary_relation(neg_sentences[i], neg_arg1s[i], neg_arg2s[i], tfe).feats);
        labels.push_back(-1);
    }
    // Shuffle so cross-validation folds aren't all-positive/all-negative.
    randomize_samples(samples, labels);

    // Cross-validation objective used by the BOBYQA parameter search below.
    const int cv_folds = 6;
    brdt_cv_objective obj(num_threads, cv_folds, beta, samples, labels);

    // Starting guess for the two C parameters (class1, class2), scaled by
    // the dataset size since this trainer's C is not sample-normalized.
    matrix<double,2,1> params;
    params = 5000.0/samples.size(), 5000.0/samples.size();
    // We do the parameter search in log space.
    params = log(params);

    // can't do the parameter search if we don't have enough data.  So if we don't
    // have much data then just use the default parameters.
    if (pos_sentences.size() > (unsigned)cv_folds)
    {
        // Search bounds for the two C parameters, also in log space.
        matrix<double,2,1> lower_params, upper_params;
        lower_params = 1.0/samples.size(), 1.0/samples.size();
        upper_params = 100000.0/samples.size(), 100000.0/samples.size();
        lower_params = log(lower_params);
        upper_params = log(upper_params);
        // Initial and final trust-region radii for BOBYQA; the search
        // stops once the radius shrinks to rho_end (a ~20% step in C).
        const double rho_begin = min(upper_params-lower_params)*0.15;
        const double rho_end = log(1.2/samples.size()) - log(1.0/samples.size());
        // Maximize the cross-validation objective over the two parameters,
        // using 2*dims+1 interpolation points and at most 200 evaluations.
        find_max_bobyqa(obj, params, params.size()*2+1, lower_params, upper_params, rho_begin, rho_end, 200);
    }

    // Note that we rescale the parameters to account for the fact that the cross
    // validation was done on a dataset slightly smaller than the one we ultimately train
    // on and the C parameters of this trainer are not normalized by the number of training
    // samples.
    params = exp(params) * (cv_folds-1.0)/cv_folds;

    // Train the final linear SVM on the full dataset with the tuned
    // per-class C parameters.
    svm_c_linear_dcd_trainer<sparse_linear_kernel<sparse_vector_type> > trainer;
    trainer.set_c_class1(params(0));
    trainer.set_c_class2(params(1));
    cout << "using parameters of: " << trans(params);
    cout << "now doing training..." << endl;

    // Package the learned decision function together with the metadata
    // needed to apply it later (relation name + feature extractor id).
    binary_relation_detector bd;
    bd.df = trainer.train(samples, labels);
    bd.relation_type = relation_name;
    bd.total_word_feature_extractor_fingerprint = tfe.get_fingerprint();
    // Training-set performance, for a quick sanity check (not a true
    // estimate of generalization).
    cout << "test on train: " << test_binary_decision_function(bd.df, samples, labels) << endl;
    return bd;
}