int main(int argc, char* argv[]) { bool verbose; bool interior_point; double nu; bool weight_sharing; bool force; std::string train_filename; std::string output_filename; std::string solver; std::string mpsfile; // Command line options po::options_description generic("Generic Options"); generic.add_options() ("help", "Produce help message") ("verbose", "Verbose output") ; po::options_description input_options("Input/Output Options"); input_options.add_options() ("train", po::value<std::string> (&train_filename)->default_value("training.txt"), "Training file in \"label s0-m0.txt s0-m1.txt ...\" format, " "one sample per row.") ("output", po::value<std::string> (&output_filename)->default_value("output.txt"), "File to write weight matrix to. If \"--weight_sharing 1\" is " "used, this is a single line containing the alpha vector. If " "no weight sharing is used, it is a matrix with number-of-classes " "rows and number-of-weak-learners columns.") ("force", po::value<bool>(&force)->default_value(false), "Force overwriting the output file. Otherwise, if the " "output file already exists, the program is aborted immediately.") ("writemps", po::value<std::string>(&mpsfile)->default_value(""), "Write linear programming problem as MPS file.") ; po::options_description lpboost_options("LPBoost Options"); lpboost_options.add_options() ("nu", po::value<double>(&nu)->default_value(0.1), "nu-parameter for 2-class LPBoost. A larger value " "indicates stronger regularization") ("weight_sharing", po::value<bool>(&weight_sharing)->default_value(true), "Share classifier weights among all classes.") ("interior_point", po::value<bool>(&interior_point)->default_value(true), "Use interior point (true) or simplex method (false) to " "solve the LPBoost master problem") ("solver", po::value<std::string>(&solver)->default_value("clp"), "LP solver to use. One of \"clp\" or \"mosek\".") ; po::options_description all_options; all_options.add(generic).add(input_options).add(lpboost_options); po::variables_map vm; po::store(po::command_line_parser(argc, argv).options(all_options).run(), vm); po::notify(vm); // Boolean flags verbose = vm.count("verbose"); if (vm.count("help")) { std::cerr << "mclp $Id: mclp.cpp 1229 2008-03-10 10:26:34Z nowozin $" << std::endl; std::cerr << "====================================================" << "===========================" << std::endl; std::cerr << "Copyright (C) 2008 -- " << "Sebastian Nowozin <*****@*****.**>" << std::endl; std::cerr << std::endl; std::cerr << "Usage: mclp [options]" << std::endl; std::cerr << std::endl; std::cerr << "Train a multiclass LPBoost model for given and fixed multiclass " << "weak learners." << std::endl; std::cerr << all_options << std::endl; exit(EXIT_SUCCESS); } // Check if output file already exists if (boost::filesystem::exists(boost::filesystem::path(output_filename)) && force == false) { std::cout << "Output file \"" << output_filename << "\" " << "already exists, exiting." << std::endl; exit(EXIT_SUCCESS); } // Read in training data std::cout << "Training file: " << train_filename << std::endl; std::vector<int> labels; // discrete class labels, >= 0, < K. std::vector<std::vector<std::string> > data_S_M; // [n][m] int number_classes = read_problem(train_filename, labels, data_S_M); if (number_classes <= 0) { std::cerr << "Failed to read in training data." << std::endl; exit(EXIT_FAILURE); } std::cout << labels.size() << " samples, " << number_classes << " classes." << std::endl; // Instantiate multiclass classifier and fill it with training data Boosting::LPBoostMulticlassClassifier mlp(number_classes, nu, weight_sharing); mlp.InitializeBoosting(labels, interior_point, solver); read_problem_data(mlp, data_S_M, number_classes); if (mpsfile.empty() == false) mlp.WriteMPS(mpsfile); // Solve std::cout << "Solving linear program..." << std::endl; mlp.Update(); std::cout << "Done." << std::endl; std::cout << "Soft margin " << mlp.Rho() << ", objective " << mlp.Gamma() << std::endl; // Print weights const std::vector<std::vector<double> >& clw = mlp.ClassifierWeights(); std::cout << "Writing (K,M) weight matrix to \"" << output_filename << "\", K = " << (weight_sharing ? 1 : number_classes) << ", M = " << clw[0].size() << std::endl; std::ofstream wout(output_filename.c_str()); if (wout.fail()) { std::cerr << "Failed to open \"" << output_filename << "\" for writing." << std::endl; exit(EXIT_FAILURE); } wout << std::setprecision(12); for (unsigned int aidx = 0; aidx < clw.size(); ++aidx) { for (unsigned int bidx = 0; bidx < clw[aidx].size(); ++bidx) { wout << (bidx == 0 ? "" : " ") << clw[aidx][bidx]; } wout << std::endl; } wout.close(); exit(EXIT_SUCCESS); }
//train the svm using the parameters defined inside this method void ML2::trainData() { //file to store the svm model structure string model_file_name1 = flowstatistics_train_name+"_model.csv"; const char *model_file_name = model_file_name1.c_str(); //file to read the data from char input_file_name[1024] = "velocityArray.csv"; //char input_file_name2[1024] = "data/flowstatistics_train_mu.csv"; const char *error_msg; //parameters of the svm /* "-s svm_type : set type of SVM (default 0)\n" " 0 -- C-SVC (multi-class classification)\n" " 1 -- nu-SVC (multi-class classification)\n" " 2 -- one-class SVM\n" " 3 -- epsilon-SVR (regression)\n" " 4 -- nu-SVR (regression)\n" "-t kernel_type : set type of kernel function (default 2)\n" " 0 -- linear: u'*v\n" " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n" " 4 -- precomputed kernel (kernel values in training_set_file)\n" "-d degree : set degree in kernel function (default 3)\n" "-g gamma : set gamma in kernel function (default 1/num_features)\n" "-r coef0 : set coef0 in kernel function (default 0)\n" "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n" "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n" "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" "-m cachesize : set cache memory size in MB (default 100)\n" "-e epsilon : set tolerance of termination criterion (default 0.001)\n" "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" "-v n: n-fold cross validation mode\n" "-q : quiet mode (no outputs)\n" */ //set the parameters to be used for svm param.svm_type = 4; param.kernel_type = 1;//RBF; param.degree = 2; param.gamma = 0.125; // 1/num_features param.coef0 = 0; param.nu = 0.4; param.cache_size = 100; param.C = 0.125; param.eps = 1e-3; param.p = 0.1; param.shrinking = 1; param.probability = 0; param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL; //param.v = 10; nr_fold =10; //read from the data file read_problem_data( input_file_name, flowcol); //checking the parameters, if they are set correctly error_msg = svm_check_parameter(&prob,¶m); if(error_msg) { cout<<"ERROR: "<<error_msg<<endl<<flush; exit(1); } //do_cross_validation(); // first do grid search do find optimal parameters //paramSelection(); // then do training with optimal parameters //param.gamma = best_gamma; //param.C = best_C; cout<< "start training\n"<<endl<<flush; model = svm_train(&prob,¶m); cout<< "end training\n"<<endl<<flush; // then do cross fold validation cout<< "start with cross validation" <<endl<<flush; do_cross_validation(); cout<< "end cross validation" <<endl<<flush; //save model if(svm_save_model(model_file_name,model)) { cout<< "can't save model to file "<< model_file_name <<endl<<flush; exit(1); } //free all the pointers used, except the model which is required for prediction // svm_destroy_param(¶m); // free(prob.y); // free(prob.x); // free(x_space); // free(line); return; }