Example 1
0
// Entry point: train a multiclass LPBoost model for given, fixed multiclass
// weak learners.  Reads a training file of weak-learner response files,
// solves the LPBoost master LP and writes the resulting (K,M) weight matrix
// to the output file.  Returns via exit(): EXIT_SUCCESS on success or when
// the output file already exists, EXIT_FAILURE on any error.
int main(int argc, char* argv[]) {
	bool verbose;
	bool interior_point;
	double nu;
	bool weight_sharing;
	bool force;
	std::string train_filename;
	std::string output_filename;
	std::string solver;
	std::string mpsfile;

	// Command line options
	po::options_description generic("Generic Options");
	generic.add_options()
		("help", "Produce help message")
		("verbose", "Verbose output")
		;

	po::options_description input_options("Input/Output Options");
	input_options.add_options()
		("train", po::value<std::string>
			(&train_filename)->default_value("training.txt"),
			"Training file in \"label s0-m0.txt s0-m1.txt ...\" format, "
			"one sample per row.")
		("output", po::value<std::string>
			(&output_filename)->default_value("output.txt"),
			"File to write weight matrix to.  If \"--weight_sharing 1\" is "
			"used, this is a single line containing the alpha vector.  If "
			"no weight sharing is used, it is a matrix with number-of-classes "
			"rows and number-of-weak-learners columns.")
		("force", po::value<bool>(&force)->default_value(false),
			"Force overwriting the output file.  Otherwise, if the "
			"output file already exists, the program is aborted immediately.")
		("writemps", po::value<std::string>(&mpsfile)->default_value(""),
			"Write linear programming problem as MPS file.")
		;

	po::options_description lpboost_options("LPBoost Options");
	lpboost_options.add_options()
		("nu", po::value<double>(&nu)->default_value(0.1),
			"nu-parameter for 2-class LPBoost.  A larger value "
			"indicates stronger regularization")
		("weight_sharing", po::value<bool>(&weight_sharing)->default_value(true),
			"Share classifier weights among all classes.")
		("interior_point",
			po::value<bool>(&interior_point)->default_value(true),
			"Use interior point (true) or simplex method (false) to "
			"solve the LPBoost master problem")
		("solver", po::value<std::string>(&solver)->default_value("clp"),
			"LP solver to use.  One of \"clp\" or \"mosek\".")
		;

	po::options_description all_options;
	all_options.add(generic).add(input_options).add(lpboost_options);
	po::variables_map vm;
	// FIX: po::store/po::notify throw (e.g. po::error) on unknown options or
	// malformed values; report a diagnostic instead of dying with an
	// uncaught exception.
	try {
		po::store(po::command_line_parser(argc, argv).options(all_options).run(), vm);
		po::notify(vm);
	} catch (const std::exception& err) {
		std::cerr << "Error parsing command line: " << err.what() << std::endl;
		exit(EXIT_FAILURE);
	}

	// Boolean flags
	// NOTE(review): verbose is parsed but never read below -- kept so the
	// "--verbose" option stays accepted on the command line.
	verbose = vm.count("verbose");
	(void)verbose;

	if (vm.count("help")) {
		std::cerr << "mclp $Id: mclp.cpp 1229 2008-03-10 10:26:34Z nowozin $" << std::endl;
		std::cerr << "===================================================="
			<< "===========================" << std::endl;
		std::cerr << "Copyright (C) 2008 -- "
			<< "Sebastian Nowozin <*****@*****.**>"
			<< std::endl;
		std::cerr << std::endl;
		std::cerr << "Usage: mclp [options]" << std::endl;
		std::cerr << std::endl;
		std::cerr << "Train a multiclass LPBoost model for given and fixed multiclass "
			<< "weak learners." << std::endl;
		std::cerr << all_options << std::endl;

		exit(EXIT_SUCCESS);
	}

	// Check if output file already exists; refuse to clobber it unless
	// "--force 1" was given.
	if (boost::filesystem::exists(boost::filesystem::path(output_filename))
		&& force == false) {
		std::cout << "Output file \"" << output_filename << "\" "
			<< "already exists, exiting." << std::endl;
		exit(EXIT_SUCCESS);
	}

	// Read in training data
	std::cout << "Training file: " << train_filename << std::endl;
	std::vector<int> labels;	// discrete class labels, >= 0, < K.
	std::vector<std::vector<std::string> > data_S_M;	// [n][m]
	int number_classes = read_problem(train_filename, labels, data_S_M);
	if (number_classes <= 0) {
		std::cerr << "Failed to read in training data." << std::endl;
		exit(EXIT_FAILURE);
	}
	std::cout << labels.size() << " samples, "
		<< number_classes << " classes." << std::endl;

	// Instantiate multiclass classifier and fill it with training data
	Boosting::LPBoostMulticlassClassifier mlp(number_classes, nu, weight_sharing);
	mlp.InitializeBoosting(labels, interior_point, solver);
	read_problem_data(mlp, data_S_M, number_classes);

	// Optionally dump the LP in MPS format before solving.
	if (mpsfile.empty() == false)
		mlp.WriteMPS(mpsfile);

	// Solve the LPBoost master problem.
	std::cout << "Solving linear program..." << std::endl;
	mlp.Update();
	std::cout << "Done." << std::endl;
	std::cout << "Soft margin " << mlp.Rho() << ", objective "
		<< mlp.Gamma() << std::endl;

	// Print weights
	const std::vector<std::vector<double> >& clw = mlp.ClassifierWeights();
	// FIX: guard before clw[0] -- indexing an empty weight matrix is
	// undefined behaviour.
	if (clw.empty()) {
		std::cerr << "No classifier weights produced, nothing to write." << std::endl;
		exit(EXIT_FAILURE);
	}
	std::cout << "Writing (K,M) weight matrix to \""
		<< output_filename << "\", K = "
		<< (weight_sharing ? 1 : number_classes)
		<< ", M = " << clw[0].size() << std::endl;

	std::ofstream wout(output_filename.c_str());
	if (wout.fail()) {
		std::cerr << "Failed to open \"" << output_filename
			<< "\" for writing." << std::endl;
		exit(EXIT_FAILURE);
	}
	wout << std::setprecision(12);
	// One row per class (or a single row with weight sharing), columns
	// separated by single spaces.
	for (unsigned int aidx = 0; aidx < clw.size(); ++aidx) {
		for (unsigned int bidx = 0; bidx < clw[aidx].size(); ++bidx) {
			wout << (bidx == 0 ? "" : " ") << clw[aidx][bidx];
		}
		wout << std::endl;
	}
	wout.close();

	exit(EXIT_SUCCESS);
}
Example 2
0
    // Train an SVM regression model with libsvm using the parameters set
    // inside this method, run n-fold cross validation, and save the fitted
    // model to "<flowstatistics_train_name>_model.csv".
    // Reads training data from "velocityArray.csv" via read_problem_data().
    // Uses class members: param, prob, model, nr_fold, flowcol,
    // flowstatistics_train_name.  Exits the process on setup/save failure.
    void ML2::trainData()
    {

        //file to store the svm model structure
	    string model_file_name1 = flowstatistics_train_name+"_model.csv";
        // NOTE: model_file_name points into model_file_name1's buffer; valid
        // here because model_file_name1 outlives every use below.
        const char *model_file_name = model_file_name1.c_str();
        //file to read the data from
	    char input_file_name[1024] = "velocityArray.csv";
        //char input_file_name2[1024] = "data/flowstatistics_train_mu.csv";
	    const char *error_msg;

       //parameters of the svm (libsvm option reference, kept for lookup)
        /*
       "-s svm_type : set type of SVM (default 0)\n"
	    "	0 -- C-SVC		(multi-class classification)\n"
	    "	1 -- nu-SVC		(multi-class classification)\n"
	    "	2 -- one-class SVM\n"
	    "	3 -- epsilon-SVR	(regression)\n"
	    "	4 -- nu-SVR		(regression)\n"
	    "-t kernel_type : set type of kernel function (default 2)\n"
	    "	0 -- linear: u'*v\n"
	    "	1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
	    "	2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
	    "	3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
	    "	4 -- precomputed kernel (kernel values in training_set_file)\n"
	    "-d degree : set degree in kernel function (default 3)\n"
	    "-g gamma : set gamma in kernel function (default 1/num_features)\n"
	    "-r coef0 : set coef0 in kernel function (default 0)\n"
	    "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n"
	    "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n"
	    "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n"
	    "-m cachesize : set cache memory size in MB (default 100)\n"
	    "-e epsilon : set tolerance of termination criterion (default 0.001)\n"
	    "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n"
	    "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n"
	    "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n"
	    "-v n: n-fold cross validation mode\n"
	    "-q : quiet mode (no outputs)\n"
        */
	    //set the parameters to be used for svm
        param.svm_type = 4;	// 4 = nu-SVR (regression), per the table above
	    param.kernel_type = 1;	// 1 = polynomial kernel (the old "RBF" note was wrong; RBF is type 2)
	    param.degree = 2;	// polynomial degree
	    param.gamma = 0.125;	// 0.125 = 1/8; presumably 1/num_features for 8 features -- TODO confirm
	    param.coef0 = 0;
	    param.nu = 0.4;
	    param.cache_size = 100;	// kernel cache in MB
	    param.C = 0.125;
	    param.eps = 1e-3;	// termination tolerance
	    param.p = 0.1;	// epsilon-SVR loss epsilon (unused by nu-SVR)
	    param.shrinking = 1;
	    param.probability = 0;
	    param.nr_weight = 0;	// no per-class C weighting
	    param.weight_label = NULL;
	    param.weight = NULL;
        //param.v = 10;
        nr_fold =10;	// folds used by do_cross_validation() below

        //read from the data file into the class member prob
	    read_problem_data( input_file_name, flowcol);

        //checking the parameters, if they are set correctly; libsvm returns
        //NULL when the parameter set is valid, an error string otherwise
	    error_msg = svm_check_parameter(&prob,&param);
	    if(error_msg)
	    {
		    cout<<"ERROR: "<<error_msg<<endl<<flush;
		    exit(1);
	    }

        //do_cross_validation();
        // first do grid search do find optimal parameters 
        //paramSelection();  

        
        // then do training with optimal parameters
        //param.gamma = best_gamma;
	    //param.C = best_C;

        cout<< "start training\n"<<endl<<flush; 
        model = svm_train(&prob,&param);
        cout<< "end training\n"<<endl<<flush; 
        
        // then do cross fold validation
        cout<< "start with cross validation" <<endl<<flush; 
	    do_cross_validation();
        cout<< "end cross validation" <<endl<<flush; 
       
        //save model; svm_save_model returns non-zero on failure
	    if(svm_save_model(model_file_name,model))
	    {
		    cout<< "can't save model to file "<< model_file_name <<endl<<flush; 
		    exit(1);
	    } 
	
	
        //free all the pointers used, except the model which is required for prediction
        //(deliberately left commented out: prob/x_space must stay alive while
        //the model is used for prediction)
//             svm_destroy_param(&param);
// 	    free(prob.y);
// 	    free(prob.x);
// 	    free(x_space);
// 	    free(line);
	    return;
    }