Example #1
0
void do_cross_validation(const problem * prob)
{    
    int total_correct = 0;
    cross_validation(prob,&param,nr_fold,&total_correct);
    if (param.rank == param.root)
        printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/bprob.l);
}
void LVlinear_cross_validation(lvError *lvErr, const LVlinear_problem *prob_in, const LVlinear_parameter *param_in, const int32_t nr_fold, LVArray_Hdl<double> target_out){
	try{
		// Input verification: Problem dimensions
		if ((*(prob_in->x))->dimSize != (*(prob_in->y))->dimSize)
			throw LVException(__FILE__, __LINE__, "The problem must have an equal number of labels and feature vectors (x and y).");

		// Convert LVsvm_problem to svm_problem
		std::unique_ptr<problem> prob(new problem);
		uint32_t nr_nodes = (*(prob_in->y))->dimSize;
		prob->l = nr_nodes;
		prob->y = (*(prob_in->y))->elt;

		// Create and array of pointers (sparse datastructure)
		std::unique_ptr<feature_node*[]> x(new feature_node*[nr_nodes]);
		prob->x = x.get();

		auto x_in = prob_in->x;
		for (unsigned int i = 0; i < (*x_in)->dimSize; i++){
			// Assign the innermost svm_node array pointers to the array of pointers
			auto xi_in_Hdl = (*x_in)->elt[i];
			x[i] = reinterpret_cast<feature_node*>((*xi_in_Hdl)->elt);
		}

		// Assign parameters to svm_parameter
		std::unique_ptr<parameter> param(new parameter());
		LVConvertParameter(param_in, param.get());

		// Verify parameters
		const char * param_check = check_parameter(prob.get(), param.get());
		if (param_check != nullptr)
			throw LVException(__FILE__, __LINE__, "Parameter check failed with the following error: " + std::string(param_check));

		// Allocate room in target_out
		LVResizeNumericArrayHandle(target_out, nr_nodes);

		// Run cross validation
		cross_validation(prob.get(), param.get(), nr_fold, (*target_out)->elt);
		(*target_out)->dimSize = nr_nodes;
	}
	catch (LVException &ex) {
		ex.returnError(lvErr);
		(*target_out)->dimSize = 0;
	}
	catch (std::exception &ex) {
		LVException::returnStdException(lvErr, __FILE__, __LINE__, ex);
		(*target_out)->dimSize = 0;
	}
	catch (...) {
		LVException ex(__FILE__, __LINE__, "Unknown exception has occurred");
		ex.returnError(lvErr);
		(*target_out)->dimSize = 0;
	}
}
Example #3
0
void do_cross_validation()
{
	int i;
	int total_correct = 0;
	int *target = Malloc(int, prob.l);

	cross_validation(&prob,&param,nr_fold,target);

	for(i=0;i<prob.l;i++)
		if(target[i] == prob.y[i])
			++total_correct;
	printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);

	free(target);
}
Example #4
0
double do_cross_validation()
{
	int i;
	int total_correct = 0;
	int *target = Malloc(int,prob.l);
	double retval = 0.0;

	cross_validation(&prob,&param,nr_fold,target);

	for(i=0;i<prob.l;i++)
		if(target[i] == prob.y[i])
			++total_correct;
	mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
	retval = 100.0*total_correct/prob.l;

	free(target);
	return retval;
}
Example #5
0
File: train.cpp Project: cental/stc
double do_cross_validation(int nr_folds)
{
	int i;
	int total_correct = 0;
	int *target = Malloc(int, prob.l);

	cross_validation(&prob,&param,nr_folds,target);
	
	for(i=0;i<prob.l;i++)
		if(target[i] == prob.y[i])
			++total_correct;
	printf("prob.l=%d\n",prob.l);
	double accuracy = 100.0*total_correct/prob.l;
	printf("Cross Validation Accuracy = %f\n", accuracy);
	free(target);
	return accuracy;
	
}
double do_cross_validation()
{
	int i;
	int total_correct = 0;
	double total_error = 0;
	double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
	double *target = Malloc(double, prob.l);
	double retval = 0.0;

	cross_validation(&prob,&param,nr_fold,target);
	if(param.solver_type == L2R_L2LOSS_SVR || 
	   param.solver_type == L2R_L1LOSS_SVR_DUAL || 
	   param.solver_type == L2R_L2LOSS_SVR_DUAL)
	{
		for(i=0;i<prob.l;i++)
                {
                        double y = prob.y[i];
                        double v = target[i];
                        total_error += (v-y)*(v-y);
                        sumv += v;
                        sumy += y;
                        sumvv += v*v;
                        sumyy += y*y;
                        sumvy += v*y;
                }
                printf("Cross Validation Mean squared error = %g\n",total_error/prob.l);
                printf("Cross Validation Squared correlation coefficient = %g\n",
                        ((prob.l*sumvy-sumv*sumy)*(prob.l*sumvy-sumv*sumy))/
                        ((prob.l*sumvv-sumv*sumv)*(prob.l*sumyy-sumy*sumy))
                        );
		retval = total_error/prob.l;
	}
	else
	{
		for(i=0;i<prob.l;i++)
			if(target[i] == prob.y[i])
				++total_correct;
		printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
		retval = 100.0*total_correct/prob.l;
	}

	free(target);
	return retval;
}
Example #7
0
    void doc_manager::populate(void)
    {
        
        add_class_descriptor(ml::k_base);
        
        add_class_descriptors(ml::k_base, {
            ml::k_classification,
            ml::k_regression
        });
        
        add_class_descriptors(ml::k_regression, {
            ml::k_ann,
            ml::k_linreg,
            ml::k_logreg
        });
        
        add_class_descriptors(ml::k_classification, {
            ml::k_svm,
            ml::k_adaboost,
            ml::k_anbc,
            ml::k_dtw,
            ml::k_hmmc,
            ml::k_softmax,
            ml::k_randforest,
            ml::k_mindist,
            ml::k_knn,
            ml::k_gmm,
            ml::k_dtree
        });
        
        add_class_descriptors(ml::k_feature_extraction, {
            ml::k_peak,
            ml::k_minmax,
            ml::k_zerox
        });
        
        descriptors[ml::k_ann].desc("Artificial Neural Network").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/MLP");
        descriptors[ml::k_linreg].desc("Linear Regression").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/LinearRegression");
        descriptors[ml::k_logreg].desc("Logistic Regression").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/LogisticRegression");
        descriptors[ml::k_peak].desc("Peak Detection").url("").num_outlets(1);
        descriptors[ml::k_minmax].desc("Minimum / Maximum Detection").url("").num_outlets(1);
        descriptors[ml::k_zerox].desc("Zero Crossings Detection").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/ZeroCrossingCounter");
        descriptors[ml::k_svm].desc("Support Vector Machine").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/SVM");
        descriptors[ml::k_adaboost].desc("Adaptive Boosting").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/AdaBoost");
        descriptors[ml::k_anbc].desc("Adaptive Naive Bayes Classifier").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/ANBC");
        descriptors[ml::k_dtw].desc("Dynamic Time Warping").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/DTW");
        descriptors[ml::k_hmmc].desc("Continuous Hidden Markov Model").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/HMM");
        descriptors[ml::k_softmax].desc("Softmax Classifier").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/Softmax");
        descriptors[ml::k_randforest].desc("Random Forests").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/RandomForests");
        descriptors[ml::k_mindist].desc("Minimum Distance").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/MinDist");
        descriptors[ml::k_knn].desc("K Nearest Neighbour").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/KNN");
        descriptors[ml::k_gmm].desc("Gaussian Mixture Model").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/GMMClassifier");
        descriptors[ml::k_dtree].desc("Decision Trees").url("http://www.nickgillian.com/wiki/pmwiki.php/GRT/DecisionTree");
        
        for (auto& desc : {&descriptors[ml::k_hmmc], &descriptors[ml::k_dtw]})
        {
            desc->notes(
                        "add and map messages for time series should be delimited with record messages, e.g. record 1, add 1 40 50, add 1 41 50, record 0"
            );
        }
        
        // base descriptor
        message_descriptor add(
                              "add",
                              "list comprising a class id followed by n features, <class> <feature 1> <feature 2> etc",
                               "1 0.2 0.7 0.3 0.1"
                              );

        
        message_descriptor train(
                                "train",
                                "train the model based on vectors added with 'add'"
                                );
        
        message_descriptor map(
                              "map",
                              "generate the output value(s) for the input feature vector",
                               "0.2 0.7 0.3 0.1"
                              );
        
        message_descriptor write(
                                 "write",
                                 "write training data and / or model, first argument gives path to write file",
                                 "/path/to/my_ml-lib_data"
                                 );
        
        message_descriptor read(
                                "read",
                                "read training data and / or model, first argument gives path to the read file",
                                "/path/to/my_ml-lib_data"
                                );
        
        message_descriptor clear(
                                 "clear",
                                 "clear the stored training data and model"
                                 );
        
        message_descriptor help(
                               "help",
                               "post usage statement to the console"
                               );
        
        valued_message_descriptor<int> scaling(
                                               "scaling",
                                               "sets whether values are automatically scaled",
                                               {0, 1},
                                               1
                                               );
        
        valued_message_descriptor<int> record(
                                              "record",
                                              "start or stop time series recording for a single example of a given class",
                                              {0, 1},
                                              0
                                              );
        
        ranged_message_descriptor<float> training_rate(
                                                       "training_rate",
                                                       "set the learning rate, used to update the weights at each step of learning algorithms such as stochastic gradient descent.",
                                                       0.01,
                                                       1.0,
                                                       0.1
                                                       );
        
        ranged_message_descriptor<float> min_change(
                                                    "min_change",
                                                    "set the minimum change that must be achieved between two training epochs for the training to continue",
                                                    0.0,
                                                    1.0,
                                                    1.0e-5
                                                    );
        
        ranged_message_descriptor<int> max_iterations(
                                                      "max_iterations",
                                                      "set the maximum number of training iterations",
                                                      0,
                                                      1000,
                                                      100
                                                      );
        
        record.insert_before = "add";
        descriptors[ml::k_base].add_message_descriptor(add, write, read, train, clear, map, help, scaling, training_rate, min_change, max_iterations);

        // generic classification descriptor
        valued_message_descriptor<bool> null_rejection(
                                                       "null_rejection",
                                                       "toggle NULL rejection off or on, when 'on' classification results below the NULL-rejection threshold will be discarded",
                                                       {false, true},
                                                       true
                                                       );
        
        ranged_message_descriptor<float> null_rejection_coeff(
                                                              "null_rejection_coeff",
                                                              "set a multiplier for the NULL-rejection threshold ",
                                                              0.1,
                                                              1.0,
                                                              0.9
                                                              );
        
        valued_message_descriptor<int> probs(
                                             "probs",
                                             "determines whether probabilities are sent from the right outlet",
                                             {0, 1},
                                             0
                                             );
        
        descriptors[ml::k_classification].add_message_descriptor(null_rejection_coeff, probs, null_rejection);
        
        // generic feature extraction descriptor
//        descriptors[ml::k_feature_extraction].add_message_descriptor(null_rejection_coeff, null_rejection);

        // generic regression descriptor
       
        
//        descriptors[ml::k_regression].add_message_descriptor(training_rate, min_change, max_iterations);
        
        // Object-specific descriptors
        //-- Regressifiers
        //---- ann
        valued_message_descriptor<ml::data_type> mode("mode",
                                                      "set the mode of the ANN, " + std::to_string(ml::LABELLED_CLASSIFICATION) + " for classification, " + std::to_string(ml::LABELLED_REGRESSION) + " for regression",
                                                      {ml::LABELLED_CLASSIFICATION, ml::LABELLED_REGRESSION, ml::LABELLED_TIME_SERIES_CLASSIFICATION},
                                                      ml::defaults::data_type
                                                      );
        
        
        message_descriptor add_ann(
                              "add",
                              "class id followed by n features, <class> <feature 1> <feature 2> etc when in classification mode or N output values followed by M input values when in regression mode (N = num_outputs)",
                                   "1 0.2 0.7 0.3 0.1"

                              );
      
        ranged_message_descriptor<int> num_outputs(
                                                   "num_outputs",
                                                   "set the number of neurons in the output layer",
                                                   1,
                                                   1000,
                                                   ml::defaults::num_output_dimensions
                                                   );
        
        ranged_message_descriptor<int> num_hidden(
                                                  "num_hidden",
                                                  "set the number of neurons in the hidden layer",
                                                  1,
                                                  1000,
                                                  ml::defaults::num_hidden_neurons
                                                  );
        
        ranged_message_descriptor<int> min_epochs(
                                                  "min_epochs",
                                                  "setting the minimum number of training iterations",
                                                  1,
                                                  1000,
                                                  10
                                                  );
        
        // TODO: check if the "epochs" are still needed or if we can use "iterations" as inherited from ml_regression
        ranged_message_descriptor<int> max_epochs(
                                                  "max_epochs",
                                                  "setting the maximum number of training iterations",
                                                  1,
                                                  10000,
                                                  100
                                                  );

        ranged_message_descriptor<float> momentum(
                                                  "momentum",
                                                  "set the momentum",
                                                  0.0,
                                                  1.0,
                                                  0.5
                                                  );
        
        ranged_message_descriptor<float> gamma(
                                                  "gamma",
                                                  "set the gamma",
                                                  0.0,
                                                  10.0,
                                                  2.0
                                                  );
        
        // TODO: have optional value_labels for value labels
        valued_message_descriptor<int> input_activation_function(
                                                                 "input_activation_function",
                                                                 "set the activation function for the input layer, 0:LINEAR, 1:SIGMOID, 2:BIPOLAR_SIGMOID",
                                                                 {0, 1, 2},
                                                                 0
                                                                 );
        
        valued_message_descriptor<int> hidden_activation_function(
                                                                 "hidden_activation_function",
                                                                 "set the activation function for the hidden layer, 0:LINEAR, 1:SIGMOID, 2:BIPOLAR_SIGMOID",
                                                                 {0, 1, 2},
                                                                 0
                                                                 );
        
        valued_message_descriptor<int> output_activation_function(
                                                                 "output_activation_function",
                                                                 "set the activation function for the output layer, 0:LINEAR, 1:SIGMOID, 2:BIPOLAR_SIGMOID",
                                                                 {0, 1, 2},
                                                                 0
                                                                 );

                                                                 
        ranged_message_descriptor<int> rand_training_iterations(
                                                                 "rand_training_iterations",
                                                                 "set the number of random training iterations",
                                                                 0,
                                                                 1000,
                                                                 10
                                                                 );

        valued_message_descriptor<bool> use_validation_set(
                                                           "use_validation_set",
                                                           "set whether to use a validation training set",
                                                           {false, true},
                                                           true
                                                           );
        
        ranged_message_descriptor<int> validation_set_size(
                                                           "validation_set_size",
                                                           "set the size of the validation set",
                                                           1,
                                                           100,
                                                           20
                                                           );
        
        valued_message_descriptor<bool> randomize_training_order(
                                                           "randomize_training_order",
                                                           "sets whether to randomize the training order",
                                                           {false, true},
                                                           false
                                                           );
        
        
        descriptors[ml::k_ann].add_message_descriptor(add_ann, probs, mode, null_rejection, null_rejection_coeff, num_outputs, num_hidden, min_epochs, max_epochs, momentum, gamma, input_activation_function, hidden_activation_function, output_activation_function, rand_training_iterations, use_validation_set, validation_set_size, randomize_training_order);
        
        
        //-- Classifiers
        //---- ml.svm
        ranged_message_descriptor<int> type(
                                            "type",
                                            "set SVM type,"
                                            " 0:C-SVC (multi-class),"
                                            " 1:nu-SVC (multi-class),"
                                            " 2:one-class SVM,"
                                           // " 3:epsilon-SVR (regression),"
                                           // " 4:nu-SVR (regression)"
                                            ,
                                            0,
                                            2,
                                            0
                                            //        "	0 -- C-SVC		(multi-class classification)\n"
                                            //        "	1 -- nu-SVC		(multi-class classification)\n"
                                            //        "	2 -- one-class SVM\n"
                                            //        "	3 -- epsilon-SVR	(regression)\n"
                                            //        "	4 -- nu-SVR		(regression)\n"

                                            );
        
        ranged_message_descriptor<int> kernel(
                                              "kernel",
                                              "set type of kernel function, "
                                              "0:linear, " // (u'*v),"
                                              "1:polynomial, " // (gamma*u'*v + coef0)^degree,"
                                              "2:radial basis function, " //: exp(-gamma*|u-v|^2),"
                                              "3:sigmoid, " //  tanh(gamma*u'*v + coef0),"
                                              "4:precomputed kernel (kernel values in training_set_file)",
                                              0,
                                              4,
                                              0
                                              //        "	0 -- linear: u'*v\n"
                                              //        "	1 -- polynomial: (gamma*u'*v + coef0)^degree\n"
                                              //        "	2 -- radial basis function: exp(-gamma*|u-v|^2)\n"
                                              //        "	3 -- sigmoid: tanh(gamma*u'*v + coef0)\n"
                                              //        "	4 -- precomputed kernel (kernel values in training_set_file)\n"
                                              );
        
        ranged_message_descriptor<float> degree(
                                              "degree",
                                              "set degree in kernel function",
                                              0,
                                              20,
                                              3
                                              );
        
        ranged_message_descriptor<float> svm_gamma(
                                              "gamma",
                                              "set gamma in kernel function",
                                              0.0,
                                              1.0,
                                              0.5
                                              );
        
        ranged_message_descriptor<float> coef0(
                                               "coef0",
                                               "coef0 in kernel function",
                                               INFINITY * -1.f, INFINITY,
                                               0.0
                                               );
        
        ranged_message_descriptor<float> cost(
                                               "cost",
                                               "set the parameter C of C-SVC, epsilon-SVR, and nu-SVR",
                                               INFINITY * -1.f, INFINITY,
                                               1.0
                                               );
        
        ranged_message_descriptor<float> nu(
                                              "nu",
                                              "set the parameter nu of nu-SVC, one-class SVM, and nu-SVR",
                                              INFINITY * -1.f, INFINITY,
                                              0.5
                                              );
        
        message_descriptor cross_validation(
                                            "cross_validation",
                                            "perform cross validation"
                                            );
        
        ranged_message_descriptor<int> num_folds(
                                                 "num_folds",
                                                 "set the number of folds used for cross validation",
                                                 1, 100,
                                                 10
                                                 );
        
        descriptors[ml::k_svm].add_message_descriptor(cross_validation, num_folds, type, kernel, degree, svm_gamma, coef0, cost, nu);
        
        //---- ml.adaboost        
        ranged_message_descriptor<int> num_boosting_iterations(
                                                                "num_boosting_iterations",
                                                               "set the number of boosting iterations that should be used when training the model",
                                                               0,
                                                               200,
                                                               20
                                                               );
        
        valued_message_descriptor<int> prediction_method(
                                                        "prediction_method",
                                                         "set the Adaboost prediction method, 0:MAX_VALUE, 1:MAX_POSITIVE_VALUE",
                                                         {GRT::AdaBoost::MAX_VALUE, GRT::AdaBoost::MAX_POSITIVE_VALUE},
                                                         GRT::AdaBoost::MAX_VALUE
                                                         
        );
        
        valued_message_descriptor<int> set_weak_classifier(
                                                           "set_weak_classifier",
                                                           "sets the weak classifier to be used by Adaboost, 0:DECISION_STUMP, 1:RADIAL_BASIS_FUNCTION",
                                                           {ml::weak_classifiers::DECISION_STUMP, ml::weak_classifiers::RADIAL_BASIS_FUNCTION},
                                                           ml::weak_classifiers::DECISION_STUMP
                                                           );
        
        valued_message_descriptor<int> add_weak_classifier(
                                                           "add_weak_classifier",
                                                           "add a weak classifier to the list of classifiers used by Adaboost",
                                                           {ml::weak_classifiers::DECISION_STUMP, ml::weak_classifiers::RADIAL_BASIS_FUNCTION},
                                                           ml::weak_classifiers::DECISION_STUMP
                                                           );

        descriptors[ml::k_adaboost].add_message_descriptor(num_boosting_iterations, prediction_method, set_weak_classifier, add_weak_classifier);
        
        //---- ml.anbc
        message_descriptor weights("weights",
                                   "vector of 1 integer and N floating point values where the integer is a class label and the floats are the weights for that class. Sending weights with a vector size of zero clears all weights"
                                   );
        
        descriptors[ml::k_anbc].add_message_descriptor(weights);
        
        //---- ml.dtw
        valued_message_descriptor<int> rejection_mode(
                                                      "rejection_mode",
                                                      "sets the method used for null rejection, 0:TEMPLATE_THRESHOLDS, 1:CLASS_LIKELIHOODS, 2:THRESHOLDS_AND_LIKELIHOODS",
                                                      {GRT::DTW::TEMPLATE_THRESHOLDS, GRT::DTW::CLASS_LIKELIHOODS, GRT::DTW::THRESHOLDS_AND_LIKELIHOODS},
                                                      GRT::DTW::TEMPLATE_THRESHOLDS
                                                      );
        
        ranged_message_descriptor<float> warping_radius(
                                                        "warping_radius",
                                                        "sets the radius of the warping path, which is used if the constrain_warping_path is set to 1",
                                                        0.0,
                                                        1.0,
                                                        0.2
                                                        );
        
        valued_message_descriptor<bool> offset_time_series(
                                                           "offset_time_series",
                                                           "set if each timeseries should be offset by the first sample in the time series",
                                                           {false, true},
                                                           false
                                                           );
        
        valued_message_descriptor<bool> constrain_warping_path(
                                                           "constrain_warping_path",
                                                           "sets the warping path should be constrained to within a specific radius from the main diagonal of the cost matrix",
                                                           {false, true},
                                                           true
                                                           );
        
        valued_message_descriptor<bool> enable_z_normalization(
                                                               "enable_z_normalization",
                                                               "turn z-normalization on or off for training and prediction",
                                                               {false, true},
                                                               false
                                                               );
        
        valued_message_descriptor<bool> enable_trim_training_data(
                                                               "enable_trim_training_data",
                                                               "enabling data trimming prior to training",
                                                               {false, true},
                                                               false
                                                               );
  
        descriptors[ml::k_dtw].insert_message_descriptor(record);
        descriptors[ml::k_dtw].add_message_descriptor(rejection_mode, warping_radius, offset_time_series, constrain_warping_path, enable_z_normalization, enable_trim_training_data);
        
        //---- ml.hmmc
        valued_message_descriptor<int> model_type(
                                                  "model_type",
                                                  "set the model type used, 0:ERGODIC, 1:LEFTRIGHT",
                                                  {HMM_ERGODIC, HMM_LEFTRIGHT},
                                                  HMM_LEFTRIGHT
                                                  );
        
        ranged_message_descriptor<int> delta(
                                             "delta",
                                             "control how many states a model can transition to if the LEFTRIGHT model type is used",
                                             1,
                                             100,
                                             11
                                             );
        
        ranged_message_descriptor<int> max_num_iterations(
                                                          "max_num_iterations",
                                                          "set the maximum number of training iterations",
                                                          1,
                                                          1000,
                                                          100
                                                          );
        
        ranged_message_descriptor<int> committee_size(
                                                      "committee_size",
                                                      "set the committee size for the number of votes combined to make a prediction",
                                                      1,
                                                      1000,
                                                      5
                                                      );
        
        ranged_message_descriptor<int> downsample_factor(
                                                      "downsample_factor",
                                                         "set the downsample factor for the resampling of each training time series. A factor of 5 will result in each time series being resized (smaller) by a factor of 5",
                                                      1,
                                                      1000,
                                                      5
                                                      );
        
        descriptors[ml::k_hmmc].insert_message_descriptor(record);
        descriptors[ml::k_hmmc].add_message_descriptor(model_type, delta, max_num_iterations, committee_size, downsample_factor);
        
        //---- ml.softmax
        
        //---- ml.randforest
        ranged_message_descriptor<int> num_random_splits(
                                                         "num_random_splits",
                                                         "set the number of steps that will be used to search for the best spliting value for each node",
                                                         1,
                                                         1000,
                                                         100
                                                         );
        
        ranged_message_descriptor<int> min_samples_per_node2(
                                                            "min_samples_per_node",
                                                            "set the minimum number of samples that are allowed per node",
                                                            1,
                                                            100,
                                                            5
                                                            );
        
        ranged_message_descriptor<int> max_depth(
                                                 "max_depth",
                                                 "sets the maximum depth of the tree, any node that reaches this depth will automatically become a leaf node",
                                                 1,
                                                 100,
                                                 10
                                                 );

        descriptors[ml::k_randforest].add_message_descriptor(num_random_splits, min_samples_per_node2, max_depth);
        
        //----ml.mindist
        ranged_message_descriptor<int> num_clusters(
                                                    "num_clusters",
                                                    "set how many clusters each model will try to find during the training phase",
                                                    1,
                                                    100,
                                                    10
                                                    );

        descriptors[ml::k_mindist].add_message_descriptor(num_clusters);
                
        //---- ml.knn
//        "best_k_value_search:\tbool (0 or 1) set whether k value search is enabled or not (default 0)\n";

        ranged_message_descriptor<int> k(
                                         "k",
                                         "sets the K nearest neighbours that will be searched for by the algorithm during prediction",
                                         1,
                                         500,
                                         10
                                         );
        
        ranged_message_descriptor<int> min_k_search_value(
                                         "min_k_search_value",
                                         "set the minimum K value to use when searching for the best K value",
                                         1,
                                         500,
                                         1
                                         );
        
        ranged_message_descriptor<int> max_k_search_value(
                                                          "max_k_search_value",
                                                          "set the maximum K value to use when searching for the best K value",
                                                          1,
                                                          500,
                                                          10
                                                          );
        
        valued_message_descriptor<bool> best_k_value_search(
                                                            "best_k_value_search",
                                                            "set whether k value search is enabled or not",
                                                            {false, true},
                                                            false
                                                            );
        
        descriptors[ml::k_knn].add_message_descriptor(k, min_k_search_value, max_k_search_value, best_k_value_search);
        
        //---- ml.gmm
        ranged_message_descriptor<int> num_mixture_models(
                                                          "num_mixture_models",
                                                          "sets the number of mixture models used for class",
                                                          1,
                                                          20,
                                                          2
                                                          );

        descriptors[ml::k_gmm].add_message_descriptor(num_mixture_models);

        //---- ml.dtree
        valued_message_descriptor<bool> training_mode(
                                                      "training_mode",
                                                      "set the training mode",
                                                      {GRT::Tree::BEST_ITERATIVE_SPILT, GRT::Tree::BEST_RANDOM_SPLIT},
                                                      GRT::Tree::BEST_ITERATIVE_SPILT
                                                      );
        
        ranged_message_descriptor<int> num_splitting_steps(
                                                          "num_splitting_steps",
                                                          "set the number of steps that will be used to search for the best spliting value for each node",
                                                          1,
                                                          500,
                                                          100
                                                          );
        
        ranged_message_descriptor<int> min_samples_per_node(
                                                          "min_samples_per_node",
                                                          "sets the minimum number of samples that are allowed per node, if the number of samples at a node is below this value then the node will automatically become a leaf node",
                                                          1,
                                                          100,
                                                          5
                                                          );
        
        ranged_message_descriptor<int> dtree_max_depth(
                                                 "max_depth",
                                                 "sets the maximum depth of the tree, any node that reaches this depth will automatically become a leaf node",
                                                 1,
                                                 100,
                                                 10
                                                 );
        
        valued_message_descriptor<bool> remove_features_at_each_split(
                                                               "remove_features_at_each_split",
                                                               "set if a feature is removed at each spilt so it can not be used again",
                                                               {false, true},
                                                               false
                                                               );
        descriptors[ml::k_dtree].add_message_descriptor(training_mode, num_splitting_steps, min_samples_per_node, dtree_max_depth, remove_features_at_each_split);

        //-- Feature extraction
        
        //---- ml.peak
        ranged_message_descriptor<int> search_window_size(
                                                          "search_window_size",
                                                          "set the search window size in values",
                                                          1,
                                                          500,
                                                          5
                                                          );
        
        ranged_message_descriptor<float> peak(
                                              "float",
                                              "set the current value of the peak detector, a bang will be output when a peak is detected",
                                              INFINITY * -1.f, INFINITY,
                                              1
                                              );
        
        message_descriptor reset(
                                "reset",
                                "reset the peak detector"
                                );
        
        message_descriptor peak_help(
                                 "help",
                                 "post usage statement to the console"
                                 );


        descriptors[ml::k_peak].add_message_descriptor(peak, reset, search_window_size, peak_help);
        
        //---- ml.minmax
        
        message_descriptor input(
                                 "list",
                                 "list of float values in which to find minima and maxima",
                                 "0.1 0.5 -0.3 0.1 0.2 -0.1 0.7 0.1 0.3"
                                 );
        
        ranged_message_descriptor<float> minmax_delta(
                                                      "delta",
                                                      "setting the minmax delta. Input values will be considered to be peaks if they are greater than the previous and next value by at least the delta value",
                                                      0,
                                                      1,
                                                      0.1
                                                      );
        
        descriptors[ml::k_minmax].add_message_descriptor(input, minmax_delta);
        
        //---- ml.zerox
        
        valued_message_descriptor<float> zerox_map(
                                                   "map",
                                                   "a stream of input values in which to detect zero crossings",
                                                   0.5
                                                   );
        
        ranged_message_descriptor<float> dead_zone_threshold(
                                                             "dead_zone_threshold",
                                                             "set the dead zone threshold",
                                                             0.f,
                                                             1.f,
                                                             0.01f
                                                             );
        
        ranged_message_descriptor<int> zerox_search_window_size(
                                                          "search_window_size",
                                                          "set the search window size in values",
                                                          1,
                                                          500,
                                                          20
                                                          );
        
        descriptors[ml::k_zerox].add_message_descriptor(zerox_map, dead_zone_threshold, zerox_search_window_size);
    }
Example #8
0
int main(int argc, char *argv[])
{
   char *ftest = NULL;
   struct timeval t0, t1, diff;
   problem *train, *test;
   int regpath_flag = 0, backtracking_flag = 0, std_flag = 1, verbose_flag = 0;
   int iter = 1000, c, crossval_flag = 0, nr_folds = 10, nval = 100, nzerow;
   double *w, *y_hat, *mean, *var;
   double lambda_1 = 1e-6, lambda_2 = 0, tol = 1e-9, epsilon, fret;

   while (1)
   {
      static struct option long_options[] =
      {
         /* These options don't set a flag.
          We distinguish them by their indices. */
         {"help",                   no_argument, 0, 'h'},
         {"verbose",                no_argument, 0, 'v'},
         {"backtracking",           no_argument, 0, 'b'},
         {"original",               no_argument, 0, 'o'},
         {"test",             required_argument, 0, 't'},
         {"l1",               required_argument, 0, 'l'},
         {"l2",               required_argument, 0, 'r'},
         {"cross-validation", optional_argument, 0, 'c'},
         {"tolerance       ", optional_argument, 0, 'e'},
         {"regpath",          optional_argument, 0, 'p'},
         /*{"stop",             optional_argument, 0, 's'},*/
         {"max-iters",        optional_argument, 0, 'i'},
         {0, 0, 0, 0}
      };

      int option_index = 0;

      c = getopt_long (argc, argv, "vhbot:r:l:p::c::e::s::i::", long_options, &option_index);

      /* Detect the end of the options. */
      if (c == -1)
         break;

      switch(c)
      {
         case 'h':
            exit_with_help(argv[PROG]);
            break;

         case 'b':
            backtracking_flag = 1;
            break;

         case 'v':
            verbose_flag = 1;
            break;

         case 'o':
            std_flag = 0;
            break;

         case 't':
            ftest = optarg;
            break;

         case 'c':
            crossval_flag = 1;
            if (optarg)
               if (sscanf(optarg, "%d", &nr_folds) != 1)
               {
                  fprintf(stderr, "%s: option -c requires an int\n", argv[PROG]);
                  exit_without_help(argv[PROG]);
               }
            break;

         case 'e':
            if (optarg)
               if (sscanf(optarg, "%lf", &tol) != 1)
               {
                  fprintf(stderr, "%s: option -e requires a double\n", argv[PROG]);
                  exit_without_help(argv[PROG]);
               }
            break;

         case 'p':
            regpath_flag = 1;
            if (optarg)
               if (sscanf(optarg, "%d", &nval) != 1)
               {
                  fprintf(stderr, "%s: option -p requires an int\n", argv[PROG]);
                  exit_without_help(argv[PROG]);
               }
            break;

         //case 's':
         //   search_flag = 1;
         //   if (optarg)
         //      if (sscanf(optarg, "%lf:%d:%lf", &lmax, &nval, &lmin) != 3)
         //      {
         //         printf("%s\n", optarg);
         //         fprintf(stderr, "%s: option -s requires a range in the format MAX:NVAL:MIN\n", argv[PROG]);
         //         exit_without_help(argv[PROG]);
         //      }
         //   break;

         case 'l':
            if (sscanf(optarg, "%lf", &lambda_1) != 1)
            {
               fprintf(stderr, "%s: option -l requires a float\n", argv[PROG]);
               exit_without_help(argv[PROG]);
            }
            break;

         case 'r':
            if (sscanf(optarg, "%lf", &lambda_2) != 1)
            {
               fprintf(stderr, "%s: option -r requires a float\n", argv[PROG]);
               exit_without_help(argv[PROG]);
            }
            break;

         case 'i':
            if (optarg)
               if (sscanf(optarg, "%d", &iter) != 1)
               {
                  fprintf(stderr, "%s: option -i requires an int\n", argv[PROG]);
                  exit_without_help(argv[PROG]);
               }
            break;

         case '?':
            /* getopt_long already printed an error message. */
            exit_without_help(argv[PROG]);
            break;

         default:
            printf("?? getopt returned character code 0%o ??\n", c); 
      }
   }

   if ((argc - optind) < ARGC_MIN || (argc - optind) > ARGC_MAX)
   {
      fprintf(stderr, "%s: missing file operand\n", argv[PROG]);
      exit_without_help(argv[PROG]);
   }

   /* start time */
   gettimeofday(&t0, 0);

   train = read_problem(argv[optind]);

   fprintf(stdout, "n:%d dim:%d\n", train->n, train->dim);

   /* alloc vector for means and variances, plus 1 for output */
   if (std_flag)
   {
      fprintf(stdout, "Standarizing train set...\n");
      mean = dvector(1, train->dim+1);
      var = dvector(1, train->dim+1);
      standarize(train, 1, mean, var);
   }

   if (ftest)
   {
      test = read_problem(ftest);
      if (std_flag)
         standarize(test, 0, mean, var);
   }

   if (regpath_flag)
   {
      fprintf(stdout, "Regularization path...\n");
      /* in glmnet package they use 0.0001 instead of 0.001 ? */
      epsilon = train->n > train->dim ? 0.001 : 0.01;
      lambda_1 = regularization_path(train, epsilon, nval);
   }

   fprintf(stdout, "lambda_1: %g\n", lambda_1);
   fprintf(stdout, "lambda_2: %g\n", lambda_2);

   /* initialize weight vector to 0 */
   w = dvector(1, train->dim);
   dvset(w, train->dim, 0);

   fprintf(stdout, "Training model...\n");
   if (backtracking_flag)
      /*fista_backtrack(train, w, lambda_1, lambda_2, tol, &iter, &fret);*/
      fista_nocov(train, w, lambda_1, lambda_2, tol, &iter, &fret);
   else
      fista(train, w, lambda_1, lambda_2, tol, verbose_flag, &iter, &fret);

   y_hat = dvector(1, train->n);
   fista_predict(train, w, y_hat);

   nzerow = dvnotzero(w, train->dim);

   fprintf(stdout, "Iterations: %d\n", iter);
   fprintf(stdout, "Active weights: %d/%d\n", nzerow, train->dim);
   if (std_flag)
      fprintf(stdout, "MAE train: %g\n", var[train->dim+1]*mae(train->y, train->n, y_hat));
   fprintf(stdout, "MAE train (standarized): %g\n", mae(train->y, train->n, y_hat));

   free_dvector(y_hat, 1, train->n);

   if (crossval_flag)
   {
      dvset(w, train->dim, 0);
      y_hat = dvector(1, train->n);
      cross_validation(train, w, lambda_1, lambda_2, nr_folds, y_hat);
      fprintf(stdout, "MAE cross-validation: %lf\n",
              mae(train->y, train->n, y_hat));
      free_dvector(y_hat, 1, train->n);
   }

   if (ftest)
   {
      /* we alloc memory again since test size is different from train size */
      y_hat = dvector(1, test->n);
      fista_predict(test, w, y_hat);
      fprintf(stdout, "MAE test: %g\n", mae(test->y, test->n, y_hat));
      free_dvector(y_hat, 1, test->n);
   }

   /* stop time */
   gettimeofday(&t1, 0);
   timeval_subtract(&t1, &t0, &diff);
   fprintf(stdout, "Time(h:m:s.us): %02d:%02d:%02d.%06ld\n",
           diff.tv_sec/3600, (diff.tv_sec/60), diff.tv_sec%60, diff.tv_usec);

   if (verbose_flag)
   {
      fprintf(stdout, "Weights: ");
      dvprint(stdout, w, train->dim);
   }

   free_dvector(w, 1, train->dim);

   if (std_flag)
   {
      free_dvector(mean, 1, train->dim+1);
      free_dvector(var, 1, train->dim+1);
   }

   if (ftest)
   {
      free_dvector(test->y, 1, test->n);
      free_dmatrix(test->X, 1, test->n, 1, test->dim);
      free(test);
   }

   free_dvector(train->y, 1, train->n);
   free_dmatrix(train->X, 1, train->n, 1, train->dim);
   free(train);

   return 0;
}
void LVlinear_cross_validation(lvError *lvErr, const LVlinear_problem *prob_in, const LVlinear_parameter *param_in, const int32_t nr_fold, LVArray_Hdl<double> target_out){
	try{
		// Input verification: Nonempty problem
		if (prob_in->x == nullptr || (*(prob_in->x))->dimSize == 0)
			throw LVException(__FILE__, __LINE__, "Empty problem passed to liblinear_crossvalidation.");

		// Input verification: Problem dimensions
		if ((*(prob_in->x))->dimSize != (*(prob_in->y))->dimSize)
			throw LVException(__FILE__, __LINE__, "The problem must have an equal number of labels and feature vectors (x and y).");

		uint32_t nr_nodes = (*(prob_in->y))->dimSize;
		// Input validation: Number of feature vectors too large (exceeds max signed int)
		if(nr_nodes > INT_MAX)
			throw LVException(__FILE__, __LINE__, "Number of feature vectors too large (grater than " + std::to_string(INT_MAX) + ")");

		// Convert LVsvm_problem to svm_problem
		auto prob = std::make_unique<problem>();
		prob->l = nr_nodes;
		prob->y = (*(prob_in->y))->elt; 
		prob->n = 0; // Calculated later
		prob->bias = prob_in->bias;

		// Create and array of pointers (sparse datastructure)
		auto x = std::make_unique<feature_node*[]>(nr_nodes);
		prob->x = x.get();

		auto x_in = prob_in->x;
		for (unsigned int i = 0; i < (*x_in)->dimSize; i++){
			// Assign the innermost svm_node array pointers to the array of pointers
			auto xi_in_Hdl = (*x_in)->elt[i];
			x[i] = reinterpret_cast<feature_node*>((*xi_in_Hdl)->elt);

			// Input validation: Final index -1?
			if ((*xi_in_Hdl)->elt[(*xi_in_Hdl)->dimSize - 1].index != -1)
				throw LVException(__FILE__, __LINE__, "The index of the last element of each feature vector needs to be -1 (libsvm_crossvalidation).");

			// Calculate the max index
			// This detail is not exposed in LabVIEW, as setting the wrong value causes a crash
			// Second to last element should contain the max index for that feature vector (as they are in ascending order).
			auto secondToLast = (*xi_in_Hdl)->dimSize - 2; // Ignoring -1 index
			auto largestIndex = (*xi_in_Hdl)->elt[secondToLast].index;
			if (largestIndex > prob->n)
				prob->n = largestIndex;
		}

		// n increases by one if bias is present
		if (prob_in->bias >= 0)
			prob->n++;

		// Assign parameters to svm_parameter
		auto param = std::make_unique<parameter>();
		LVConvertParameter(*param_in, *param);

		// Verify parameters
		const char * param_check = check_parameter(prob.get(), param.get());
		if (param_check != nullptr)
			throw LVException(__FILE__, __LINE__, "Parameter check failed with the following error: " + std::string(param_check));

		// Allocate room in target_out
		LVResizeNumericArrayHandle(target_out, nr_nodes);

		// Run cross validation
		cross_validation(prob.get(), param.get(), nr_fold, (*target_out)->elt);
		
		(*target_out)->dimSize = nr_nodes;
	}
	catch (LVException &ex) {
		ex.returnError(lvErr);
		(*target_out)->dimSize = 0;
	}
	catch (std::exception &ex) {
		LVException::returnStdException(lvErr, __FILE__, __LINE__, ex);
		(*target_out)->dimSize = 0;
	}
	catch (...) {
		LVException ex(__FILE__, __LINE__, "Unknown exception has occurred");
		ex.returnError(lvErr);
		(*target_out)->dimSize = 0;
	}
}