Exemple #1
0
// Trains an epsilon-SVR on the bundled test dataset, writes the model to disk,
// reloads it into a fresh SVR instance, and checks that the mean squared error
// of the reloaded model on the training instances matches a reference value.
TEST(SVRTest, train) {

    DataSet dataset;
    dataset.load_from_file(DATASET_DIR "test_dataset.txt");
//    dataset.load_from_file(DATASET_DIR "E2006.train");
    SvmParam param;
    param.gamma = 0.25;
    param.C = 10;
    param.p = 0.1;
    param.epsilon = 0.001;
    param.nu = 0.5;
    param.kernel_type = SvmParam::RBF;
    param.svm_type = SvmParam::EPSILON_SVR;
    SvmModel *model = new SVR();
    model->train(dataset, param);
    model->save_to_file(DATASET_DIR "test_dataset.txt.model");
    SvmModel *new_model = new SVR();
    new_model->load_from_file(DATASET_DIR "test_dataset.txt.model");
    vector<float_type> predict_y = new_model->predict(dataset.instances(), 100);

    // Accumulate squared residuals between predictions and dataset labels.
    float_type mse = 0;
    for (unsigned i = 0; i < predict_y.size(); ++i) {
        const auto residual = predict_y[i] - dataset.y()[i];
        mse += residual * residual;
    }
    mse /= predict_y.size();

    LOG(INFO) << "MSE = " << mse;
    EXPECT_NEAR(mse, 0.03097, 1e-4);

    // EXPECT_* does not abort the test body, so this cleanup always runs.
    delete model;
    delete new_model;
}
Exemple #2
0
/**
 * Command-line training driver.
 *
 * Loads the data set named by argv[1], trains a model on it, stores the model
 * to argv[2], and prints the elapsed time of each phase.
 *
 * @return EXIT_FAILURE if loading, training or storing reports anything other
 *         than SUCCESS; EXIT_SUCCESS otherwise (including when help() does
 *         not return SUCCESS).
 */
int main(int argc, char **argv) {
    struct svm_params params;
    struct svm_trainingInfo trainingInfo;
    SVM_FILE_TYPE file_type = LIBSVM_TXT;
    SVM_DATA_TYPE data_type = UNKNOWN;
    SVM_MODEL_FILE_TYPE model_file_type = M_LIBSVM_TXT;
    MyStopWatch clAll, clLoad, clProc, clStore;
    /* Start from a defined value instead of indeterminate pointers. */
    SvmData *data = nullptr;
    SvmModel *model = nullptr;

    /* Check input arguments. */
    /* NOTE(review): data and model are dereferenced below, so help() presumably
     * receives them by reference and allocates them - confirm against its
     * declaration. */
    if(help(argc, argv, data, model, &params, &file_type, &data_type, &model_file_type) != SUCCESS) {
        return EXIT_SUCCESS;
    }

    clAll.start();

    /* Load data. */
    clLoad.start();
    if(data->Load(argv[1], file_type, data_type) != SUCCESS) {
        delete model;
        delete data;
        return EXIT_FAILURE;
    }
    clLoad.stop();

    clProc.start();
    /* Train model. */
    if(model->Train(data, &params, &trainingInfo) != SUCCESS) {
        delete model;
        delete data;
        return EXIT_FAILURE;
    }
    clProc.stop();

    clStore.start();
    /* Store model. */
    if(model->StoreModel(argv[2], model_file_type) != SUCCESS) {
        delete model;
        delete data;
        return EXIT_FAILURE;
    }

    /* Clean memory (counted as part of the storing time, as before). */
    delete model;
    delete data;

    clStore.stop();

    clAll.stop();

    /* Print results. */
    printf("\nLoading    elapsed time : %0.4f s\n", clLoad.getTime());
    printf("Processing elapsed time : %0.4f s\n", clProc.getTime());
    printf("Storing    elapsed time : %0.4f s\n", clStore.getTime());
    printf("Total      elapsed time : %0.4f s\n", clAll.getTime());

    return EXIT_SUCCESS;
}
    /**
     * Loads the model stored at model_file_path, predicts every instance of
     * predict_dataset, writes one prediction per line to output_file_path and,
     * for classification and regression models, logs a score against
     * predict_dataset.y().
     *
     * The model class is chosen from the "svm_type <name>" header at the start
     * of the model file. An unrecognised name is logged and the function
     * returns without predicting.
     *
     * @param predict_dataset  instances (and labels) to predict
     * @param parser           supplies gpu_id and param_cmd.max_mem_size
     * @param model_file_path  path of the model file to read
     * @param output_file_path path of the prediction file to write
     */
    void thundersvm_predict_sub(DataSet& predict_dataset, CMDParser& parser, char* model_file_path, char* output_file_path){
        fstream file;
        file.open(model_file_path, std::fstream::in);
        string feature, svm_type;
        file >> feature >> svm_type;
        // Only the header is needed here; the model re-reads the file itself.
        file.close();
        CHECK_EQ(feature, "svm_type");

        const bool is_classifier = (svm_type == "c_svc" || svm_type == "nu_svc");
        const bool is_regressor = (svm_type == "epsilon_svr" || svm_type == "nu_svr");

        SvmModel *model = nullptr;
        if (svm_type == "c_svc") {
            model = new SVC();
        } else if (svm_type == "nu_svc") {
            model = new NuSVC();
        } else if (svm_type == "one_class") {
            model = new OneClassSVC();
            //todo determine a metric
        } else if (svm_type == "epsilon_svr") {
            model = new SVR();
        } else if (svm_type == "nu_svr") {
            model = new NuSVR();
        }
        if (model == nullptr) {
            // Previously this fell through and dereferenced a null pointer.
            LOG(ERROR) << "unsupported svm_type in model file: " << svm_type;
            return;
        }

#ifdef USE_CUDA
        CUDA_CHECK(cudaSetDevice(parser.gpu_id));
#endif

        model->set_max_memory_size_Byte(parser.param_cmd.max_mem_size);
        model->load_from_file(model_file_path);

        vector<float_type> predict_y;
        predict_y = model->predict(predict_dataset.instances(), -1);

        file.open(output_file_path, fstream::out);
        for (float_type value : predict_y) {
            // '\n' avoids a flush per line; close() flushes once at the end.
            file << value << '\n';
        }
        file.close();

        // Metrics live on the stack so nothing has to be freed afterwards.
        if (is_classifier) {
            Accuracy metric;
            LOG(INFO) << metric.name() << " = " << metric.score(predict_y, predict_dataset.y());
        } else if (is_regressor) {
            MSE metric;
            LOG(INFO) << metric.name() << " = " << metric.score(predict_y, predict_dataset.y());
        }

        delete model;
    }
// Round-trip check for the one-class SVM: train on the bundled dataset, save
// the model, reload it into a new instance, and count how many training
// instances the reloaded model gives a strictly positive prediction.
TEST(OneClassSVCTest, train) {
    DataSet dataset;
    dataset.load_from_file(DATASET_DIR "test_dataset.txt");

    SvmParam param;
    param.gamma = 0.5;
    param.nu = 0.1;
    param.epsilon = 0.001;
    param.kernel_type = SvmParam::RBF;
    param.svm_type = SvmParam::ONE_CLASS;

    // Train and persist.
    SvmModel *trained = new OneClassSVC();
    trained->train(dataset, param);
    trained->save_to_file(DATASET_DIR "test_dataset.txt.model");

    // Reload into an independent instance and predict with it.
    SvmModel *reloaded = new OneClassSVC();
    reloaded->load_from_file(DATASET_DIR "test_dataset.txt.model");
    vector<float_type> predict_y = reloaded->predict(dataset.instances(), 100);

    int n_pos = 0;
    for (float_type score : predict_y) {
        n_pos += (score > 0) ? 1 : 0;
    }
    EXPECT_EQ(n_pos, 135);

    delete trained;
    delete reloaded;
}
    //void DataSet_load_from_python(DataSet *dataset, float *y, char **x, int len) {dataset->load_from_python(y, x, len);}
    /**
     * Trains, or cross-validates, an SVM on train_dataset and logs a score.
     *
     * The model class follows parser.param_cmd.svm_type. Without cross
     * validation the trained model is saved to model_file_path and then used to
     * predict the training instances. With cross validation nothing is saved
     * and the result of cross_validation() is scored instead.
     *
     * Side effects on the arguments: for NU_SVC the dataset is grouped by class
     * (group_classes()), and when the kernel is not LINEAR and no gamma was
     * given, parser.param_cmd.gamma is set to 1 / n_features.
     *
     * @param train_dataset   training data
     * @param parser          parsed options (svm/kernel type, nu, gamma, fold
     *                        count, gpu id)
     * @param model_file_path where the trained model is written
     */
    void thundersvm_train_sub(DataSet& train_dataset, CMDParser& parser, char* model_file_path){
        SvmModel *model = nullptr;
        switch (parser.param_cmd.svm_type) {
            case SvmParam::C_SVC:
                model = new SVC();
                break;
            case SvmParam::NU_SVC:
                model = new NuSVC();
                break;
            case SvmParam::ONE_CLASS:
                model = new OneClassSVC();
                break;
            case SvmParam::EPSILON_SVR:
                model = new SVR();
                break;
            case SvmParam::NU_SVR:
                model = new NuSVR();
                break;
        }
        if (model == nullptr) {
            // No case matched; bail out instead of dereferencing null below.
            LOG(ERROR) << "unsupported svm_type";
            return;
        }

        //todo add this to check_parameter method
        if (parser.param_cmd.svm_type == SvmParam::NU_SVC) {
            // Every pair of classes must satisfy nu * (n1 + n2) / 2 <= min(n1, n2).
            train_dataset.group_classes();
            for (int i = 0; i < train_dataset.n_classes(); ++i) {
                int n1 = train_dataset.count()[i];
                for (int j = i + 1; j < train_dataset.n_classes(); ++j) {
                    int n2 = train_dataset.count()[j];
                    if (parser.param_cmd.nu * (n1 + n2) / 2 > min(n1, n2)) {
                        printf("specified nu is infeasible\n");
                        delete model;
                        return;
                    }
                }
            }
        }
        // Default gamma for non-linear kernels when the user did not set one.
        if (parser.param_cmd.kernel_type != SvmParam::LINEAR && !parser.gamma_set) {
            parser.param_cmd.gamma = 1.f / train_dataset.n_features();
        }
#ifdef USE_CUDA
        CUDA_CHECK(cudaSetDevice(parser.gpu_id));
#endif

        vector<float_type> predict_y;
        if (parser.do_cross_validation) {
            predict_y = model->cross_validation(train_dataset, parser.param_cmd, parser.nr_fold);
        } else {
            model->train(train_dataset, parser.param_cmd);
            model->save_to_file(model_file_path);
            LOG(INFO) << "evaluating training score";
            predict_y = model->predict(train_dataset.instances(), -1);
        }

        // Metrics live on the stack so nothing has to be freed afterwards.
        switch (parser.param_cmd.svm_type) {
            case SvmParam::C_SVC:
            case SvmParam::NU_SVC: {
                Accuracy metric;
                LOG(INFO) << metric.name() << " = " << metric.score(predict_y, train_dataset.y()) << std::endl;
                break;
            }
            case SvmParam::EPSILON_SVR:
            case SvmParam::NU_SVR: {
                MSE metric;
                LOG(INFO) << metric.name() << " = " << metric.score(predict_y, train_dataset.y()) << std::endl;
                break;
            }
            case SvmParam::ONE_CLASS: {
                // No metric is defined for one-class models.
                break;
            }
        }

        delete model;
    }
    void thundersvm_train_matlab(int argc, char **argv) {
        CMDParser parser;
        parser.parse_command_line(argc, argv);
        /*
        parser.param_cmd.svm_type = SvmParam::NU_SVC;
        parser.param_cmd.kernel_type = SvmParam::RBF;
        parser.param_cmd.C = 100;
        parser.param_cmd.gamma = 0;
        parser.param_cmd.nu = 0.1; 
        parser.param_cmd.epsilon = 0.001;
        */

        DataSet train_dataset;
        char input_file_path[1024] = DATASET_DIR;
        char model_file_path[1024] = DATASET_DIR;
        strcat(input_file_path, parser.svmtrain_input_file_name);
        strcat(model_file_path, parser.model_file_name);
        train_dataset.load_from_file(input_file_path);
        SvmModel *model = nullptr;
        switch (parser.param_cmd.svm_type) {
            case SvmParam::C_SVC:
                model = new SVC();
                break;
            case SvmParam::NU_SVC:
                model = new NuSVC();
                break;
            case SvmParam::ONE_CLASS:
                model = new OneClassSVC();
                break;
            case SvmParam::EPSILON_SVR:
                model = new SVR();
                break;
            case SvmParam::NU_SVR:
                model = new NuSVR();
                break;
        }

    	//todo add this to check_parameter method
        if (parser.param_cmd.svm_type == SvmParam::NU_SVC) {
            train_dataset.group_classes();
            for (int i = 0; i < train_dataset.n_classes(); ++i) {
                int n1 = train_dataset.count()[i];
                for (int j = i + 1; j < train_dataset.n_classes(); ++j) {
                    int n2 = train_dataset.count()[j];
                    if (parser.param_cmd.nu * (n1 + n2) / 2 > min(n1, n2)) {
                        printf("specified nu is infeasible\n");
                        return;
                    }
                }
            }
        }

    #ifdef USE_CUDA
        CUDA_CHECK(cudaSetDevice(parser.gpu_id));
    #endif

        vector<float_type> predict_y, test_y;
        if (parser.do_cross_validation) {
            vector<float_type> test_predict = model->cross_validation(train_dataset, parser.param_cmd, parser.nr_fold);
            int dataset_size = test_predict.size() / 2;
    	   test_y.insert(test_y.end(), test_predict.begin(), test_predict.begin() + dataset_size);
    	   predict_y.insert(predict_y.end(), test_predict.begin() + dataset_size, test_predict.end());
        } else {
            model->train(train_dataset, parser.param_cmd);
            model->save_to_file(model_file_path);
        	//predict_y = model->predict(train_dataset.instances(), 10000);
    		//test_y = train_dataset.y();
        }
	/*
        //perform svm testing
        Metric *metric = nullptr;
        switch (parser.param_cmd.svm_type) {
            case SvmParam::C_SVC:
            case SvmParam::NU_SVC: {
                metric = new Accuracy();
                break;
            }
            case SvmParam::EPSILON_SVR:
            case SvmParam::NU_SVR: {
                metric = new MSE();
                break;
            }
            case SvmParam::ONE_CLASS: {
            }
        }
        if (metric) {
            LOG(INFO) << metric->name() << " = " << metric->score(predict_y, test_y); 
        }
	*/
        return;
    }
    void thundersvm_predict_matlab(int argc, char **argv){
        CMDParser parser;
        parser.parse_command_line(argc, argv);

        char model_file_path[1024] = DATASET_DIR;
        char predict_file_path[1024] = DATASET_DIR;
        char output_file_path[1024] = DATASET_DIR;
        strcat(model_file_path, parser.svmpredict_model_file_name);
        strcat(predict_file_path, parser.svmpredict_input_file);
        strcat(output_file_path, parser.svmpredict_output_file);
        std::fstream file;
        //FILE *fp;
        //fp = fopen("model_file_path", "rb");
        file.open(model_file_path, std::fstream::in);
        string feature, svm_type;
        //char feature[20];
        //char svm_type[20];
        //fscanf(fp, "%s", feature);
        //fscanf(fp, "%s", svm_type);
        file >> feature >> svm_type;
        CHECK_EQ(feature, "svm_type");
        SvmModel *model = nullptr;
        Metric *metric = nullptr;
        if (svm_type == "c_svc") {
            model = new SVC();
            metric = new Accuracy();
        } else if (svm_type == "nu_svc") {
            model = new NuSVC();
            metric = new Accuracy();
        } else if (svm_type == "one_class") {
            model = new OneClassSVC();
            //todo determine a metric
        } else if (svm_type == "epsilon_svr") {
            model = new SVR();
            metric = new MSE();
        } else if (svm_type == "nu_svr") {
            model = new NuSVR();
            metric = new MSE();
        }

    #ifdef USE_CUDA
        CUDA_CHECK(cudaSetDevice(parser.gpu_id));
    #endif

        model->load_from_file(model_file_path);
        //fclose(fp);
	file.close();
        //fp = fopen("output_file_path", "wb");
        file.open(output_file_path, std::fstream::out);
        DataSet predict_dataset;
        predict_dataset.load_from_file(predict_file_path);
        vector<float_type> predict_y;
        predict_y = model->predict(predict_dataset.instances(), 10000);
	    for (int i = 0; i < predict_y.size(); ++i) {
            //fprintf(fp, "%s\n", predict_y[i]);
            file << predict_y[i] << std::endl;
        }
        //fclose(fp);
	file.close();
        if (metric) {
            LOG(INFO) << metric->name() << " = " << metric->score(predict_y, predict_dataset.y());
        }
    }