// Test driver for a single regression tree: fit on the training data,
// save the model, reload it, and check that both copies agree.
// Project headers declaring RegressionTree, DataVector, PredictVector,
// ValueType, g_conf, LoadDataFromFile, CleanDataVector and RMSE are assumed.
#include <cassert>
#include <fstream>
#include <iostream>
#include <boost/lexical_cast.hpp>

int main(int argc, char *argv[]) {
  g_conf.number_of_feature = 3;
  g_conf.max_depth = 4;
  if (argc > 1) {
    g_conf.max_depth = boost::lexical_cast<int>(argv[1]);
  }

  DataVector d;
  bool r = LoadDataFromFile("../../data/train.dat", &d);
  assert(r);

  RegressionTree tree;
  tree.Fit(&d);

  // Persist the model, then reload it into a second tree.
  std::ofstream model_output("../../data/model");
  model_output << tree.Save();

  RegressionTree tree2;
  tree2.Load(tree.Save());

  DataVector::iterator iter = d.begin();
  PredictVector predict;
  for ( ; iter != d.end(); ++iter) {
    std::cout << (*iter)->ToString() << std::endl;
    ValueType p = tree2.Predict(**iter);
    predict.push_back(p);
    // The reloaded tree should give the same prediction as the original.
    std::cout << p << "," << tree.Predict(**iter) << std::endl;
  }

  std::cout << "rmse: " << RMSE(d, predict) << std::endl;

  CleanDataVector(&d);
  return 0;
}
// Weighted root-mean-square error over the first len examples:
// sqrt( sum_i w_i * (p_i - y_i)^2 / sum_i w_i ).
double RMSE(const DataVector &data, const PredictVector &predict, size_t len) {
  assert(data.size() >= len);
  assert(predict.size() >= len);

  double s = 0;
  double c = 0;
  for (size_t i = 0; i < len; ++i) {
    s += Squared(predict[i] - data[i]->label) * data[i]->weight;
    c += data[i]->weight;
  }

  return std::sqrt(s / c);
}
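The test programs call RMSE with only the data and prediction vectors, so a default value for len (presumably the full data size) is assumed in the declaration elsewhere. To make the weighted formula concrete, here is a minimal, self-contained sketch; the Example struct and WeightedRmse function are illustrative stand-ins, not the project's DataVector/Tuple types.

#include <cmath>
#include <cstdio>
#include <vector>

// Toy stand-in for the project's tuples: only label and weight matter here.
struct Example { double label; double weight; };

// Weighted RMSE: sqrt( sum_i w_i * (p_i - y_i)^2 / sum_i w_i ).
double WeightedRmse(const std::vector<Example> &data,
                    const std::vector<double> &predict) {
  double s = 0, c = 0;
  for (size_t i = 0; i < data.size(); ++i) {
    double err = predict[i] - data[i].label;
    s += err * err * data[i].weight;
    c += data[i].weight;
  }
  return std::sqrt(s / c);
}

int main() {
  std::vector<Example> data;
  std::vector<double> predict;
  const double labels[]  = { 1.0, -1.0, 1.0 };
  const double weights[] = { 1.0,  2.0, 1.0 };
  const double scores[]  = { 0.8, -0.5, 0.9 };
  for (int i = 0; i < 3; ++i) {
    Example e = { labels[i], weights[i] };
    data.push_back(e);
    predict.push_back(scores[i]);
  }
  // The middle example has weight 2, so its error counts twice as much.
  std::printf("weighted rmse = %f\n", WeightedRmse(data, predict));
  return 0;
}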
// GBDT regression test: train on argv[1], evaluate RMSE on argv[2].
// Optional arguments override max_depth, iterations, shrinkage and the
// feature/data sampling ratios. Project headers declaring GBDT, DataVector,
// g_conf, LoadDataFromFile, Logit, RMSE, Elapsed, etc. are assumed.
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <boost/lexical_cast.hpp>

int main(int argc, char *argv[]) {
  std::srand(unsigned(std::time(0)));

  g_conf.number_of_feature = 3;
  g_conf.max_depth = 4;
  g_conf.iterations = 100;
  g_conf.shrinkage = 0.1F;

  if (argc < 3) return -1;
  std::string train_file(argv[1]);
  std::string test_file(argv[2]);

  if (argc > 3) { g_conf.max_depth = boost::lexical_cast<int>(argv[3]); }
  if (argc > 4) { g_conf.iterations = boost::lexical_cast<int>(argv[4]); }
  if (argc > 5) { g_conf.shrinkage = boost::lexical_cast<float>(argv[5]); }
  if (argc > 6) { g_conf.feature_sample_ratio = boost::lexical_cast<float>(argv[6]); }
  if (argc > 7) { g_conf.data_sample_ratio = boost::lexical_cast<float>(argv[7]); }

  g_conf.debug = true;
  // g_conf.loss = LOG_LIKELIHOOD;
  g_conf.loss = SQUARED_ERROR;

  DataVector d;
  bool r = LoadDataFromFile(train_file, &d);
  assert(r);
  // g_conf.min_leaf_size = d.size() / 10;

  std::cout << g_conf.ToString() << std::endl;

  // Fit the ensemble and report training time.
  GBDT gbdt;
  Elapsed elapsed;
  gbdt.Fit(&d);
  std::cout << "fit time: " << elapsed.Tell() << std::endl;

  CleanDataVector(&d);
  FreeVector(&d);

  // Save the model to <train_file>.model, then reload it into a second ensemble.
  std::string model_file = train_file + ".model";
  std::ofstream model_output(model_file.c_str());
  model_output << gbdt.Save();

  GBDT gbdt2;
  gbdt2.Load(gbdt.Save());

  DataVector d2;
  r = LoadDataFromFile(test_file, &d2);
  assert(r);

  elapsed.Reset();
  DataVector::iterator iter = d2.begin();
  PredictVector predict;
  for ( ; iter != d2.end(); ++iter) {
    ValueType p;
    if (g_conf.loss == SQUARED_ERROR) {
      p = gbdt2.Predict(**iter);
      predict.push_back(p);
    } else if (g_conf.loss == LOG_LIKELIHOOD) {
      // Map the raw score to a probability and threshold it at 0.5.
      p = gbdt2.Predict(**iter);
      p = Logit(p);
      if (p >= 0.5) p = 1;
      else p = -1;
      predict.push_back(p);
    }
    // std::cout << (*iter)->ToString() << std::endl << p << std::endl;
  }
  std::cout << "predict time: " << elapsed.Tell() << std::endl;
  std::cout << "rmse: " << RMSE(d2, predict) << std::endl;

  CleanDataVector(&d2);
  return 0;
}
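In the LOG_LIKELIHOOD branch above, the raw additive score from the ensemble is passed through Logit and thresholded at 0.5 to recover the +1/-1 labels used in the data files. The stand-alone sketch below illustrates that mapping; LogitSketch is a hypothetical helper using the plain logistic function 1 / (1 + e^-x), and the library's own Logit may scale the score differently.

#include <cmath>
#include <cstdio>

// Hypothetical stand-in for the library's Logit helper: the plain logistic
// function mapping a real-valued score to a probability in (0, 1).
double LogitSketch(double score) {
  return 1.0 / (1.0 + std::exp(-score));
}

int main() {
  // Raw ensemble scores become probabilities, then +1/-1 labels via the
  // same 0.5 threshold used in the test program above.
  const double scores[] = { -2.0, -0.1, 0.3, 1.5 };
  for (int i = 0; i < 4; ++i) {
    double p = LogitSketch(scores[i]);
    int label = (p >= 0.5) ? 1 : -1;
    std::printf("score=% .2f  p=%.3f  label=%+d\n", scores[i], p, label);
  }
  return 0;
}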
// GBDT binary-classification test: train on argv[1] with the log-likelihood
// loss, score argv[2], write <test_file>.predict and report AUC.
// Project headers declaring GBDT, DataVector, g_conf, Logit, Auc, Elapsed,
// LoadDataFromFile, etc. are assumed.
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
#include <boost/lexical_cast.hpp>
#ifdef USE_OPENMP
#include <omp.h>
#endif

int main(int argc, char *argv[]) {
  std::srand(unsigned(std::time(0)));

#ifdef USE_OPENMP
  const int threads_wanted = 4;
  omp_set_num_threads(threads_wanted);
#endif

  g_conf.number_of_feature = 79;
  g_conf.max_depth = 6;
  g_conf.iterations = 10;
  g_conf.shrinkage = 0.1F;

  if (argc < 3) return -1;
  std::string train_file(argv[1]);
  std::string test_file(argv[2]);

  if (argc > 3) { g_conf.max_depth = boost::lexical_cast<int>(argv[3]); }
  if (argc > 4) { g_conf.iterations = boost::lexical_cast<int>(argv[4]); }
  if (argc > 5) { g_conf.shrinkage = boost::lexical_cast<float>(argv[5]); }
  if (argc > 6) { g_conf.feature_sample_ratio = boost::lexical_cast<float>(argv[6]); }
  if (argc > 7) { g_conf.data_sample_ratio = boost::lexical_cast<float>(argv[7]); }

  int debug = 0;
  if (argc > 8) { debug = boost::lexical_cast<int>(argv[8]); }

  g_conf.loss = LOG_LIKELIHOOD;
  g_conf.debug = debug > 0;

  DataVector d;
  bool r = LoadDataFromFile(train_file, &d);
  assert(r);

  g_conf.min_leaf_size = d.size() / 40;
  std::cout << "configure: " << std::endl << g_conf.ToString() << std::endl;

  if (argc > 9) {
    g_conf.LoadFeatureCost(argv[9]);
  }

  GBDT gbdt;
  Elapsed elapsed;
  gbdt.Fit(&d);
  std::cout << "fit time: " << elapsed.Tell() << std::endl;

  std::string model_file = train_file + ".model";
  std::ofstream model_output(model_file.c_str());
  model_output << gbdt.Save();

  CleanDataVector(&d);
  FreeVector(&d);

  DataVector d2;
  r = LoadDataFromFile(test_file, &d2);
  assert(r);

  // Score the test set: Logit turns the additive score into a probability.
  elapsed.Reset();
  DataVector::iterator iter = d2.begin();
  PredictVector predict;
  for ( ; iter != d2.end(); ++iter) {
    ValueType p = Logit(gbdt.Predict(**iter));
    predict.push_back(p);
  }
  std::cout << "predict time: " << elapsed.Tell() << std::endl;

  // Dump "<probability> <original tuple>" lines and accumulate the AUC.
  std::string predict_file = test_file + ".predict";
  std::ofstream predict_output(predict_file.c_str());

  Auc auc;
  for (size_t i = 0; i < d2.size(); ++i) {
    predict_output << predict[i] << " " << d2[i]->ToString() << std::endl;
    auc.Add(predict[i], d2[i]->label);
  }
  std::cout << "auc: " << auc.CalculateAuc() << std::endl;
  auc.PrintConfusionTable();

  CleanDataVector(&d2);
  return 0;
}
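The Auc object above accumulates (prediction, label) pairs and reports the area under the ROC curve. As an independent illustration of what CalculateAuc() measures (this is not the library's implementation), the sketch below computes AUC with the rank-sum formula, treating labels greater than zero as positives and averaging ranks over tied scores.

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

// ROC AUC via the rank-sum (Mann-Whitney) formula. Samples are
// (score, label) pairs; labels > 0 count as positive.
double AucSketch(std::vector<std::pair<double, int> > samples) {
  std::sort(samples.begin(), samples.end());
  double rank_sum_pos = 0;            // sum of (average) ranks of positives
  size_t n_pos = 0, n_neg = 0;
  size_t i = 0;
  while (i < samples.size()) {
    size_t j = i;
    while (j < samples.size() && samples[j].first == samples[i].first) ++j;
    double avg_rank = 0.5 * ((i + 1) + j);   // 1-based average rank of tie group
    for (size_t k = i; k < j; ++k) {
      if (samples[k].second > 0) { rank_sum_pos += avg_rank; ++n_pos; }
      else { ++n_neg; }
    }
    i = j;
  }
  if (n_pos == 0 || n_neg == 0) return 0.5;  // undefined; report chance level
  return (rank_sum_pos - 0.5 * n_pos * (n_pos + 1))
         / (static_cast<double>(n_pos) * n_neg);
}

int main() {
  std::vector<std::pair<double, int> > samples;
  samples.push_back(std::make_pair(0.9,  1));
  samples.push_back(std::make_pair(0.8, -1));
  samples.push_back(std::make_pair(0.7,  1));
  samples.push_back(std::make_pair(0.3, -1));
  std::printf("auc = %f\n", AucSketch(samples));  // 0.75 for this toy set
  return 0;
}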