int main(int argc, char *argv[]) { std::srand ( unsigned ( std::time(0) ) ); #ifdef USE_OPENMP const int threads_wanted = 4; omp_set_num_threads(threads_wanted); #endif g_conf.number_of_feature = 79; g_conf.max_depth = 6; g_conf.iterations = 10; g_conf.shrinkage = 0.1F; if (argc < 3) return -1; std::string train_file(argv[1]); std::string test_file(argv[2]); if (argc > 3) { g_conf.max_depth = boost::lexical_cast<int>(argv[3]); } if (argc > 4) { g_conf.iterations = boost::lexical_cast<int>(argv[4]); } if (argc > 5) { g_conf.shrinkage = boost::lexical_cast<float>(argv[5]); } if (argc > 6) { g_conf.feature_sample_ratio = boost::lexical_cast<float>(argv[6]); } if (argc > 7) { g_conf.data_sample_ratio = boost::lexical_cast<float>(argv[7]); } int debug = 0; if (argc > 8) { debug = boost::lexical_cast<int>(argv[8]); } g_conf.loss = LOG_LIKELIHOOD; g_conf.debug = debug > 0? true : false; DataVector d; bool r = LoadDataFromFile(train_file, &d); assert(r); g_conf.min_leaf_size = d.size() / 40; std::cout << "configure: " << std::endl << g_conf.ToString() << std::endl; if (argc > 9) { g_conf.LoadFeatureCost(argv[9]); } GBDT gbdt; Elapsed elapsed; gbdt.Fit(&d); std::cout << "fit time: " << elapsed.Tell() << std::endl; std::string model_file = train_file + ".model"; std::ofstream model_output(model_file.c_str()); model_output << gbdt.Save(); CleanDataVector(&d); FreeVector(&d); DataVector d2; r = LoadDataFromFile(test_file, &d2); assert(r); elapsed.Reset(); DataVector::iterator iter = d2.begin(); PredictVector predict; for ( ; iter != d2.end(); ++iter) { ValueType p = Logit(gbdt.Predict(**iter)); predict.push_back(p); } std::cout << "predict time: " << elapsed.Tell() << std::endl; std::string predict_file = test_file + ".predict"; std::ofstream predict_output(predict_file.c_str()); Auc auc; for (size_t i = 0; i < d2.size(); ++i) { predict_output << predict[i] << " " << d2[i]->ToString() << std::endl; auc.Add(predict[i], d2[i]->label); } std::cout << "auc: " << auc.CalculateAuc() << std::endl; auc.PrintConfusionTable(); CleanDataVector(&d2); return 0; }
int main(int argc, char *argv[]) { std::srand ( unsigned ( std::time(0) ) ); g_conf.number_of_feature = 3; g_conf.max_depth = 4; g_conf.iterations = 100; g_conf.shrinkage = 0.1F; if (argc < 3) return -1; std::string train_file(argv[1]); std::string test_file(argv[2]); if (argc > 3) { g_conf.max_depth = boost::lexical_cast<int>(argv[3]); } if (argc > 4) { g_conf.iterations = boost::lexical_cast<int>(argv[4]); } if (argc > 5) { g_conf.shrinkage = boost::lexical_cast<float>(argv[5]); } if (argc > 6) { g_conf.feature_sample_ratio = boost::lexical_cast<float>(argv[6]); } if (argc > 7) { g_conf.data_sample_ratio = boost::lexical_cast<float>(argv[7]); } g_conf.debug = true; // g_conf.loss = LOG_LIKELIHOOD; g_conf.loss = SQUARED_ERROR; DataVector d; bool r = LoadDataFromFile(train_file, &d); assert(r); // g_conf.min_leaf_size = d.size() / 10; std::cout << g_conf.ToString() << std::endl; GBDT gbdt; Elapsed elapsed; gbdt.Fit(&d); std::cout << "fit time: " << elapsed.Tell() << std::endl; CleanDataVector(&d); FreeVector(&d); std::string model_file = train_file + ".model"; std::ofstream model_output(model_file.c_str()); model_output << gbdt.Save(); GBDT gbdt2; gbdt2.Load(gbdt.Save()); DataVector d2; r = LoadDataFromFile(test_file, &d2); assert(r); elapsed.Reset(); DataVector::iterator iter = d2.begin(); PredictVector predict; for ( ; iter != d2.end(); ++iter) { ValueType p; if (g_conf.loss == SQUARED_ERROR) { p = gbdt2.Predict(**iter); predict.push_back(p); } else if (g_conf.loss == LOG_LIKELIHOOD) { p = gbdt2.Predict(**iter); p = Logit(p); if (p >= 0.5) p = 1; else p = -1; predict.push_back(p); } // std::cout << (*iter)->ToString() << std::endl // << p << std::endl; } std::cout << "predict time: " << elapsed.Tell() << std::endl; std::cout << "rmse: " << RMSE(d2, predict) << std::endl; CleanDataVector(&d2); return 0; }
int main(int argc, char ** argv) { std::string input_file = ""; std::string input_type = "l2r"; std::string config_file = "./gbrt.conf"; std::string act_type = ""; std::string model_file = "./gbrt.model"; int dimention = 1024; //----parse command line int opt_c; while ( (opt_c = getopt( argc, argv, "d:f:i:c:m:tp")) != EOF ) { switch (opt_c) { case 'i': input_file = optarg; break; case 'f': input_type = optarg; break; case 'c': config_file = optarg; break; case 'm': model_file = optarg; break; case 't': act_type = "t"; break; case 'p': act_type = "p"; break; case 'd': dimention = atoi(optarg); default: break; } } //check options if ( act_type.length() == 0 || input_file.length() == 0 ) { std::cerr << "miss parameter!!" << endl; Usage(); return 1; } else { cout << "parameters--------" << endl; cout << " input file: " << input_file << endl; cout << " input format (cvs, l2r): " << input_type<< endl; cout << " config file: " << config_file << endl; cout << " act type(t for train,p for predict): " << act_type << endl; cout << " model file: " << model_file << endl; cout << " max dimention(for L2R format): " << dimention << endl; cout << endl; } Data data; DataReader dr; if ( input_type == "cvs") { if ( false == dr.ReadDataFromCVS(input_file, data)) { std::cerr << "error: read CVS file failed! " << input_file << std::endl; return 1; } } else { if ( false == dr.ReadDataFromL2R(input_file, data, dimention)) { std::cerr << "error: read L2R file failed! " << input_file << std::endl; return 1; } } GBDT gbdt; if (!gbdt.LoadConfig(config_file)) return 1; if (act_type == "t") { gbdt.Init(); gbdt.Train(data); gbdt.SaveWeights(model_file); } else if( act_type == "p" ) { T_VECTOR predictions; gbdt.LoadWeights(model_file); gbdt.PredictAllOutputs(data, predictions); //----output prediction---- std::ifstream fs; fs.open(input_file.c_str(), std::ios_base::in); std::string prediction_file = input_file + ".prediction"; std::fstream fs_out; fs_out.open(prediction_file.c_str(), std::ios_base::out); std::string strLine; unsigned int line_num = 0; while (getline(fs, strLine)) { if (strLine.length() < 2) { continue; } fs_out<< predictions[line_num] << std::endl; //for debug //cout << strLine << "\t" << predictions[line_num] << std::endl; line_num++; } fs.close(); } return 0; }