// Compute per-sample pseudo-residuals (negative gradients) of the logistic
// loss against the ensemble built from the first `i` trees, writing them
// into each tuple's `target` field for the next tree to fit.
//
// d       - training data; `target` fields are overwritten in place.
// samples - number of leading tuples to process (when subsampling, the
//           caller shuffles first so these form the random subset).
// i       - number of trees already fitted; Predict(.., i) scores with them.
void GBDT::LogLossProcess(DataVector *d, size_t samples, int i) {
#ifdef USE_OPENMP
// Gradient of each tuple is independent, so the loop parallelizes trivially.
#pragma omp parallel for
#endif
  for (size_t j = 0; j < samples; ++j) {
    ValueType p = Predict(*(*d)[j], i);
    (*d)[j]->target =
        static_cast<ValueType>(LogitLossGradient((*d)[j]->label, p));
  }

  if (g_conf.debug) {
    // Debug only: report AUC over the FULL data set (not just `samples`),
    // using Logit() to map raw scores to probabilities.
    Auc auc;
    DataVector::iterator iter = d->begin();
    for ( ; iter != d->end(); ++iter) {
      ValueType p = Logit(Predict(**iter, i));
      auc.Add(p, (*iter)->label);
    }
    std::cout << "auc: " << auc.CalculateAuc() << std::endl;
  }
}
void GBDT::Fit(DataVector *d) { delete[] trees; trees = new RegressionTree[g_conf.iterations]; size_t samples = d->size(); if (g_conf.data_sample_ratio < 1) { samples = static_cast<size_t>(d->size() * g_conf.data_sample_ratio); } Init(*d, d->size()); for (size_t i = 0; i < g_conf.iterations; ++i) { std::cout << "iteration: " << i << std::endl; if (samples < d->size()) { #ifndef USE_OPENMP std::random_shuffle(d->begin(), d->end()); #else __gnu_parallel::random_shuffle(d->begin(), d->end()); #endif } if (g_conf.loss == SQUARED_ERROR) { for (size_t j = 0; j < samples; ++j) { ValueType p = Predict(*(*d)[j], i); (*d)[j]->target = (*d)[j]->label - p; } if (g_conf.debug) { double s = 0; double c = 0; DataVector::iterator iter = d->begin(); for ( ; iter != d->end(); ++iter) { ValueType p = Predict(**iter, i); s += Squared((*iter)->label - p) * (*iter)->weight; c += (*iter)->weight; } std::cout << "rmse: " << std::sqrt(s / c) << std::endl; } } else if (g_conf.loss == LOG_LIKELIHOOD) { for (size_t j = 0; j < samples; ++j) { ValueType p = Predict(*(*d)[j], i); (*d)[j]->target = static_cast<ValueType>(LogitLossGradient((*d)[j]->label, p)); } if (g_conf.debug) { Auc auc; DataVector::iterator iter = d->begin(); for ( ; iter != d->end(); ++iter) { ValueType p = Logit(Predict(**iter, i)); auc.Add(p, (*iter)->label); } std::cout << "auc: " << auc.CalculateAuc() << std::endl; } } trees[i].Fit(d, samples); } // Calculate gain delete[] gain; gain = new double[g_conf.number_of_feature]; for (size_t i = 0; i < g_conf.number_of_feature; ++i) { gain[i] = 0.0; } for (size_t j = 0; j < iterations; ++j) { double *g = trees[j].GetGain(); for (size_t i = 0; i < g_conf.number_of_feature; ++i) { gain[i] += g[i]; } } }