/**
 * Train the ensemble by gradient boosting on the logistic loss
 * log(1 + exp(-y * F)) and print per-iteration progress.
 *
 * One tree is fitted per element of the `trees` member, so `trees` must
 * already have the desired number of rounds when this is called. Every score
 * starts at `bias` (computed from the training labels by calc_bias) and each
 * fitted tree's output is added to it.
 *
 * @param Tr  Training set; Tr.Y holds the labels (the loss formula suggests
 *            they are expected to be +1/-1 -- TODO confirm against callers).
 * @param Va  Validation set, only used to report the validation loss.
 *
 * Prints one header line and then, per tree, the iteration index, the value
 * of timer.toc(), and the mean training / validation loss. If a set is empty
 * its mean is 0/0 and is printed as NaN.
 */
void GBDT::fit(Problem const &Tr, Problem const &Va)
{
    bias = calc_bias(Tr.Y);

    // Running ensemble scores for every training / validation instance.
    std::vector<float> F_Tr(Tr.nr_instance, bias), F_Va(Va.nr_instance, bias);

    // Numerically stable log(1 + exp(-margin)) with margin = y * F.
    // The naive form overflows exp() for very negative margins and turns the
    // whole averaged loss into +inf; here exp() only ever sees a value <= 0.
    auto const logistic_loss = [](double margin) {
        return (margin < 0 ? -margin : 0.0) + log1p(exp(-fabs(margin)));
    };

    Timer timer;
    printf("iter time tr_loss va_loss\n");

    for(uint32_t t = 0; t < trees.size(); ++t)
    {
        timer.tic();

        std::vector<float> const &Y = Tr.Y;
        std::vector<float> R(Tr.nr_instance), F1(Tr.nr_instance);

        // Regression targets: the negative gradient of the logistic loss
        // with respect to the current score, y / (1 + exp(y * F)).
        // No clamping of the margin is needed: if exp() overflows to +inf
        // the quotient becomes 0, which is the correct limit.
        #pragma omp parallel for schedule(static)
        for(uint32_t i = 0; i < Tr.nr_instance; ++i)
        {
            double const margin = Y[i]*F_Tr[i];
            R[i] = static_cast<float>(Y[i]/(1+exp(margin)));
        }

        // Fit tree t to the targets R. F1 is handed to the tree and added to
        // the training scores below (presumably the tree's per-instance
        // output on Tr -- confirm in the tree's fit()).
        trees[t].fit(Tr, R, F1);

        // Update the training scores and accumulate the mean training loss.
        double Tr_loss = 0;
        #pragma omp parallel for schedule(static) reduction(+: Tr_loss)
        for(uint32_t i = 0; i < Tr.nr_instance; ++i)
        {
            F_Tr[i] += F1[i];
            Tr_loss += logistic_loss(static_cast<double>(Y[i])*F_Tr[i]);
        }
        Tr_loss /= static_cast<double>(Tr.nr_instance);

        // Update the validation scores with the new tree's prediction.
        #pragma omp parallel for schedule(static)
        for(uint32_t i = 0; i < Va.nr_instance; ++i)
        {
            std::vector<float> x = construct_instance(Va, i);
            F_Va[i] += trees[t].predict(x.data()).second;
        }

        // Mean validation loss.
        double Va_loss = 0;
        #pragma omp parallel for schedule(static) reduction(+: Va_loss)
        for(uint32_t i = 0; i < Va.nr_instance; ++i)
            Va_loss += logistic_loss(static_cast<double>(Va.Y[i])*F_Va[i]);
        Va_loss /= static_cast<double>(Va.nr_instance);

        // %d expects a signed int, so convert the unsigned counter explicitly.
        printf("%4d %8.1f %10.5f %10.5f\n",
               static_cast<int>(t), timer.toc(), Tr_loss, Va_loss);
        fflush(stdout);
    }
}
/**
 * Fit the boosted trees on the training set and report progress.
 *
 * The objective is the logistic loss log(1 + exp(-y * F)); the overall
 * procedure follows the gradient tree boosting scheme of Algorithm 10.3 in
 * "The Elements of Statistical Learning". The number of boosting rounds is
 * the current size of the `trees` member.
 *
 * @param Tr  Training set; Tr.Y are the labels (the loss formula suggests
 *            +1/-1 labels -- TODO confirm against callers).
 * @param Va  Validation set; it does not influence training and is only
 *            scored to print the validation loss.
 *
 * Output: a header line, then one line per tree with the iteration index,
 * timer.toc(), mean training loss and mean validation loss. An empty set
 * yields a 0/0 mean, printed as NaN.
 */
void GBDT::fit(Problem const &Tr, Problem const &Va)
{
    bias = calc_bias(Tr.Y);

    // Accumulated scores; every instance starts from the same bias.
    std::vector<float> F_Tr(Tr.nr_instance, bias), F_Va(Va.nr_instance, bias);

    // Overflow-safe evaluation of log(1 + exp(-margin)), margin = y * F.
    // Written as max(-margin, 0) + log1p(exp(-|margin|)) so that exp() never
    // receives a positive argument; the naive form returns +inf for large
    // negative margins and would make the reported mean loss infinite.
    auto const logistic_loss = [](double margin) {
        return (margin < 0 ? -margin : 0.0) + log1p(exp(-fabs(margin)));
    };

    Timer timer;
    printf("iter time tr_loss va_loss\n");

    for(uint32_t t = 0; t < trees.size(); ++t)
    {
        timer.tic();

        std::vector<float> const &Y = Tr.Y;
        std::vector<float> R(Tr.nr_instance), F1(Tr.nr_instance);

        // Residuals: the negative gradient of the loss with respect to the
        // current score, y / (1 + exp(y * F)). An overflowing exp() gives a
        // residual of 0, which is the correct limit, so no guard is needed.
        #pragma omp parallel for schedule(static)
        for(uint32_t i = 0; i < Tr.nr_instance; ++i)
            R[i] = static_cast<float>(Y[i]/(1+exp(Y[i]*F_Tr[i])));

        // Fit a regression tree to the targets R. F1 is passed along and is
        // added to F_Tr below (presumably the tree's output for each
        // training instance -- confirm in the tree's fit()).
        trees[t].fit(Tr, R, F1);

        // Add the tree's contribution to the training scores and compute the
        // mean training loss. The contribution is added as-is: no extra
        // per-tree weight or step size is applied here.
        double Tr_loss = 0;
        #pragma omp parallel for schedule(static) reduction(+: Tr_loss)
        for(uint32_t i = 0; i < Tr.nr_instance; ++i)
        {
            F_Tr[i] += F1[i];
            Tr_loss += logistic_loss(static_cast<double>(Y[i])*F_Tr[i]);
        }
        Tr_loss /= static_cast<double>(Tr.nr_instance);

        // Validation: add tree t's prediction to each validation score.
        #pragma omp parallel for schedule(static)
        for(uint32_t i = 0; i < Va.nr_instance; ++i)
        {
            std::vector<float> x = construct_instance(Va, i);
            F_Va[i] += trees[t].predict(x.data()).second;
        }

        // Mean validation loss.
        double Va_loss = 0;
        #pragma omp parallel for schedule(static) reduction(+: Va_loss)
        for(uint32_t i = 0; i < Va.nr_instance; ++i)
            Va_loss += logistic_loss(static_cast<double>(Va.Y[i])*F_Va[i]);
        Va_loss /= static_cast<double>(Va.nr_instance);

        // The format uses %d, so pass the unsigned counter as a signed int.
        printf("%4d %8.1f %10.5f %10.5f\n",
               static_cast<int>(t), timer.toc(), Tr_loss, Va_loss);
        fflush(stdout);
    }
}