コード例 #1
0
ファイル: gbdt.cpp プロジェクト: qiyiping/gbdt
// Prepares one boosting round for the log-likelihood loss.
//
// For each of the first `samples` entries of `*d`, overwrites `target` with
// LogitLossGradient(label, Predict(entry, i)). When g_conf.debug is set, it
// also prints the AUC obtained by scoring every entry of `*d` (not just the
// first `samples`) with Logit(Predict(entry, i)).
//
// d        data set whose entries get their `target` field updated
// samples  number of leading entries of `*d` to update
// i        passed straight through to Predict()
//          (presumably the number of trees built so far -- confirm)
void GBDT::LogLossProcess(DataVector *d, size_t samples, int i) {
  DataVector &data = *d;

  // Each iteration writes only to its own entry's `target` field.
#ifdef USE_OPENMP
#pragma omp parallel for
#endif
  for (size_t idx = 0; idx < samples; ++idx) {
    const ValueType score = Predict(*data[idx], i);
    data[idx]->target =
        static_cast<ValueType>(LogitLossGradient(data[idx]->label, score));
  }

  // Everything below is diagnostics only.
  if (!g_conf.debug) {
    return;
  }

  Auc auc;
  for (DataVector::iterator it = data.begin(); it != data.end(); ++it) {
    const ValueType prob = Logit(Predict(**it, i));
    auc.Add(prob, (*it)->label);
  }
  std::cout << "auc: " << auc.CalculateAuc() << std::endl;
}
コード例 #2
0
ファイル: gbdt.cpp プロジェクト: litaoshao/gbdt
// Trains the ensemble on `*d` and then accumulates per-feature gain.
//
// Steps, in order:
//   1. Replaces `trees` with a fresh array of g_conf.iterations trees.
//   2. Calls Init() on the full data set.
//   3. For each round i: optionally reshuffles `*d` (only when
//      g_conf.data_sample_ratio < 1 yields fewer samples than entries),
//      writes the per-entry `target` for the first `samples` entries
//      according to g_conf.loss, optionally prints a debug metric over the
//      whole data set, and fits trees[i] via trees[i].Fit(d, samples).
//   4. Replaces `gain` with a fresh array of g_conf.number_of_feature
//      doubles holding, per feature, the sum of every tree's GetGain().
//
// d  training data; its element order and `target` fields are modified.
void GBDT::Fit(DataVector *d) {
  delete[] trees;
  trees = new RegressionTree[g_conf.iterations];

  // Number of leading entries used per round; the full set unless a
  // sampling ratio below 1 is configured.
  size_t samples = d->size();
  if (g_conf.data_sample_ratio < 1) {
    samples = static_cast<size_t>(d->size() * g_conf.data_sample_ratio);
  }

  Init(*d, d->size());

  for (size_t i = 0; i < g_conf.iterations; ++i) {
    std::cout  << "iteration: " << i << std::endl;

    // When subsampling, reshuffle so the first `samples` entries differ
    // from round to round.
    if (samples < d->size()) {
#ifndef USE_OPENMP
      std::random_shuffle(d->begin(), d->end());
#else
      __gnu_parallel::random_shuffle(d->begin(), d->end());
#endif
    }

    if (g_conf.loss == SQUARED_ERROR) {
      // Target is the residual between the label and the current prediction.
      for (size_t j = 0; j < samples; ++j) {
        ValueType p = Predict(*(*d)[j], i);
        (*d)[j]->target = (*d)[j]->label - p;
      }

      if (g_conf.debug) {
        // Weighted RMSE over the whole data set, not only the sampled part.
        double s = 0;
        double c = 0;
        DataVector::iterator iter = d->begin();
        for ( ; iter != d->end(); ++iter) {
          ValueType p = Predict(**iter, i);
          s += Squared((*iter)->label - p) * (*iter)->weight;
          c += (*iter)->weight;
        }
        std::cout << "rmse: " << std::sqrt(s / c) << std::endl;
      }
    } else if (g_conf.loss == LOG_LIKELIHOOD) {
      // Target is LogitLossGradient(label, current prediction).
      for (size_t j = 0; j < samples; ++j) {
        ValueType p = Predict(*(*d)[j], i);
        (*d)[j]->target =
            static_cast<ValueType>(LogitLossGradient((*d)[j]->label, p));
      }

      if (g_conf.debug) {
        // AUC over the whole data set, not only the sampled part.
        Auc auc;
        DataVector::iterator iter = d->begin();
        for ( ; iter != d->end(); ++iter) {
          ValueType p = Logit(Predict(**iter, i));
          auc.Add(p, (*iter)->label);
        }
        std::cout << "auc: " << auc.CalculateAuc() << std::endl;
      }
    }

    trees[i].Fit(d, samples);
  }


  // Calculate gain
  delete[] gain;
  gain = new double[g_conf.number_of_feature];

  for (size_t i = 0; i < g_conf.number_of_feature; ++i) {
    gain[i] = 0.0;
  }

  // Sum over exactly the trees allocated and fitted above. The bound is
  // g_conf.iterations (the size of `trees`) rather than the separate
  // `iterations` value, which this function never sets: if the two ever
  // disagreed the loop would index past the array or skip fitted trees.
  for (size_t j = 0; j < g_conf.iterations; ++j) {
    double *g = trees[j].GetGain();
    for (size_t i = 0; i < g_conf.number_of_feature; ++i) {
      gain[i] += g[i];
    }
  }
}