Exemple #1
0
// Writes residuals[i] = y[i] - exp(kFuncEstimate[i] + offset[i]) for every
// training observation (indices 0 .. kData.get_trainsize() - 1).
//
// kData         - supplies the response, the offset and the training size.
// kBag          - not used by this function.
// kFuncEstimate - per-observation values that are added to the offset
//                 before exponentiation.
// residuals     - output buffer; written through operator[] without being
//                 resized, so it must already hold get_trainsize() elements.
void CPoisson::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                      const double* kFuncEstimate,
                                      std::vector<double>& residuals) {
  const unsigned long num_train = kData.get_trainsize();
  const double* const response = kData.y_ptr();
  const double* const offset = kData.offset_ptr();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    const double linear_predictor = kFuncEstimate[obs] + offset[obs];
    residuals[obs] = response[obs] - std::exp(linear_predictor);
  }
}
Exemple #2
0
// Writes, for every training observation i,
//   residuals[i] = -s * exp(-s * (offset[i] + kFuncEstimate[i]))
// where s = 2 * y[i] - 1.
// (presumably y is coded 0/1 so that s is -1/+1 -- confirm against caller)
//
// kData         - supplies the response, the offset and the training size.
// kBag          - not used by this function.
// kFuncEstimate - per-observation values added to the offset.
// residuals     - output buffer; written through operator[] without being
//                 resized, so it must already hold get_trainsize() elements.
void CAdaBoost::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
  const unsigned long num_train = kData.get_trainsize();
  const double* const response = kData.y_ptr();
  const double* const offset = kData.offset_ptr();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    const double signed_label = 2 * response[obs] - 1;
    const double margin = offset[obs] + kFuncEstimate[obs];
    residuals[obs] = -signed_label * std::exp(-signed_label * margin);
  }
}
Exemple #3
0
// Returns log( sum_i w[i] * y[i]  /  sum_i w[i] * exp(offset[i]) ), with both
// sums taken over the training observations of kData.
//
// No guarding is done on the sums: a zero numerator or denominator is passed
// straight to the division and std::log.
double CPoisson::InitF(const CDataset& kData) {
  const unsigned long num_train = kData.get_trainsize();
  const double* const response = kData.y_ptr();
  const double* const weights = kData.weight_ptr();
  const double* const offset = kData.offset_ptr();

  double weighted_response = 0.0;
  double weighted_exp_offset = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : weighted_response, weighted_exp_offset)         \
    num_threads(get_num_threads())
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    weighted_response += weights[obs] * response[obs];
    weighted_exp_offset += weights[obs] * std::exp(offset[obs]);
  }

  return std::log(weighted_response / weighted_exp_offset);
}
Exemple #4
0
// Writes residuals[i] = y[i] - offset[i] - kFuncEstimate[i] for every
// training observation (indices 0 .. kData.get_trainsize() - 1).
//
// kData         - supplies the response, the offset and the training size.
// kBag          - not used by this function.
// kFuncEstimate - per-observation values subtracted from the response.
// residuals     - output buffer; written through operator[] without being
//                 resized, so it must already hold get_trainsize() elements.
//
// Throws gbm_exception::InvalidArgument when the response, offset or weight
// pointer of kData, or kFuncEstimate, is null.
void CGaussian::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
  const double* const response = kData.y_ptr();
  const double* const offset = kData.offset_ptr();

  // The offset is dereferenced on every iteration below, so it has to be
  // validated together with the other inputs. The weight check is kept from
  // the original guard even though weights are not read here, so callers
  // that rely on the exception keep seeing it.
  if (response == nullptr || offset == nullptr || kFuncEstimate == nullptr ||
      !kData.weight_ptr()) {
    throw gbm_exception::InvalidArgument();
  }

  const unsigned long num_train = kData.get_trainsize();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long i = 0; i < num_train; i++) {
    residuals[i] = response[i] - offset[i] - kFuncEstimate[i];
  }
}
Exemple #5
0
// Returns the weighted mean of (y[i] - offset[i]) over the training
// observations of kData:
//   sum_i w[i] * (y[i] - offset[i])  /  sum_i w[i]
//
// The division is not guarded against a zero total weight.
double CGaussian::InitF(const CDataset& kData) {
  const unsigned long num_train = kData.get_trainsize();
  const double* const response = kData.y_ptr();
  const double* const weights = kData.weight_ptr();
  const double* const offset = kData.offset_ptr();

  double weighted_sum = 0.0;
  double weight_total = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : weighted_sum, weight_total)                     \
    num_threads(get_num_threads())
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    const double centred_response = response[obs] - offset[obs];
    weighted_sum += weights[obs] * centred_response;
    weight_total += weights[obs];
  }

  return weighted_sum / weight_total;
}
Exemple #6
0
// Returns 0.5 * log(A / B), where over the training observations of kData
//   A = sum of w[i] * exp(-offset[i]) for observations with y[i] == 1.0
//   B = sum of w[i] * exp( offset[i]) for all other observations.
//
// Neither sum is guarded: if one of the two groups is empty the ratio is
// passed to std::log as is.
double CAdaBoost::InitF(const CDataset& kData) {
  const unsigned long num_train = kData.get_trainsize();
  const double* const response = kData.y_ptr();
  const double* const weights = kData.weight_ptr();
  const double* const offset = kData.offset_ptr();

  double positive_mass = 0.0;
  double other_mass = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : positive_mass, other_mass)                      \
    num_threads(get_num_threads())
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    if (response[obs] != 1.0) {
      other_mass += weights[obs] * std::exp(offset[obs]);
    } else {
      positive_mass += weights[obs] * std::exp(-offset[obs]);
    }
  }

  return 0.5 * std::log(positive_mass / other_mass);
}
Exemple #7
0
// Sets the prediction of every terminal node that exists in `tree`.
//
// For each node, two sums are accumulated over the training observations
// for which kBag.get_element() is true and that are assigned to the node:
//   numerator   = sum of w[i] * y[i]
//   denominator = sum of w[i] * exp(offset[i] + kFuncEstimate[i])
// The node prediction is then
//   -19.0                         if numerator == 0 (log would be -Inf),
//   0.0                           if denominator == 0,
//   log(numerator / denominator)  otherwise.
//
// kData             - supplies response, weights, offset and training size.
// kBag              - selects which observations contribute to the sums.
// kFuncEstimate     - per-observation values added to the offset.
// num_terminalnodes - number of node slots to visit; slots for which
//                     tree.has_node() is false are skipped.
// residuals         - not used by this function.
// tree              - provides node assignments and receives predictions.
//
// NOTE(review): earlier revisions of this function also allocated per-node
// max/min vectors initialised to -/+HUGE_VAL and clamped the prediction to
// [-19 - min, 19 - max]. The vectors were never updated, so both bounds
// were infinite and the clamp never changed any value. The dead code has
// been removed without altering results; if bounding the function estimate
// is actually wanted, per-node extrema of kFuncEstimate have to be tracked
// in the first loop and the clamp reinstated.
void CPoisson::FitBestConstant(const CDataset& kData, const Bag& kBag,
                               const double* kFuncEstimate,
                               unsigned long num_terminalnodes,
                               std::vector<double>& residuals,
                               CCARTTree& tree) {
  std::vector<double> numerator_vec(num_terminalnodes, 0.0);
  std::vector<double> denominator_vec(num_terminalnodes, 0.0);

  const unsigned long num_train = kData.get_trainsize();
  for (unsigned long obs_num = 0; obs_num < num_train; obs_num++) {
    if (!kBag.get_element(obs_num)) {
      continue;
    }
    const auto node_of_obs = tree.get_node_assignments()[obs_num];
    numerator_vec[node_of_obs] +=
        kData.weight_ptr()[obs_num] * kData.y_ptr()[obs_num];
    denominator_vec[node_of_obs] +=
        kData.weight_ptr()[obs_num] *
        std::exp(kData.offset_ptr()[obs_num] + kFuncEstimate[obs_num]);
  }

  for (unsigned long node_num = 0; node_num < num_terminalnodes; node_num++) {
    if (!tree.has_node(node_num)) {
      continue;
    }

    double prediction = 0.0;
    if (numerator_vec[node_num] == 0.0) {
      // log(0) would give -Inf; an arbitrary finite negative value is
      // substituted instead.
      prediction = -19.0;
    } else if (denominator_vec[node_num] != 0.0) {
      prediction =
          std::log(numerator_vec[node_num] / denominator_vec[node_num]);
    }
    // A zero denominator with a non-zero numerator keeps prediction at 0.0.

    tree.get_terminal_nodes()[node_num]->set_prediction(prediction);
  }
}
Exemple #8
0
// Returns a weighted average, over the training observations for which
// kBag.get_element() is false (presumably the out-of-bag ones -- confirm),
// of
//   kShrinkage * d[i] * (2 * (y[i] - (kFuncEstimate[i] + offset[i]))
//                        - kShrinkage * d[i])
// with d = kDeltaEstimate and the observation weights as averaging weights.
//
// The final division is not guarded: when no observation qualifies the
// result is 0.0 / 0.0.
double CGaussian::BagImprovement(const CDataset& kData, const Bag& kBag,
                                 const double* kFuncEstimate,
                                 const double kShrinkage,
                                 const std::vector<double>& kDeltaEstimate) {
  const unsigned long num_train = kData.get_trainsize();
  const double* const response = kData.y_ptr();
  const double* const weights = kData.weight_ptr();
  const double* const offset = kData.offset_ptr();

  double improvement = 0.0;
  double weight_total = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : improvement, weight_total)                      \
    num_threads(get_num_threads())
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    if (kBag.get_element(obs)) {
      continue;
    }

    const double current_fit = kFuncEstimate[obs] + offset[obs];
    const double shrunk_step = kShrinkage * kDeltaEstimate[obs];

    // The leading product is spelled out left to right so the floating-point
    // evaluation order stays ((w * shrinkage) * delta) * (...).
    improvement += weights[obs] * kShrinkage * kDeltaEstimate[obs] *
                   (2.0 * (response[obs] - current_fit) - shrunk_step);
    weight_total += weights[obs];
  }

  return improvement / weight_total;
}
Exemple #9
0
// Sets the prediction of every terminal node that exists in `tree`.
//
// With s = 2 * y[i] - 1 and e = exp(-s * (kFuncEstimate[i] + offset[i])),
// two per-node sums are accumulated over the training observations for
// which kBag.get_element() is true:
//   numerator   = sum of w[i] * s * e
//   denominator = sum of w[i] * e
// The node prediction is numerator / denominator, or 0.0 when the
// denominator is zero.
//
// kData             - supplies response, weights, offset and training size.
// kBag              - selects which observations contribute to the sums.
// kFuncEstimate     - per-observation values added to the offset.
// num_terminalnodes - number of node slots to visit; slots for which
//                     tree.has_node() is false are skipped.
// residuals         - not used by this function.
// tree              - provides node assignments and receives predictions.
//
// The member buffers numerator_bestconstant_ and denominator_bestconstant_
// are resized to num_terminalnodes and zeroed on every call.
void CAdaBoost::FitBestConstant(const CDataset& kData, const Bag& kBag,
                                const double* kFuncEstimate,
                                unsigned long num_terminalnodes,
                                std::vector<double>& residuals,
                                CCARTTree& tree) {
  numerator_bestconstant_.assign(num_terminalnodes, 0.0);
  denominator_bestconstant_.assign(num_terminalnodes, 0.0);

  const unsigned long num_train = kData.get_trainsize();
  for (unsigned long obs = 0; obs < num_train; ++obs) {
    if (!kBag.get_element(obs)) {
      continue;
    }

    const double current_fit = kFuncEstimate[obs] + kData.offset_ptr()[obs];
    const double exp_term =
        std::exp(-(2 * kData.y_ptr()[obs] - 1) * current_fit);
    const auto node_of_obs = tree.get_node_assignments()[obs];

    numerator_bestconstant_[node_of_obs] +=
        kData.weight_ptr()[obs] * (2 * kData.y_ptr()[obs] - 1) * exp_term;
    denominator_bestconstant_[node_of_obs] +=
        kData.weight_ptr()[obs] * exp_term;
  }

  for (unsigned long node = 0; node < num_terminalnodes; ++node) {
    if (!tree.has_node(node)) {
      continue;
    }

    const bool empty_denominator = (denominator_bestconstant_[node] == 0);
    if (empty_denominator) {
      tree.get_terminal_nodes()[node]->set_prediction(0.0);
      continue;
    }
    tree.get_terminal_nodes()[node]->set_prediction(
        numerator_bestconstant_[node] / denominator_bestconstant_[node]);
  }
}