예제 #1
0
파일: adaboost.cpp 프로젝트: arnocandel/gbm
// Returns the weighted mean exponential loss
//   sum_i w_i * exp(-(2*y_i - 1) * (offset_i + f_i)) / sum_i w_i
// over the first kData.get_size_of_set() rows, where f is kFuncEstimate.
// kBag is not consulted by this function.
//
// If the weights sum to exactly zero the result is NaN when the loss sum is
// zero as well, and HUGE_VAL otherwise.
double CAdaBoost::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  double loss = 0.0;
  double weight = 0.0;

  // Number of rows to score; presumably this reflects whichever set
  // (training or validation) is currently selected on kData - confirm.
  const unsigned long num_rows = kData.get_size_of_set();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : loss, weight) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_rows; row++) {
    const double row_weight = kData.weight_ptr()[row];
    // For y in {0, 1} the factor (2*y - 1) is -1 or +1.
    const double exponent = -(2 * kData.y_ptr()[row] - 1) *
                            (kData.offset_ptr()[row] + kFuncEstimate[row]);
    loss += row_weight * std::exp(exponent);
    weight += row_weight;
  }

  // TODO: Check if weights are all zero for validation set
  if (weight == 0.0) {
    return (loss == 0.0) ? nan("") : HUGE_VAL;
  }

  return loss / weight;
}
예제 #2
0
파일: poisson.cpp 프로젝트: arnocandel/gbm
// Computes the initial constant estimate for the Poisson loss:
//   log( (sum_i w_i * y_i) / (sum_i w_i * exp(offset_i)) )
// with both sums taken over the first kData.get_trainsize() rows.
//
// The ratio is passed to std::log without any guard, so a zero numerator or
// a zero denominator is not handled specially here.
double CPoisson::InitF(const CDataset& kData) {
  double weighted_response = 0.0;
  double weighted_exposure = 0.0;
  const unsigned long num_train = kData.get_trainsize();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : weighted_response, weighted_exposure) \
    num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_train; row++) {
    const double row_weight = kData.weight_ptr()[row];
    weighted_response += row_weight * kData.y_ptr()[row];
    weighted_exposure += row_weight * std::exp(kData.offset_ptr()[row]);
  }

  const double ratio = weighted_response / weighted_exposure;
  return std::log(ratio);
}
예제 #3
0
파일: gaussian.cpp 프로젝트: arnocandel/gbm
// Computes the initial constant estimate for the Gaussian loss: the weighted
// mean of (y_i - offset_i) over the first kData.get_trainsize() rows,
//   (sum_i w_i * (y_i - offset_i)) / (sum_i w_i).
//
// The division is not guarded against a zero weight total.
double CGaussian::InitF(const CDataset& kData) {
  double weighted_sum = 0.0;
  double weight_total = 0.0;
  const unsigned long num_train = kData.get_trainsize();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : weighted_sum, weight_total) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_train; row++) {
    const double row_weight = kData.weight_ptr()[row];
    const double centered = kData.y_ptr()[row] - kData.offset_ptr()[row];
    weighted_sum += row_weight * centered;
    weight_total += row_weight;
  }

  return weighted_sum / weight_total;
}
예제 #4
0
파일: adaboost.cpp 프로젝트: arnocandel/gbm
// Computes the initial constant estimate for the AdaBoost loss:
//   0.5 * log( A / B )
// where, over the first kData.get_trainsize() rows,
//   A = sum of w_i * exp(-offset_i) for rows with y_i == 1.0, and
//   B = sum of w_i * exp(+offset_i) for all other rows.
//
// The ratio is passed to std::log without any guard against A or B being
// zero.
double CAdaBoost::InitF(const CDataset& kData) {
  double positive_sum = 0.0;
  double negative_sum = 0.0;
  const unsigned long num_train = kData.get_trainsize();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : positive_sum, negative_sum) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_train; row++) {
    const double row_weight = kData.weight_ptr()[row];
    const double row_offset = kData.offset_ptr()[row];

    if (kData.y_ptr()[row] == 1.0) {
      positive_sum += row_weight * std::exp(-row_offset);
    } else {
      negative_sum += row_weight * std::exp(row_offset);
    }
  }

  const double ratio = positive_sum / negative_sum;
  return 0.5 * std::log(ratio);
}
예제 #5
0
파일: poisson.cpp 프로젝트: arnocandel/gbm
// Sets the prediction of every existing terminal node of `tree` to the
// Poisson best constant for that node,
//   log( (sum w_i * y_i) / (sum w_i * exp(offset_i + f_i)) ),
// where f is kFuncEstimate and both sums run over the rows among the first
// kData.get_trainsize() for which kBag.get_element() is true and which
// tree.get_node_assignments() maps to the node.
//
// Special cases:
//   * numerator == 0   -> prediction -19.0 (the log would be -Inf);
//   * denominator == 0 -> prediction 0.0.
//
// The prediction is then clamped against the range of (offset_i + f_i) seen
// in the node, so that offset_i + f_i + prediction stays within [-19, 19]
// for the rows that were accumulated. If the two bounds conflict, the lower
// bound is applied last and wins. Nodes with no accumulated rows keep
// infinite bounds and are not clamped.
//
// `residuals` is not used by this function.
void CPoisson::FitBestConstant(const CDataset& kData, const Bag& kBag,
                               const double* kFuncEstimate,
                               unsigned long num_terminalnodes,
                               std::vector<double>& residuals,
                               CCARTTree& tree) {
  std::vector<double> numerator_vec(num_terminalnodes, 0.0);
  std::vector<double> denominator_vec(num_terminalnodes, 0.0);
  // Per-node extremes of (offset + current estimate). These have to be
  // tightened in the accumulation loop: left at -/+HUGE_VAL they turn the
  // clamps further down into no-ops.
  std::vector<double> max_vec(num_terminalnodes, -HUGE_VAL);
  std::vector<double> min_vec(num_terminalnodes, HUGE_VAL);

  const unsigned long num_train = kData.get_trainsize();
  for (unsigned long obs_num = 0; obs_num < num_train; obs_num++) {
    if (!kBag.get_element(obs_num)) {
      continue;
    }

    const unsigned long node = tree.get_node_assignments()[obs_num];
    const double obs_weight = kData.weight_ptr()[obs_num];
    const double linear_pred =
        kData.offset_ptr()[obs_num] + kFuncEstimate[obs_num];

    numerator_vec[node] += obs_weight * kData.y_ptr()[obs_num];
    denominator_vec[node] += obs_weight * std::exp(linear_pred);

    // Plain comparisons: a NaN linear predictor leaves the bounds untouched.
    if (linear_pred > max_vec[node]) {
      max_vec[node] = linear_pred;
    }
    if (linear_pred < min_vec[node]) {
      min_vec[node] = linear_pred;
    }
  }

  for (unsigned long node_num = 0; node_num < num_terminalnodes; node_num++) {
    if (!tree.has_node(node_num)) {
      continue;
    }

    double prediction = 0.0;
    if (numerator_vec[node_num] == 0.0) {
      // log(0) would be -Inf; substitute the most negative value allowed.
      prediction = -19.0;
    } else if (denominator_vec[node_num] == 0.0) {
      prediction = 0.0;
    } else {
      prediction =
          std::log(numerator_vec[node_num] / denominator_vec[node_num]);
    }

    // Keep (offset + estimate + prediction) inside [-19, 19].
    prediction = R::fmin2(prediction, 19 - max_vec[node_num]);
    prediction = R::fmax2(prediction, -19 - min_vec[node_num]);

    tree.get_terminal_nodes()[node_num]->set_prediction(prediction);
  }
}
예제 #6
0
파일: gaussian.cpp 프로젝트: arnocandel/gbm
// Returns the weighted mean reduction in squared error obtained by adding
// kShrinkage * kDeltaEstimate[i] to the current fit, computed over the rows
// among the first kData.get_trainsize() for which kBag.get_element() is
// false (presumably the out-of-bag rows - confirm against Bag).
//
// Per contributing row, with r = y - (kFuncEstimate + offset) and
// d = kShrinkage * kDeltaEstimate, the term added is w * d * (2*r - d),
// which equals w * (r^2 - (r - d)^2).
//
// The final division is unguarded: if no row contributes any weight the
// result is 0.0 / 0.0.
double CGaussian::BagImprovement(const CDataset& kData, const Bag& kBag,
                                 const double* kFuncEstimate,
                                 const double kShrinkage,
                                 const std::vector<double>& kDeltaEstimate) {
  double improvement = 0.0;
  double weight_total = 0.0;
  const unsigned long num_train = kData.get_trainsize();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : improvement, weight_total) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_train; row++) {
    if (kBag.get_element(row)) {
      continue;
    }

    const double row_weight = kData.weight_ptr()[row];
    const double current_fit = kFuncEstimate[row] + kData.offset_ptr()[row];
    const double delta = kDeltaEstimate[row];

    // Multiplication order is kept as-is so rounding matches exactly.
    improvement += row_weight * kShrinkage * delta *
                   (2.0 * (kData.y_ptr()[row] - current_fit) -
                    kShrinkage * delta);
    weight_total += row_weight;
  }

  return improvement / weight_total;
}
예제 #7
0
파일: gaussian.cpp 프로젝트: arnocandel/gbm
// Fills residuals[i] with y_i - offset_i - kFuncEstimate[i] for each of the
// first kData.get_trainsize() rows.
//
// Throws gbm_exception::InvalidArgument if the response, offset, weight or
// function-estimate pointer is null. The offset pointer is included in the
// check because it is dereferenced in the loop below. kBag is not used, and
// the size of `residuals` is not checked here.
void CGaussian::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
  if (!(kData.y_ptr() && kData.offset_ptr() && kFuncEstimate &&
        kData.weight_ptr())) {
    throw gbm_exception::InvalidArgument();
  }

  const unsigned long num_train = kData.get_trainsize();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long i = 0; i < num_train; i++) {
    residuals[i] = kData.y_ptr()[i] - kData.offset_ptr()[i] - kFuncEstimate[i];
  }
}
예제 #8
0
파일: adaboost.cpp 프로젝트: arnocandel/gbm
// Sets the prediction of every existing terminal node of `tree` to
//   (sum w_i * s_i * e_i) / (sum w_i * e_i),
// where s_i = 2*y_i - 1, e_i = exp(-s_i * (kFuncEstimate[i] + offset_i)),
// and both sums run over the rows among the first kData.get_trainsize() for
// which kBag.get_element() is true and which tree.get_node_assignments()
// maps to the node. A node whose denominator is zero gets prediction 0.0.
//
// The per-node sums are kept in the members numerator_bestconstant_ and
// denominator_bestconstant_, which are reset on every call. `residuals` is
// not used by this function.
void CAdaBoost::FitBestConstant(const CDataset& kData, const Bag& kBag,
                                const double* kFuncEstimate,
                                unsigned long num_terminalnodes,
                                std::vector<double>& residuals,
                                CCARTTree& tree) {
  // One zeroed accumulator slot per terminal node.
  numerator_bestconstant_.assign(num_terminalnodes, 0.0);
  denominator_bestconstant_.assign(num_terminalnodes, 0.0);

  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    if (!kBag.get_element(row)) {
      continue;
    }

    const double current_fit = kFuncEstimate[row] + kData.offset_ptr()[row];
    const double row_weight = kData.weight_ptr()[row];
    // For y in {0, 1} this is -1 or +1.
    const double sign = 2 * kData.y_ptr()[row] - 1;
    const double exp_term = std::exp(-sign * current_fit);

    numerator_bestconstant_[tree.get_node_assignments()[row]] +=
        row_weight * sign * exp_term;
    denominator_bestconstant_[tree.get_node_assignments()[row]] +=
        row_weight * exp_term;
  }

  for (unsigned long node = 0; node < num_terminalnodes; node++) {
    if (!tree.has_node(node)) {
      continue;
    }

    const double denominator = denominator_bestconstant_[node];
    const double prediction =
        (denominator == 0) ? 0.0 : numerator_bestconstant_[node] / denominator;
    tree.get_terminal_nodes()[node]->set_prediction(prediction);
  }
}
예제 #9
0
파일: gaussian.cpp 프로젝트: arnocandel/gbm
// Returns the weighted mean squared error
//   sum_i w_i * (y_i - offset_i - f_i)^2 / sum_i w_i
// over the first kData.get_size_of_set() rows, where f is kFuncEstimate.
// kBag is not consulted by this function.
//
// If the weights sum to exactly zero the result is NaN when the loss sum is
// zero as well, and otherwise HUGE_VAL carrying the sign of the loss sum.
double CGaussian::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  double loss = 0.0;
  double weight = 0.0;

  const unsigned long num_rows = kData.get_size_of_set();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : loss, weight) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_rows; row++) {
    const double row_weight = kData.weight_ptr()[row];
    const double residual =
        (kData.y_ptr()[row] - kData.offset_ptr()[row] - kFuncEstimate[row]);
    loss += row_weight * residual * residual;
    weight += row_weight;
  }

  // TODO: Check if weights are all zero for validation set
  if (weight == 0.0) {
    return (loss == 0.0) ? nan("") : copysign(HUGE_VAL, loss);
  }

  return loss / weight;
}