void Tree::bootstrapWeighted() { // Use fraction (default 63.21%) of the samples size_t num_samples_inbag = (size_t) num_samples * sample_fraction; // Reserve space, reserve a little more to be save) sampleIDs[0].reserve(num_samples_inbag); oob_sampleIDs.reserve(num_samples * (exp(-sample_fraction) + 0.1)); std::discrete_distribution<> weighted_dist(case_weights->begin(), case_weights->end()); // Start with all samples OOB inbag_counts.resize(num_samples, 0); // Draw num_samples samples with replacement (n out of n) as inbag and mark as not OOB for (size_t s = 0; s < num_samples_inbag; ++s) { size_t draw = weighted_dist(random_number_generator); sampleIDs[0].push_back(draw); ++inbag_counts[draw]; } // Save OOB samples for (size_t s = 0; s < inbag_counts.size(); ++s) { if (inbag_counts[s] == 0) { oob_sampleIDs.push_back(s); } } num_samples_oob = oob_sampleIDs.size(); if (!keep_inbag) { inbag_counts.clear(); } }
/**
 * Draw elements of indizes without replacement, with selection probability
 * proportional to the given weights.
 *
 * Position i of weights belongs to position i of indizes. Drawn elements are
 * appended to result in the order in which they were drawn.
 *
 * At most as many elements are drawn as there are positions with a positive
 * weight and a matching entry in indizes. If num_samples exceeds that number,
 * fewer than num_samples elements are appended; without this cap the rejection
 * loop could never find another unselected position and would not terminate.
 * Weights beyond the end of indizes are ignored.
 *
 * @param result Output vector, drawn elements are appended to it
 * @param random_number_generator Random number generator used for the draws
 * @param indizes Candidate values to draw from
 * @param num_samples Requested number of elements
 * @param weights Sampling weight per position; std::discrete_distribution expects non-negative values
 */
void drawWithoutReplacementWeighted(std::vector<size_t>& result, std::mt19937_64& random_number_generator,
    std::vector<size_t>& indizes, size_t num_samples, std::vector<double>& weights) {

  // Weights without a matching entry in indizes cannot be drawn
  const size_t num_candidates = weights.size() < indizes.size() ? weights.size() : indizes.size();

  // Count the positions that can actually be drawn
  size_t num_drawable = 0;
  if (weights.empty()) {
    // std::discrete_distribution built from an empty range always yields position 0
    num_drawable = indizes.empty() ? 0 : 1;
  } else {
    for (size_t i = 0; i < num_candidates; ++i) {
      if (weights[i] > 0) {
        ++num_drawable;
      }
    }
  }

  // Never try to draw more than is available, the rejection loop below would not terminate
  const size_t num_draws = num_samples < num_drawable ? num_samples : num_drawable;
  if (num_draws == 0) {
    // Also avoids building a distribution from weights that sum to zero
    return;
  }
  result.reserve(result.size() + num_draws);

  // Set all to not selected
  std::vector<bool> selected(indizes.size(), false);

  std::discrete_distribution<> weighted_dist(weights.begin(), weights.begin() + num_candidates);
  for (size_t i = 0; i < num_draws; ++i) {
    // Redraw until a position is hit that has not been selected yet
    size_t draw;
    do {
      draw = weighted_dist(random_number_generator);
    } while (selected[draw]);
    selected[draw] = true;
    result.push_back(indizes[draw]);
  }
}