static void AssignRandomWeights(int learnSampleCount, TLearnContext* ctx, TFold* fold) { TVector<float> sampleWeights; sampleWeights.yresize(learnSampleCount); const ui64 randSeed = ctx->Rand.GenRand(); NPar::TLocalExecutor::TExecRangeParams blockParams(0, learnSampleCount); blockParams.SetBlockSize(10000); ctx->LocalExecutor.ExecRange([&](int blockIdx) { TFastRng64 rand(randSeed + blockIdx); rand.Advance(10); // reduce correlation between RNGs in different threads const float baggingTemperature = ctx->Params.ObliviousTreeOptions->BootstrapConfig->GetBaggingTemperature(); float* sampleWeightsData = sampleWeights.data(); NPar::TLocalExecutor::BlockedLoopBody(blockParams, [&rand, sampleWeightsData, baggingTemperature](int i) { const float w = -FastLogf(rand.GenRandReal1() + 1e-100); sampleWeightsData[i] = powf(w, baggingTemperature); })(blockIdx); }, 0, blockParams.GetBlockCount(), NPar::TLocalExecutor::WAIT_COMPLETE); TFold& ff = *fold; ff.AssignPermuted(sampleWeights, &ff.SampleWeights); if (!ff.LearnWeights.empty()) { for (int i = 0; i < learnSampleCount; ++i) { ff.SampleWeights[i] *= ff.LearnWeights[i]; } } const int approxDimension = ff.GetApproxDimension(); for (TFold::TBodyTail& bt : ff.BodyTailArr) { for (int dim = 0; dim < approxDimension; ++dim) { double* weightedDerData = bt.WeightedDer[dim].data(); const double* derData = bt.Derivatives[dim].data(); const float* sampleWeightsData = ff.SampleWeights.data(); ctx->LocalExecutor.ExecRange([=](int z) { weightedDerData[z] = derData[z] * sampleWeightsData[z]; }, NPar::TLocalExecutor::TExecRangeParams(bt.BodyFinish, bt.TailFinish).SetBlockSize(4000) , NPar::TLocalExecutor::WAIT_COMPLETE); } } }
static inline void BinarizeFloatFeature(int featureIdx, const TDocumentStorage& docStorage, const TDocSelector& docSelector, const TVector<float>& borders, ENanMode nanMode, NPar::TLocalExecutor& localExecutor, int floatFeatureIdx, TAllFeatures* features, bool* seenNans) { size_t docCount = docSelector.GetDocCount(); const TVector<float>& src = docStorage.Factors[featureIdx]; TVector<ui8>& hist = features->FloatHistograms[floatFeatureIdx]; hist.resize(docCount); ui8* histData = hist.data(); const float* featureBorderData = borders.data(); const int featureBorderSize = borders.ysize(); localExecutor.ExecRange([&] (int i) { const auto& featureVal = src[docSelector(i)]; if (IsNan(featureVal)) { *seenNans = true; histData[i] = nanMode == ENanMode::Min ? 0 : featureBorderSize; } else { int j = 0; while (j < featureBorderSize && featureVal > featureBorderData[j]) { ++histData[i]; ++j; } // histData[i] = LowerBound(featureBorderData, featureBorderData + featureBorderSize, featureVal) - featureBorderData; } } , NPar::TLocalExecutor::TExecRangeParams(0, docCount).SetBlockSize(1000) , NPar::TLocalExecutor::WAIT_COMPLETE); }
/**
 * Write the contents of m_vector to the file descriptor fd as one
 * contiguous run of byte_size() bytes, using
 * osmium::io::detail::reliable_write().
 *
 * @param fd File descriptor to write to.
 */
void dump_as_array(const int fd) final {
    const char* const raw_bytes = reinterpret_cast<const char*>(m_vector.data());
    const auto num_bytes = byte_size();
    osmium::io::detail::reliable_write(fd, raw_bytes, num_bytes);
}