예제 #1
0
static void AssignRandomWeights(int learnSampleCount,
                                TLearnContext* ctx,
                                TFold* fold) {
    TVector<float> sampleWeights;
    sampleWeights.yresize(learnSampleCount);

    const ui64 randSeed = ctx->Rand.GenRand();
    NPar::TLocalExecutor::TExecRangeParams blockParams(0, learnSampleCount);
    blockParams.SetBlockSize(10000);
    ctx->LocalExecutor.ExecRange([&](int blockIdx) {
        TFastRng64 rand(randSeed + blockIdx);
        rand.Advance(10); // reduce correlation between RNGs in different threads
        const float baggingTemperature = ctx->Params.ObliviousTreeOptions->BootstrapConfig->GetBaggingTemperature();
        float* sampleWeightsData = sampleWeights.data();
        NPar::TLocalExecutor::BlockedLoopBody(blockParams, [&rand, sampleWeightsData, baggingTemperature](int i) {
            const float w = -FastLogf(rand.GenRandReal1() + 1e-100);
            sampleWeightsData[i] = powf(w, baggingTemperature);
        })(blockIdx);
    }, 0, blockParams.GetBlockCount(), NPar::TLocalExecutor::WAIT_COMPLETE);

    TFold& ff = *fold;
    ff.AssignPermuted(sampleWeights, &ff.SampleWeights);
    if (!ff.LearnWeights.empty()) {
        for (int i = 0; i < learnSampleCount; ++i) {
            ff.SampleWeights[i] *= ff.LearnWeights[i];
        }
    }

    const int approxDimension = ff.GetApproxDimension();
    for (TFold::TBodyTail& bt : ff.BodyTailArr) {
        for (int dim = 0; dim < approxDimension; ++dim) {
            double* weightedDerData = bt.WeightedDer[dim].data();
            const double* derData = bt.Derivatives[dim].data();
            const float* sampleWeightsData = ff.SampleWeights.data();
            ctx->LocalExecutor.ExecRange([=](int z) {
                weightedDerData[z] = derData[z] * sampleWeightsData[z];
            }, NPar::TLocalExecutor::TExecRangeParams(bt.BodyFinish, bt.TailFinish).SetBlockSize(4000)
             , NPar::TLocalExecutor::WAIT_COMPLETE);
        }
    }
}
static inline void BinarizeFloatFeature(int featureIdx,
                                        const TDocumentStorage& docStorage,
                                        const TDocSelector& docSelector,
                                        const TVector<float>& borders,
                                        ENanMode nanMode,
                                        NPar::TLocalExecutor& localExecutor,
                                        int floatFeatureIdx,
                                        TAllFeatures* features,
                                        bool* seenNans) {
    size_t docCount = docSelector.GetDocCount();
    const TVector<float>& src = docStorage.Factors[featureIdx];
    TVector<ui8>& hist = features->FloatHistograms[floatFeatureIdx];

    hist.resize(docCount);

    ui8* histData = hist.data();
    const float* featureBorderData = borders.data();
    const int featureBorderSize = borders.ysize();

    localExecutor.ExecRange([&] (int i) {
        const auto& featureVal = src[docSelector(i)];
        if (IsNan(featureVal)) {
            *seenNans = true;
            histData[i] = nanMode == ENanMode::Min ? 0 : featureBorderSize;
        } else {
            int j = 0;
            while (j < featureBorderSize && featureVal > featureBorderData[j]) {
                ++histData[i];
                ++j;
            }
        //    histData[i] = LowerBound(featureBorderData, featureBorderData + featureBorderSize, featureVal) - featureBorderData;
        }
    }
    , NPar::TLocalExecutor::TExecRangeParams(0, docCount).SetBlockSize(1000)
    , NPar::TLocalExecutor::WAIT_COMPLETE);
}
예제 #3
0
 /// Writes the contents of m_vector, viewed as raw bytes, to the file
 /// descriptor fd: byte_size() bytes starting at m_vector.data().
 void dump_as_array(const int fd) final {
     const char* const raw_bytes = reinterpret_cast<const char*>(m_vector.data());
     osmium::io::detail::reliable_write(fd, raw_bytes, byte_size());
 }