예제 #1
0
static TVector<TFeaturePathElement> UnwindFeaturePath(const TVector<TFeaturePathElement>& oldFeaturePath, size_t eraseElementIdx) {
    const size_t pathLength = oldFeaturePath.size();
    CB_ENSURE(pathLength > 0, "Path to unwind must have at least one element");

    TVector<TFeaturePathElement> newFeaturePath(oldFeaturePath.begin(), oldFeaturePath.begin() + pathLength - 1);

    for (size_t elementIdx = eraseElementIdx; elementIdx < pathLength - 1; ++elementIdx) {
        newFeaturePath[elementIdx].Feature = oldFeaturePath[elementIdx + 1].Feature;
        newFeaturePath[elementIdx].ZeroPathsFraction = oldFeaturePath[elementIdx + 1].ZeroPathsFraction;
        newFeaturePath[elementIdx].OnePathsFraction = oldFeaturePath[elementIdx + 1].OnePathsFraction;
    }

    const double onePathsFraction = oldFeaturePath[eraseElementIdx].OnePathsFraction;
    const double zeroPathsFraction = oldFeaturePath[eraseElementIdx].ZeroPathsFraction;
    double weightDiff = oldFeaturePath[pathLength - 1].Weight;

    if (!FuzzyEquals(onePathsFraction, 0.0)) {
        for (int elementIdx = pathLength - 2; elementIdx >= 0; --elementIdx) {
            double oldWeight = newFeaturePath[elementIdx].Weight;
            newFeaturePath[elementIdx].Weight = weightDiff * pathLength / (onePathsFraction * (elementIdx + 1));
            weightDiff = oldWeight - newFeaturePath[elementIdx].Weight * zeroPathsFraction * (pathLength - elementIdx - 1) / pathLength;
        }
    } else {
        for (int elementIdx = pathLength - 2; elementIdx >= 0; --elementIdx) {
            newFeaturePath[elementIdx].Weight *= pathLength / (zeroPathsFraction * (pathLength - elementIdx - 1));
        }
    }


    return newFeaturePath;
}
예제 #2
0
파일: diff.cpp 프로젝트: iamnik13/catboost
 TConstArrayRef<T> Remap(const TConstArrayRef<ui64>& keys) {
     if (keys.empty()) {
         return TConstArrayRef<T>();
     }
     Y_ASSERT(keys.begin() >= Keys.begin() && keys.begin() <= Keys.end());
     Y_ASSERT(keys.end() >= Keys.begin() && keys.end() <= Keys.end());
     return TConstArrayRef<T>(Words[keys.begin() - Keys.begin()].begin(), Words[keys.end() - Keys.begin() - 1].end());
 }
예제 #3
0
TVector<TVector<double>> PrepareEval(const EPredictionType predictionType,
                                     const TVector<TVector<double>>& approx,
                                     NPar::TLocalExecutor* localExecutor) {
    TVector<TVector<double>> result;
    switch (predictionType) {
        case EPredictionType::Probability:
            if (IsMulticlass(approx)) {
                result = CalcSoftmax(approx, localExecutor);
            } else {
                result = {CalcSigmoid(approx[0])};
            }
            break;
        case EPredictionType::Class:
            result.resize(1);
            result[0].reserve(approx.size());
            if (IsMulticlass(approx)) {
                TVector<int> predictions = {SelectBestClass(approx, localExecutor)};
                result[0].assign(predictions.begin(), predictions.end());
            } else {
                for (const double prediction : approx[0]) {
                    result[0].push_back(prediction > 0);
                }
            }
            break;
        case EPredictionType::RawFormulaVal:
            result = approx;
            break;
        default:
            Y_ASSERT(false);
    }
    return result;
}
예제 #4
0
TVector<std::pair<double, TFeature>> CalcFeatureEffect(const TFullModel& model, const TPool& pool, int threadCount/*= 1*/) {
    CB_ENSURE(pool.Docs.GetDocCount() != 0, "Pool should not be empty");
    if (model.GetTreeCount() == 0) {
        return TVector<std::pair<double, TFeature>>();
    }
    int featureCount = pool.Docs.GetFactorsCount();
    NJson::TJsonValue jsonParams = ReadTJsonValue(model.ModelInfo.at("params"));
    jsonParams["system_options"].InsertValue("thread_count", threadCount);
    TCommonContext ctx(jsonParams, Nothing(), Nothing(), featureCount, pool.CatFeatures, pool.FeatureId);

    CB_ENSURE(model.GetTreeCount() != 0, "model should not be empty");
    CB_ENSURE(pool.Docs.GetFactorsCount() > 0, "no features in pool");
    TVector<TFeature> features;
    TVector<TMxTree> trees = BuildMatrixnetTrees(model, &features);

    TVector<TVector<ui64>> leavesStatistics = CollectLeavesStatistics(pool, model);
    TVector<double> effect = CalcEffect(trees, leavesStatistics);
    TVector<std::pair<double, int>> effectWithFeature;
    for (int i = 0; i < effect.ysize(); ++i) {
        effectWithFeature.emplace_back(effect[i], i);
    }
    Sort(effectWithFeature.begin(), effectWithFeature.end(), std::greater<std::pair<double, int>>());

    TVector<std::pair<double, TFeature>> result;
    for (int i = 0; i < effectWithFeature.ysize(); ++i) {
        result.emplace_back(effectWithFeature[i].first, features[effectWithFeature[i].second]);
    }
    return result;
}
예제 #5
0
		void set_points(const TVector &x, const TVector &y)
		{
			assert(x.size() == y.size());
			assert(x.size()>2);
			int	n =static_cast<int>(x.size());

			m_x.assign(x.begin(), x.end());
			m_c0.assign(y.begin(), y.end());
			m_c1.resize(n);
			m_c2.resize(n);
			m_c3.resize(n);

			Vector<T, e_host> m_h(n);
			Vector<T, e_host> m_l(n);
			Vector<T, e_host> m_mu(n);
			Vector<T, e_host> m_z(n);

			n--;

			for(auto i = 0; i < n; i++)
			{
				m_h[i] = m_x[i+1]-m_x[i];
			}

			m_l[0] = 1;
			m_mu[0] = 0;
			m_z[0] = 0;

			for(auto i = 1; i < n; i++)
			{
				m_l[i] = 2*(m_x[i+1]-m_x[i-1])-m_h[i-1]*m_mu[i-1];
				m_mu[i] = m_h[i]/m_l[i];
				auto alpha = 3*(m_c0[i+1]-m_c0[i])/m_h[i]-3*(m_c0[i]-m_c0[i-1])/m_h[i-1];
				m_z[i] = (alpha-m_h[i-1]*m_z[i-1])/m_l[i];
			}

			m_l[n] = 1;
			m_z[n] = 0;
			m_c2[n] = 0;

			for(auto i =n-1; i >= 0; i--)
			{
				m_c2[i] = m_z[i] - m_mu[i]*m_c2[i+1];
				m_c1[i] = (m_c0[i+1]-m_c0[i])/m_h[i]-m_h[i]*(m_c2[i+1]+2*m_c2[i])/3;
				m_c3[i] = (m_c2[i+1]-m_c2[i])/(3*m_h[i]);
			}
		}
예제 #6
0
static TVector<TFeaturePathElement> ExtendFeaturePath(const TVector<TFeaturePathElement>& oldFeaturePath,
                                                      double zeroPathsFraction,
                                                      double onePathsFraction,
                                                      int feature) {
    const size_t pathLength = oldFeaturePath.size();

    TVector<TFeaturePathElement> newFeaturePath(pathLength + 1);
    Copy(oldFeaturePath.begin(), oldFeaturePath.begin() + pathLength, newFeaturePath.begin());

    const double weight = pathLength == 0 ? 1.0 : 0.0;
    newFeaturePath[pathLength] = TFeaturePathElement(feature, zeroPathsFraction, onePathsFraction, weight);

    for (int elementIdx = pathLength - 1; elementIdx >= 0; --elementIdx) {
        newFeaturePath[elementIdx + 1].Weight += onePathsFraction * newFeaturePath[elementIdx].Weight * (elementIdx + 1) / (pathLength + 1);
        newFeaturePath[elementIdx].Weight = zeroPathsFraction * newFeaturePath[elementIdx].Weight * (pathLength - elementIdx) / (pathLength + 1);
    }

    return newFeaturePath;
}
예제 #7
0
void CalcSoftmax(const TVector<double>& approx, TVector<double>* softmax) {
    double maxApprox = *MaxElement(approx.begin(), approx.end());
    double sumExpApprox = 0;
    for (int dim = 0; dim < approx.ysize(); ++dim) {
        double expApprox = exp(approx[dim] - maxApprox);
        (*softmax)[dim] = expApprox;
        sumExpApprox += expApprox;
    }
    for (auto& curSoftmax : *softmax) {
        curSoftmax /= sumExpApprox;
    }
}
예제 #8
0
void TestsOfTheRevolutionWillNotFallSilent(){
	TVector<int> a;
	TVector<int> b;
	for (int i=0; i<20; ++i){
		a.push_back(i);
		b.push_back(20-i);
	}
	cout << "a: ";
	print(a);
	cout << "b: ";
	print(b);
	cout << "Popback for both" << endl;
	a.pop_back();
	cout << "a: ";
	print(a);
	cout << "b: ";
	print(b);
	cout << "Swhap" << endl;
	a.Swap(b);
	cout << "a: ";
	print(a);
	cout << "b: ";
	print(b);
	cout << "INSERTIONNN" << endl;
	a.insert(a.begin()+2, 9);
	b.insert(b.begin()+2, 0);
	cout << "a: ";
	print(a);
	cout << "b: ";
	print(b);
	cout << "More insertion!" << endl;
	a.insert(a.begin(), 3, 4);
	cout << "a: ";
	print(a);
	cout << "b: ";
	print(b);
	cout << "Destroy the lesser middle nodes!" << endl;
	a.erase(a.begin()+7);
	b.erase(b.begin()+6, b.begin()+8);
	cout << "a: ";
	print(a);
	cout << "b: ";
	print(b);
	system("pause");
};
예제 #9
0
TWxTestResult WxTest(const TVector<double>& baseline,
                     const TVector<double>& test) {
    TVector<double> diffs;

    for (ui32 i = 0; i < baseline.size(); i++) {
        const double i1 = baseline[i];
        const double i2 = test[i];
        const double diff = i1 - i2;
        if (diff != 0) {
            diffs.push_back(diff);
        }
    }

    if (diffs.size() < 2) {
        TWxTestResult result;
        result.PValue = 0.5;
        result.WMinus = result.WPlus = 0;
        return result;
    }

    Sort(diffs.begin(), diffs.end(), [&](double x, double y) {
        return Abs(x) < Abs(y);
    });

    double w_plus = 0;
    double w_minus = 0;
    double n = diffs.size();


    for (int i = 0; i < n; ++i) {
        double sum = 0;
        double weight = 0;
        int j = i;
        double signPlus = 0;
        double signMinus = 0;

        for (j = i; j < n && diffs[j] == diffs[i]; ++j) {
            sum += (j + 1);
            ++weight;
            signPlus += diffs[i] >= 0;
            signMinus += diffs[i] < 0;
        }

        const double meanRank = sum / weight;
        w_plus += signPlus * meanRank;
        w_minus += signMinus * meanRank;

        i = j - 1;
    }

    TWxTestResult result;
    result.WPlus = w_plus;
    result.WMinus = w_minus;

    const double w = result.WPlus - result.WMinus;
    if (n > 16) {
        double z = w / sqrt(n * (n + 1) * (2 * n + 1) * 1.0 / 6);
        result.PValue = 2 * (1.0 - NormalCDF(Abs(z)));
    } else {
        result.PValue = 2 * CalcLevelOfSignificanceWXMPSR(Abs(w), (int) n);
    }
    result.PValue = 1.0 - result.PValue;
    return result;
}
예제 #10
0
 iterator begin() {
     return m_vector.begin();
 }
예제 #11
0
void print(TVector<T> v){
	for(TVector<int>::Iterator it=v.begin(); it!=v.end(); ++it)
		cout << ' ' << *it;
	cout << endl;
};
예제 #12
0
void TEvalResult::SetPredictionTypes(const TVector<EPredictionType>& predictionTypes_) {
    PredictionTypes.clear();
    PredictionTypes.assign(predictionTypes_.begin(), predictionTypes_.end());
}
예제 #13
0
// Select the best matching function for 'call' from 'candidateList'.
//
// Assumptions
//
// There is no exact match, so a selection algorithm needs to run. That is, the
// language-specific handler should check for exact match first, to
// decide what to do, before calling this selector.
//
// Input
//
//  * list of candidate signatures to select from
//  * the call
//  * a predicate function convertible(from, to) that says whether or not type
//    'from' can implicitly convert to type 'to' (it includes the case of what
//    the calling language would consider a matching type with no conversion
//    needed)
//  * a predicate function better(from1, from2, to1, to2) that says whether or
//    not a conversion from <-> to2 is considered better than a conversion
//    from <-> to1 (both in and out directions need testing, as declared by the
//    formal parameter)
//
// Output
//
//  * best matching candidate (or none, if no viable candidates found)
//  * whether there was a tie for the best match (ambiguous overload selection,
//    caller's choice for how to report)
//
const TFunction* TParseContextBase::selectFunction(
    const TVector<const TFunction*> candidateList,
    const TFunction& call,
    std::function<bool(const TType& from, const TType& to, TOperator op, int arg)> convertible,
    std::function<bool(const TType& from, const TType& to1, const TType& to2)> better,
    /* output */ bool& tie)
{
//
// Operation
//
// 1. Prune the input list of candidates down to a list of viable candidates,
// where each viable candidate has
//
//  * at least as many parameters as there are calling arguments, with any
//    remaining parameters being optional or having default values
//  * each parameter is true under convertible(A, B), where A is the calling
//    type for in and B is the formal type, and in addition, for out B is the
//    calling type and A is the formal type
//
// 2. If there are no viable candidates, return with no match.
//
// 3. If there is only one viable candidate, it is the best match.
//
// 4. If there are multiple viable candidates, select the first viable candidate
// as the incumbent. Compare the incumbent to the next viable candidate, and if
// that candidate is better (bullets below), make it the incumbent. Repeat, with
// a linear walk through the viable candidate list. The final incumbent will be
// returned as the best match. A viable candidate is better than the incumbent if
//
//  * it has a function argument with a better(...) conversion than the incumbent,
//    for all directions needed by in and out
//  * the incumbent has no argument with a better(...) conversion then the
//    candidate, for either in or out (as needed)
//
// 5. Check for ambiguity by comparing the best match against all other viable
// candidates. If any other viable candidate has a function argument with a
// better(...) conversion than the best candidate (for either in or out
// directions), return that there was a tie for best.
//

    tie = false;

    // 1. prune to viable...
    TVector<const TFunction*> viableCandidates;
    for (auto it = candidateList.begin(); it != candidateList.end(); ++it) {
        const TFunction& candidate = *(*it);

        // to even be a potential match, number of arguments must be >= the number of
        // fixed (non-default) parameters, and <= the total (including parameter with defaults).
        if (call.getParamCount() < candidate.getFixedParamCount() ||
            call.getParamCount() > candidate.getParamCount())
            continue;

        // see if arguments are convertible
        bool viable = true;

        // The call can have fewer parameters than the candidate, if some have defaults.
        const int paramCount = std::min(call.getParamCount(), candidate.getParamCount());
        for (int param = 0; param < paramCount; ++param) {
            if (candidate[param].type->getQualifier().isParamInput()) {
                if (! convertible(*call[param].type, *candidate[param].type, candidate.getBuiltInOp(), param)) {
                    viable = false;
                    break;
                }
            }
            if (candidate[param].type->getQualifier().isParamOutput()) {
                if (! convertible(*candidate[param].type, *call[param].type, candidate.getBuiltInOp(), param)) {
                    viable = false;
                    break;
                }
            }
        }

        if (viable)
            viableCandidates.push_back(&candidate);
    }

    // 2. none viable...
    if (viableCandidates.size() == 0)
        return nullptr;

    // 3. only one viable...
    if (viableCandidates.size() == 1)
        return viableCandidates.front();

    // 4. find best...
    const auto betterParam = [&call, &better](const TFunction& can1, const TFunction& can2) -> bool {
        // is call -> can2 better than call -> can1 for any parameter
        bool hasBetterParam = false;
        for (int param = 0; param < call.getParamCount(); ++param) {
            if (better(*call[param].type, *can1[param].type, *can2[param].type)) {
                hasBetterParam = true;
                break;
            }
        }
        return hasBetterParam;
    };

    const auto equivalentParams = [&call, &better](const TFunction& can1, const TFunction& can2) -> bool {
        // is call -> can2 equivalent to call -> can1 for all the call parameters?
        for (int param = 0; param < call.getParamCount(); ++param) {
            if (better(*call[param].type, *can1[param].type, *can2[param].type) ||
                better(*call[param].type, *can2[param].type, *can1[param].type))
                return false;
        }
        return true;
    };

    const TFunction* incumbent = viableCandidates.front();
    for (auto it = viableCandidates.begin() + 1; it != viableCandidates.end(); ++it) {
        const TFunction& candidate = *(*it);
        if (betterParam(*incumbent, candidate) && ! betterParam(candidate, *incumbent))
            incumbent = &candidate;
    }

    // 5. ambiguity...
    for (auto it = viableCandidates.begin(); it != viableCandidates.end(); ++it) {
        if (incumbent == *it)
            continue;
        const TFunction& candidate = *(*it);

        // In the case of default parameters, it may have an identical initial set, which is
        // also ambiguous
        if (betterParam(*incumbent, candidate) || equivalentParams(*incumbent, candidate))
            tie = true;
    }

    return incumbent;
}
예제 #14
0
static void CalcShapValuesRecursive(const TObliviousTrees& forest,
                                    const TVector<int>& binFeaturesMapping,
                                    const TVector<ui8>& binFeaturesValues,
                                    size_t treeIdx,
                                    int depth,
                                    const TVector<TVector<size_t>>& subtreeSizes,
                                    int dimension,
                                    size_t nodeIdx,
                                    const TVector<TFeaturePathElement>& oldFeaturePath,
                                    double zeroPathsFraction,
                                    double onePathsFraction,
                                    int feature,
                                    TVector<double>* shapValuesPtr) {
    TVector<double>& shapValues = *shapValuesPtr;
    TVector<TFeaturePathElement> featurePath = ExtendFeaturePath(oldFeaturePath, zeroPathsFraction, onePathsFraction, feature);

    if (depth == forest.TreeSizes[treeIdx]) {
        for (size_t elementIdx = 1; elementIdx < featurePath.size(); ++elementIdx) {
            TVector<TFeaturePathElement> unwoundPath = UnwindFeaturePath(featurePath, elementIdx);
            double weightSum = 0.0;
            for (const TFeaturePathElement& unwoundPathElement : unwoundPath) {
                weightSum += unwoundPathElement.Weight;
            }
            const TFeaturePathElement& element = featurePath[elementIdx];
            const int approxDimension = forest.ApproxDimension;

            shapValues[element.Feature] += weightSum * (element.OnePathsFraction - element.ZeroPathsFraction)
                                         * forest.LeafValues[treeIdx][nodeIdx * approxDimension + dimension];
        }
    } else {
        const TRepackedBin& split = forest.GetRepackedBins()[forest.TreeStartOffsets[treeIdx] + depth];
        const int splitBinFeature = split.FeatureIndex;
        const int splitFlatFeature = binFeaturesMapping[splitBinFeature];
        const ui8 threshold = split.SplitIdx;
        const ui8 xorMask = split.XorMask;

        double newZeroPathsFraction = 1.0;
        double newOnePathsFraction = 1.0;

        const auto sameFeatureElement = FindIf(featurePath.begin(), featurePath.end(), [splitFlatFeature](const TFeaturePathElement& element) {return element.Feature == splitFlatFeature;});

        if (sameFeatureElement != featurePath.end()) {
            const size_t sameFeatureIndex = sameFeatureElement - featurePath.begin();
            newZeroPathsFraction = featurePath[sameFeatureIndex].ZeroPathsFraction;
            newOnePathsFraction = featurePath[sameFeatureIndex].OnePathsFraction;
            featurePath = UnwindFeaturePath(featurePath, sameFeatureIndex);
        }

        const size_t goNodeIdx = nodeIdx | (((binFeaturesValues[splitBinFeature] ^ xorMask) >= threshold) << depth);
        const size_t skipNodeIdx = goNodeIdx ^ (1 << depth);

        if (subtreeSizes[depth + 1][goNodeIdx] > 0) {
            CalcShapValuesRecursive(forest, binFeaturesMapping, binFeaturesValues, treeIdx, depth + 1, subtreeSizes, dimension,
                                    goNodeIdx, featurePath,
                                    newZeroPathsFraction * subtreeSizes[depth + 1][goNodeIdx] / subtreeSizes[depth][nodeIdx],
                                    newOnePathsFraction, splitFlatFeature,
                                    &shapValues);
        }

        if (subtreeSizes[depth + 1][skipNodeIdx] > 0) {
            CalcShapValuesRecursive(forest, binFeaturesMapping, binFeaturesValues, treeIdx, depth + 1, subtreeSizes, dimension,
                                    skipNodeIdx, featurePath,
                                    newZeroPathsFraction * subtreeSizes[depth + 1][skipNodeIdx] / subtreeSizes[depth][nodeIdx],
                                    /*onePathFraction*/ 0, splitFlatFeature,
                                    &shapValues);
        }
    }
}
예제 #15
0
파일: diff.cpp 프로젝트: iamnik13/catboost
 TConstArrayRef<ui64> GetKeys() const {
     return TConstArrayRef<ui64>(Keys.begin(), Keys.end());
 }