Ejemplo n.º 1
0
/**
 * Compute the information needed to update the current context stats.
 *
 * For every position x listed in idxs, the table entry
 * s_zobtbl[x][m_context[x]] is XOR-ed into hash. The hash is updated in
 * place and is NOT reset first, so the caller decides the starting value.
 *
 * @param hash  in/out accumulator that receives the XOR of the selected entries
 * @param idxs  context positions whose table entries are folded into hash
 */
void SkipCTS::getContextInfo(zobhash_t &hash, const indices_t &idxs) const {

    // compute the hash; the counter is size_t so the comparison with
    // idxs.size() does not mix signed and unsigned operands
    for (size_t k = 0; k < idxs.size(); ++k) {
        const size_t x = idxs[k];
        hash ^= s_zobtbl[x][m_context[x]];
    }
}
Ejemplo n.º 2
0
/// Translate one index per parameter into that parameter's value.
///
/// For each parameter i, values[i] is overwritten with
/// m_params[i].m_values[indices[i]]. Both `indices` and `values` must already
/// have exactly one slot per parameter; this and the range of every index are
/// only checked with assert (i.e. not in NDEBUG builds).
void tuner_t::map(const indices_t& indices, strings_t& values) const
{
        const auto paramCount = m_params.size();

        assert(indices.size() == paramCount);
        assert(values.size() == paramCount);

        for (size_t p = 0; p != paramCount; ++ p)
        {
                const auto& choices = m_params[p].m_values;
                const auto pick = indices[p];

                // the selected index must address an existing value
                assert(pick < choices.size());
                values[p] = choices[pick];
        }
}
Ejemplo n.º 3
0
/**
 * Scan the samples of one feature and pick the threshold with the smallest
 * weighted error.
 *
 * The samples are visited in the order given by sortedIndices (presumably
 * ascending feature response -- the scan only accepts a split position when
 * the next sample has a different response, which assumes equal responses are
 * adjacent; confirm against the caller).
 *
 * @param weights        per-sample weights, indexed by sample index
 * @param sortedIndices  sample indices in scan order
 * @param featureIndex   which feature's responses to read from the training data
 * @param errorMin       out: smallest error found; 0 in the single-class case;
 *                       left at the largest double when nothing could be evaluated
 * @param thresholdMin   out: chosen threshold
 * @param alphaMin       out: +1 or -1, telling which of the two error sums
 *                       (the "left" pair or the "right" pair) was the smaller one
 * @param splitIndex     out: position in sortedIndices just after the chosen
 *                       sample; only written when the scan accepts a candidate
 * @return 0 when a threshold was selected by the scan; -1 when all the weight
 *         belongs to one class, when sortedIndices is empty, or when the scan
 *         accepted no candidate (e.g. NaN weights)
 */
int WeakDiscreteTreeLearner::findThreshold(
        const weights_t &weights,
        const indices_t & sortedIndices,
        const size_t featureIndex,
        double &errorMin, int &thresholdMin, int &alphaMin, int &splitIndex) const
{

    const FeaturesResponses &featuresResponses = _trainingData->getFeatureResponses();
    const FeaturesResponses::const_reference featureResponses = featuresResponses[featureIndex];

    // errorMin is a double, so the "nothing found yet" sentinel must be the
    // largest double (same sentinel as getErrorEstimate), not the largest int
    errorMin = std::numeric_limits<double>::max();

    // nothing to scan; the code below would otherwise read sortedIndices[0]
    if (sortedIndices.size() == 0)
    {
        return -1;
    }

    // total weight of each class
    double sumPos = 0;
    double sumNeg = 0;

    for (size_t i = 0; i < sortedIndices.size(); ++i)
    {
        const int index = sortedIndices[i];

        if (_classes[index] == _negativeClass)
        {
            sumNeg += weights[index];
        }
        else
        {
            sumPos += weights[index];
        }

    }

    if (_verbose > 3)
    {
        std::cout << "sumneg: " << sumNeg << " sumpos: " << sumPos << " both: " << sumPos + sumNeg << "\n";
    }

    // single-class cases: no scan is needed, the threshold is placed just
    // below the first response and the error is zero
    if (sumNeg == 0)
    {
        thresholdMin = featureResponses[sortedIndices[0]] - 1;
        alphaMin = -1;
        errorMin = 0;
        return -1;
    }

    if (sumPos == 0)
    {
        thresholdMin = featureResponses[sortedIndices[0]] - 1;
        alphaMin =  1;
        errorMin = 0;
        return -1;
    }


    // running sums for the "left" pair: starts with all the positive weight
    double positiveLeftError = sumPos;
    double negativeLeftError = 0;


    // running sums for the "right" pair: starts with all the negative weight
    double negativeRightError = sumNeg;
    double positiveRightError = 0;
    int minIndex = -1;


    // go over all sorted data elements
    for (size_t i = 0; i < sortedIndices.size(); ++i)
    {
        const int index = sortedIndices[i];
        const int threshold = featureResponses[index];

        // move the weight of the current sample between the running sums
        if (_classes[index] == _negativeClass)
        {
            negativeLeftError += weights[index];
            negativeRightError -= weights[index];
        }
        else
        {
            positiveLeftError -= weights[index];
            positiveRightError += weights[index];
        }


        // keep the smaller of the two pair sums, and remember which one it was
        double error = 0;
        int alpha = 0;

        if (positiveLeftError + negativeLeftError < positiveRightError + negativeRightError)
        {
            alpha = 1;
            error = positiveLeftError + negativeLeftError;
        }
        else
        {
            alpha = -1;
            error = positiveRightError + negativeRightError;

        }

        bool cond = false;

        if (i < sortedIndices.size() - 1)
        {
            // a threshold cannot separate two samples with the same response
            cond = (error < errorMin && threshold != featureResponses[sortedIndices[i+1]]);
        }
        else
        {
            cond = (error < errorMin)	;
        }

        if (cond)
        {
            errorMin = error;
            thresholdMin = threshold;
            alphaMin = alpha;
            minIndex = static_cast<int>(i);
            splitIndex = static_cast<int>(i + 1);
        }

    }// end of "for each sorted data element"

    // no candidate was accepted (only possible when the errors are not
    // comparable, e.g. NaN weights); bail out instead of indexing with -1
    if (minIndex < 0)
    {
        return -1;
    }

    //set the threshold between succeeding values, except the last one(+1)
    if (minIndex == static_cast<int>(sortedIndices.size() - 1))
    {
        thresholdMin = thresholdMin + 1;
    }
    else if (thresholdMin + 1 == featureResponses[sortedIndices[minIndex+1]])
    {
        thresholdMin = thresholdMin + 1;
    }
    else
    {
        // midpoint between the chosen response and the next one (truncated)
        thresholdMin = int((featureResponses[sortedIndices[minIndex]] + featureResponses[sortedIndices[minIndex+1]]) / 2.0);
    }

    if (_verbose > 3)
    {
        std::cout << "sorted data:\n";

        for (size_t i = 0; i < sortedIndices.size(); ++i)
        {
            const int index = sortedIndices[i];
            const int threshold = featureResponses[index];
            std::cout << std::setw(6) << threshold << " ";
        }

        std::cout << "sorted classes:\n";

        for (size_t i = 0; i < sortedIndices.size(); ++i)
        {
            int ind = sortedIndices[i];
            std::cout << std::setprecision(2) << std::setw(6) << _classes[ind]*weights[ind] << " ";
        }

        std::cout << "threshold: " << thresholdMin << " alpha: " << alphaMin << " error: " << errorMin;
    }

    return 0;
}
Ejemplo n.º 4
0
inline
int WeakDiscreteTreeLearner::getErrorEstimate(
        const weights_t &weights,
        const indices_t& indices, const size_t featureIndex,
        int num_bins , const int minv, const int maxv, double &error) const
{
    if ((maxv - minv) < num_bins)
    {
        num_bins = maxv - minv;
    }

    //std::cout << "binsize: " << binsize << std::endl;
    std::vector<double> bin_pos(num_bins + 1, 0), bin_neg(num_bins + 1, 0);

    double cumNeg = 0;
    double cumPos = 0;
    error = std::numeric_limits<double>::max();

    const FeaturesResponses &featuresResponses = _trainingData->getFeatureResponses();

    for (size_t i = 0; i < indices.size(); ++i)
    {
        const size_t trainingSampleIndex = indices[i];
        //FIXME CHECK this
        //if (weights[trainingSampleIndex]< 1E-12)
        //    continue;
        const int featureResponse = featuresResponses[featureIndex][trainingSampleIndex];
        const int bin = int(num_bins / double(maxv - minv) * (featureResponse - minv));
        //int splitpos = minv + (bin/(double)binsize) *(maxv-minv);
        //int bin2= binsize/maxv*((*_featureResp)[pos+ind[i]] - minv);
        //assert(bin==bin2);
        const int the_class = _classes[indices[i]];

        if (the_class == _negativeClass)
        {
            bin_neg[bin] += weights[indices[i]];
            cumNeg += weights[indices[i]];

        }
        else
        {
            bin_pos[bin] += weights[indices[i]];
            cumPos += weights[indices[i]];

        }
    }

    //run test by setting this to return 0 with error 0
    if (cumPos == 0 || cumNeg == 0)
    {
        return -1;
    }

    double
            //positives left
            positivesLeftError = cumPos,
            negativesLeftError = 0,
            //positives right
            negativesRightError = cumNeg,
            positivesRightError = 0;
    //int minbin = -1;

    for (int i = 0; i < num_bins; ++i)
    {
        positivesLeftError -= bin_pos[i];
        negativesLeftError += bin_neg[i];

        positivesRightError += bin_pos[i];
        negativesRightError -= bin_neg[i];

        double binError = 0;

        if (positivesLeftError + negativesLeftError < positivesRightError + negativesRightError)
        {
            binError = positivesLeftError + negativesLeftError;
        }
        else
        {
            binError = positivesRightError + negativesRightError;
        }

        // we keep the min error
        if (binError < error)
        {
            //minbin = i;
            error = binError;
        }
    }

    return 0;
}