/* Compute the information needed to update the current context stats.
 *
 * XOR-folds into `hash` one precomputed Zobrist key per requested index,
 * selected by the current context symbol stored at that index.
 *
 * @param hash  in/out: Zobrist hash accumulator, updated in place
 * @param idxs  indices into m_context that participate in this context
 */
void SkipCTS::getContextInfo(zobhash_t &hash, const indices_t &idxs) const {

    // compute the hash; size_t loop index avoids the signed/unsigned
    // comparison the old `int k` produced against idxs.size()
    for (size_t k = 0; k < idxs.size(); k++) {
        const size_t x = idxs[k];
        hash ^= s_zobtbl[x][m_context[x]];
    }
}
/// Translate one value-index per parameter into that parameter's string value.
/// `indices` and `values` must both already be sized to m_params.size();
/// values[i] receives m_params[i].m_values[indices[i]].
void tuner_t::map(const indices_t& indices, strings_t& values) const {
    const size_t count = m_params.size();
    assert(indices.size() == count);
    assert(values.size() == count);

    size_t slot = 0;
    for (const auto& parameter : m_params) {
        const auto choice = indices[slot];
        assert(choice < parameter.m_values.size());
        values[slot] = parameter.m_values[choice];
        ++slot;
    }
}
int WeakDiscreteTreeLearner::findThreshold( const weights_t &weights, const indices_t & sortedIndices, const size_t featureIndex, double &errorMin, int &thresholdMin, int &alphaMin, int &splitIndex) const { const FeaturesResponses &featuresResponses = _trainingData->getFeatureResponses(); const FeaturesResponses::const_reference featureResponses = featuresResponses[featureIndex]; //getSum of data double sumPos = 0; double sumNeg = 0; errorMin = std::numeric_limits<int>::max(); for (size_t i = 0; i < sortedIndices.size(); ++i) { const int index = sortedIndices[i]; if (_classes[index] == _negativeClass) { sumNeg += weights[index]; } else { sumPos += weights[index]; } } if (_verbose > 3) { std::cout << "sumneg: " << sumNeg << " sumpos: " << sumPos << " both: " << sumPos + sumNeg << "\n"; } if (sumNeg == 0) { thresholdMin = featureResponses[sortedIndices[0]] - 1; alphaMin = -1; errorMin = 0; return -1; } if (sumPos == 0) { thresholdMin = featureResponses[sortedIndices[0]] - 1; alphaMin = 1; errorMin = 0; return -1; } //positives left double positiveLeftError = sumPos; double negativeLeftError = 0; //positives right double negativeRightError = sumNeg; double positiveRightError = 0; int minIndex = -1; // go over all sorted data elements for (size_t i = 0; i < sortedIndices.size(); ++i) { const int index = sortedIndices[i]; const int threshold = featureResponses[index]; if (_classes[index] == _negativeClass) { negativeLeftError += weights[index]; negativeRightError -= weights[index]; } else { positiveLeftError -= weights[index]; positiveRightError += weights[index]; } double error = 0; int alpha = 0; if (positiveLeftError + negativeLeftError < positiveRightError + negativeRightError) { alpha = 1; error = positiveLeftError + negativeLeftError; } else { alpha = -1; error = positiveRightError + negativeRightError; } bool cond = false; if (i < sortedIndices.size() - 1) { cond = (error < errorMin && threshold != featureResponses[sortedIndices[i+1]]); } else { cond = (error < 
errorMin) ; } if (cond) { errorMin = error; thresholdMin = threshold; alphaMin = alpha; minIndex = i; splitIndex = i + 1; } }// end of "for each sorted data element" //set the threshold between suceeding values, except the last one(+1) if (minIndex == int(sortedIndices.size() - 1)) { thresholdMin = thresholdMin + 1; } else if (thresholdMin + 1 == featureResponses[sortedIndices[minIndex+1]]) { thresholdMin = thresholdMin + 1; } else { thresholdMin = int((featureResponses[sortedIndices[minIndex]] + featureResponses[sortedIndices[minIndex+1]]) / 2.0); } if (_verbose > 3) { std::cout << "sorted data:\n"; for (size_t i = 0; i < sortedIndices.size(); ++i) { const int index = sortedIndices[i]; const int threshold = featureResponses[index]; std::cout << std::setw(6) << threshold << " "; } std::cout << "sorted classes:\n"; for (size_t i = 0; i < sortedIndices.size(); ++i) { int ind = sortedIndices[i]; std::cout << std::setprecision(2) << std::setw(6) << _classes[ind]*weights[ind] << " "; } std::cout << "threshold: " << thresholdMin << " alpha: " << alphaMin << " error: " << errorMin; } return 0; }
inline int WeakDiscreteTreeLearner::getErrorEstimate( const weights_t &weights, const indices_t& indices, const size_t featureIndex, int num_bins , const int minv, const int maxv, double &error) const { if ((maxv - minv) < num_bins) { num_bins = maxv - minv; } //std::cout << "binsize: " << binsize << std::endl; std::vector<double> bin_pos(num_bins + 1, 0), bin_neg(num_bins + 1, 0); double cumNeg = 0; double cumPos = 0; error = std::numeric_limits<double>::max(); const FeaturesResponses &featuresResponses = _trainingData->getFeatureResponses(); for (size_t i = 0; i < indices.size(); ++i) { const size_t trainingSampleIndex = indices[i]; //FIXME CHECK this //if (weights[trainingSampleIndex]< 1E-12) // continue; const int featureResponse = featuresResponses[featureIndex][trainingSampleIndex]; const int bin = int(num_bins / double(maxv - minv) * (featureResponse - minv)); //int splitpos = minv + (bin/(double)binsize) *(maxv-minv); //int bin2= binsize/maxv*((*_featureResp)[pos+ind[i]] - minv); //assert(bin==bin2); const int the_class = _classes[indices[i]]; if (the_class == _negativeClass) { bin_neg[bin] += weights[indices[i]]; cumNeg += weights[indices[i]]; } else { bin_pos[bin] += weights[indices[i]]; cumPos += weights[indices[i]]; } } //run test by setting this to return 0 with error 0 if (cumPos == 0 || cumNeg == 0) { return -1; } double //positives left positivesLeftError = cumPos, negativesLeftError = 0, //positives right negativesRightError = cumNeg, positivesRightError = 0; //int minbin = -1; for (int i = 0; i < num_bins; ++i) { positivesLeftError -= bin_pos[i]; negativesLeftError += bin_neg[i]; positivesRightError += bin_pos[i]; negativesRightError -= bin_neg[i]; double binError = 0; if (positivesLeftError + negativesLeftError < positivesRightError + negativesRightError) { binError = positivesLeftError + negativesLeftError; } else { binError = positivesRightError + negativesRightError; } // we keep the min error if (binError < error) { //minbin = i; error = 
binError; } } return 0; }