/**
 * Estimates the dependent column values at @a point as a weighted average of the
 * nearby records in the data frame.
 *
 * Records are visited through the nearest-neighbour iterator until the iterator
 * reports a distance of 3 * _sigma or more. Each record closer than 3 sigma is
 * weighted by Normal::normal(distance, _sigma) divided by Normal::normal(0, _sigma),
 * so a record at distance zero gets a weight of one.
 *
 * @param point a full-width record; only the entries addressed by _indColumns are read.
 * @return a reference to the internal _result buffer, one entry per dependent column.
 *  The buffer is overwritten by this call.
 */
const vector<double>& KernelEstimationInterpolator::interpolate(const vector<double>& point) const
{
  const DataFrame& frame = *_df;
  const size_t indCount = _indColumns.size();

  // One zeroed accumulator slot per dependent column.
  _result.assign(_depColumns.size(), 0.0);

  // Reduce the incoming point to just the independent columns for the index lookup.
  vector<double> query(indCount);
  for (size_t c = 0; c < indCount; ++c)
  {
    query[c] = point[_indColumns[c]];
  }

  // Kernel value at zero distance; used to normalize every weight.
  const double peak = Normal::normal(0, _sigma);

  KnnIteratorNd neighbors(_getIndex(), query);
  double weightTotal = 0.0;
  while (neighbors.next() && neighbors.getDistance() < _sigma * 3.0)
  {
    const size_t recordId = neighbors.getId();
    const vector<double>& record = frame.getDataVector(recordId);

    // Euclidean distance between the query and this record over the independent columns.
    double distance = 0;
    for (size_t c = 0; c < indCount; ++c)
    {
      const double delta = query[c] - record[_indColumns[c]];
      distance += delta * delta;
    }
    distance = sqrt(distance);

    // Records at or beyond three sigma contribute nothing.
    if (!(distance / _sigma < 3.0))
    {
      continue;
    }

    // Normalized weight of this record.
    const double weight = Normal::normal(distance, _sigma) / peak;
    weightTotal += weight;
    assert(weight <= 1.000001);

    // Add the weighted dependent values to the running totals.
    for (size_t k = 0; k < _result.size(); ++k)
    {
      _result[k] += (record[_depColumns[k]] * weight);
    }
  }

  // Do less rubber sheeting as we get far away from tie points: the divisor is never
  // allowed to drop below the zero-distance kernel value.
  const double divisor = std::max(weightTotal, peak);
  for (size_t k = 0; k < _result.size(); ++k)
  {
    _result[k] /= divisor;
  }

  return _result;
}
double KernelEstimationInterpolator::_estimateError(unsigned int index) const { const DataFrame& df = *_df; vector<double> predicted(_depColumns.size(), 0.0); const vector<double>& uut = df.getDataVector(index); vector<double> simplePoint(_indColumns.size()); for (size_t i = 0; i < _indColumns.size(); i++) { simplePoint[i] = uut[_indColumns[i]]; } double n0 = Normal::normal(0, _sigma); KnnIteratorNd it(_getIndex(), simplePoint); double wSum = 0.0; while (it.next() && it.getDistance() < _sigma * 3.0) { size_t i = it.getId(); if (i == index) { continue; } const vector<double>& record = df.getDataVector(i); // figure out the distance between point and this data vector double d = 0; for (size_t j = 0; j < _indColumns.size(); j++) { double v = uut[_indColumns[j]] - record[_indColumns[j]]; d += v * v; } d = sqrt(d); if (d / _sigma < 3.0) { // calculate the weight of this sample. double w = Normal::normal(d, _sigma) / n0; wSum += w; assert(w <= 1.000001); // calculate the contribution to the predicted value. for (size_t j = 0; j < predicted.size(); j++) { predicted[j] += (record[_depColumns[j]] * w); } } } // do less rubber sheeting as we get far away from tie points. wSum = std::max(wSum, n0); double result = 0.0; for (size_t j = 0; j < predicted.size(); j++) { //cout << "uut[_depColumns[" << j << "]] " << uut[_depColumns[j]] << endl; //cout << "predicted[" << j << "] " << predicted[j] << endl; double diff = uut[_depColumns[j]] - (predicted[j] / wSum); result += diff * diff; } result = sqrt(result); //cout << "wSum: " << wSum << " result: " << result << endl; return result; }
/**
 * Estimates the dependent column values at @a point as a weighted average of
 * neighbouring records, where each record's weight comes from _calculateWeight(distance).
 *
 * Neighbours are pulled from the nearest-neighbour iterator until it is exhausted, the
 * per-estimate sample cap is reached, or the iteration count exceeds
 * _maxAllowedPerLoopOptimizationIterations. A record at distance zero short-circuits the
 * search and its dependent values are returned unchanged.
 *
 * Side effect: _iterations is raised to the number of iterations used by this call if
 * that is larger than its current value.
 *
 * @param point a full-width record; only the entries addressed by _indColumns are read.
 * @param ignoreId id of a record to leave out of the estimate. Ids that never compare
 *  equal to a record id leave nothing out.
 * @return a reference to the internal _result buffer, one entry per dependent column.
 *  The buffer is overwritten by this call.
 */
const vector<double>& IdwInterpolator::_interpolate(const vector<double>& point, int ignoreId) const
{
  // Maximum number of records that may contribute to a single estimate.
  const int maxSamples = 50;

  const DataFrame& df = *_df;

  // One zeroed accumulator slot per dependent column.
  _result.assign(_depColumns.size(), 0.0);

  // Reduce the incoming point to just the independent columns for the index lookup.
  vector<double> simplePoint(_indColumns.size());
  for (size_t i = 0; i < _indColumns.size(); i++)
  {
    simplePoint[i] = point[_indColumns[i]];
  }

  KnnIteratorNd it(_getIndex(), simplePoint);
  double wSum = 0.0;
  int samples = 0;
  int iterations = 0;
  while (it.next() && samples < maxSamples &&
         iterations <= _maxAllowedPerLoopOptimizationIterations)
  {
    const size_t i = it.getId();
    if (static_cast<int>(i) == ignoreId)
    {
      continue;
    }

    const vector<double>& record = df.getDataVector(i);

    // figure out the distance between point and this data vector
    double d = 0;
    for (size_t j = 0; j < _indColumns.size(); j++)
    {
      const double v = point[_indColumns[j]] - record[_indColumns[j]];
      d += v * v;
    }
    d = sqrt(d);

    // if the distance is zero then the weight is infinite and we don't need to look any
    // further.
    if (d == 0)
    {
      wSum = 1.0;
      // Set the result equal to this record's values. Assign rather than accumulate so
      // that anything gathered from earlier records cannot leak into an exact match.
      for (size_t j = 0; j < _result.size(); j++)
      {
        _result[j] = record[_depColumns[j]];
      }
      break;
    }

    // calculate the weight of this sample.
    const double w = _calculateWeight(d);
    wSum += w;

    // calculate the contribution to the predicted value.
    for (size_t j = 0; j < _result.size(); j++)
    {
      _result[j] += (record[_depColumns[j]] * w);
    }

    // Count the contributing record so the maxSamples cap in the loop condition is
    // actually enforced.
    samples++;
    iterations++;
  }

  // Remember the largest number of iterations any call has needed.
  if (iterations > _iterations)
  {
    _iterations = iterations;
  }

  // NOTE(review): if no record contributed, wSum is still 0.0 and every entry becomes
  // 0.0 / 0.0. Left unchanged here; confirm whether callers rely on that.
  for (size_t j = 0; j < _result.size(); j++)
  {
    _result[j] /= wSum;
  }

  return _result;
}