const vector<double>& KernelEstimationInterpolator::interpolate(const vector<double>& point) const
{
  const DataFrame& df = *_df;

  _result.resize(_depColumns.size());
  for (size_t i = 0; i < _result.size(); i++)
  {
    _result[i] = 0.0;
  }

  vector<double> simplePoint(_indColumns.size());
  for (size_t i = 0; i < _indColumns.size(); i++)
  {
    simplePoint[i] = point[_indColumns[i]];
  }

  double n0 = Normal::normal(0, _sigma);

  KnnIteratorNd it(_getIndex(), simplePoint);
  double wSum = 0.0;
  while (it.next() && it.getDistance() < _sigma * 3.0)
  {
    size_t i = it.getId();
    const vector<double>& record = df.getDataVector(i);

    // figure out the distance between point and this data vector
    double d = 0;
    for (size_t j = 0; j < _indColumns.size(); j++)
    {
      double v = point[_indColumns[j]] - record[_indColumns[j]];
      d += v * v;
    }
    d = sqrt(d);
    if (d / _sigma < 3.0)
    {
      // calculate the weight of this sample.
      double w = Normal::normal(d, _sigma) / n0;
      wSum += w;

      assert(w <= 1.000001);

      // calculate the contribution to the predicted value.
      for (size_t j = 0; j < _result.size(); j++)
      {
        _result[j] += (record[_depColumns[j]] * w);
      }
    }
  }
  // do less rubber sheeting as we get far away from tie points.
  wSum = std::max(wSum, n0);

  for (size_t j = 0; j < _result.size(); j++)
  {
    _result[j] /= wSum;
  }

  return _result;
}
double KernelEstimationInterpolator::_estimateError(unsigned int index) const
{
  const DataFrame& df = *_df;

  vector<double> predicted(_depColumns.size(), 0.0);
  const vector<double>& uut = df.getDataVector(index);
  vector<double> simplePoint(_indColumns.size());
  for (size_t i = 0; i < _indColumns.size(); i++)
  {
    simplePoint[i] = uut[_indColumns[i]];
  }


  double n0 = Normal::normal(0, _sigma);

  KnnIteratorNd it(_getIndex(), simplePoint);
  double wSum = 0.0;
  while (it.next() && it.getDistance() < _sigma * 3.0)
  {
    size_t i = it.getId();
    if (i == index)
    {
      continue;
    }
    const vector<double>& record = df.getDataVector(i);

    // figure out the distance between point and this data vector
    double d = 0;
    for (size_t j = 0; j < _indColumns.size(); j++)
    {
      double v = uut[_indColumns[j]] - record[_indColumns[j]];
      d += v * v;
    }
    d = sqrt(d);
    if (d / _sigma < 3.0)
    {
      // calculate the weight of this sample.
      double w = Normal::normal(d, _sigma) / n0;
      wSum += w;

      assert(w <= 1.000001);

      // calculate the contribution to the predicted value.
      for (size_t j = 0; j < predicted.size(); j++)
      {
        predicted[j] += (record[_depColumns[j]] * w);
      }
    }
  }
  // do less rubber sheeting as we get far away from tie points.
  wSum = std::max(wSum, n0);

  double result = 0.0;
  for (size_t j = 0; j < predicted.size(); j++)
  {
    //cout << "uut[_depColumns[" << j << "]] " << uut[_depColumns[j]] << endl;
    //cout << "predicted[" << j << "] " << predicted[j] << endl;
    double diff = uut[_depColumns[j]] - (predicted[j] / wSum);
    result += diff * diff;
  }
  result = sqrt(result);

  //cout << "wSum: " << wSum << " result: " << result << endl;

  return result;
}
Example #3
0
/**
 * Estimates the dependent columns at a point as a weighted average of neighbouring data frame
 * rows, with each row's weight supplied by _calculateWeight(distance).
 *
 * Rows are visited in the order produced by the nearest-neighbour iterator. A row whose
 * distance to the point is exactly zero ends the search and its dependent values become the
 * result verbatim. As a side effect, _iterations is raised to the number of rows processed by
 * this call if that exceeds its current value.
 *
 * @param point vector indexed by data frame column; only the entries listed in _indColumns are
 *   read.
 * @param ignoreId row id to leave out of the estimate. A value that never matches a row id
 *   (presumably a negative one -- confirm with callers) disables the exclusion.
 * @return reference to _result, holding one value per entry of _depColumns. The buffer is reset
 *   at the start of every call. When no row contributes any weight the values are all zero.
 */
const vector<double>& IdwInterpolator::_interpolate(const vector<double>& point, int ignoreId) const
{
  const DataFrame& df = *_df;

  // Start every output component from zero.
  _result.assign(_depColumns.size(), 0.0);

  // Pull the independent coordinates out of the full-width point; this is the search key.
  vector<double> simplePoint(_indColumns.size());
  for (size_t i = 0; i < _indColumns.size(); i++)
  {
    simplePoint[i] = point[_indColumns[i]];
  }

  KnnIteratorNd it(_getIndex(), simplePoint);
  double wSum = 0.0;
  // NOTE(review): samples is never incremented, so the `samples < 50` test below can never stop
  // the loop; only the iteration cap does. It is left as-is because enforcing the 50 sample
  // limit would change the interpolated values -- confirm the intended behaviour.
  int samples = 0;
  int iterations = 0;
  while (it.next() && samples < 50 && iterations <= _maxAllowedPerLoopOptimizationIterations)
  {
    const size_t i = it.getId();
    if (static_cast<int>(i) == ignoreId)
    {
      // Skipped rows do not count toward the iteration cap.
      continue;
    }
    const vector<double>& record = df.getDataVector(i);

    // figure out the distance between point and this data vector
    double d = 0;
    for (size_t j = 0; j < _indColumns.size(); j++)
    {
      const double v = point[_indColumns[j]] - record[_indColumns[j]];
      d += v * v;
    }
    d = sqrt(d);

    // if the distance is zero then the weight is infinite and we don't need to look any further.
    if (d == 0)
    {
      wSum = 1.0;

      // Replace (rather than add to) whatever has been accumulated so far: with wSum reset to
      // one, any earlier weighted contributions would otherwise corrupt the exact match.
      for (size_t j = 0; j < _result.size(); j++)
      {
        _result[j] = record[_depColumns[j]];
      }
      break;
    }

    // calculate the weight of this sample.
    const double w = _calculateWeight(d);
    wSum += w;

    // calculate the contribution to the predicted value.
    for (size_t j = 0; j < _result.size(); j++)
    {
      _result[j] += (record[_depColumns[j]] * w);
    }

    iterations++;
  }

  // Remember the largest number of rows any single call has processed.
  if (iterations > _iterations)
  {
    _iterations = iterations;
  }

  // With no contributing rows wSum is still zero; dividing would turn every component into NaN
  // (0 / 0), so the zero-initialised result is returned unchanged in that case.
  if (wSum != 0.0)
  {
    for (size_t j = 0; j < _result.size(); j++)
    {
      _result[j] /= wSum;
    }
  }

  return _result;
}