Exemplo n.º 1
0
double DelaunayInterpolator::_calculateFoldError(int fold, const vector<size_t>& indexes) const
{
  boost::shared_ptr<const DataFrame> originalDf = _df;
  boost::shared_ptr<DataFrame> copiedDf(new DataFrame());

  copiedDf->setFactorLabels(_df->getFactorLabels());
  copiedDf->setFactorTypes(_df->getFactorTypes());

  for (size_t i = 0; i < indexes.size(); i++)
  {
    if ((int)i % _kFold != fold)
    {
      copiedDf->addDataVector("", originalDf->getDataVector(indexes[i]));
    }
  }

  // make a new interpolator that only uses a subset of the data.
  DelaunayInterpolator uut;
  uut.setData(copiedDf);
  uut.setDependentColumns(_depColumnsLabels);
  uut.setIndependentColumns(_indColumnsLabels);

  double result = 0.0;

  for (size_t i = 0; i < indexes.size(); i++)
  {
    if ((int)i % _kFold == fold)
    {
      const vector<double>& v = originalDf->getDataVector(indexes[i]);
      const vector<double>& r = uut.interpolate(v);
      double e = 0.0;

      for (size_t j = 0; j < r.size(); j++)
      {
        double diff = fabs(r[j] - v[_depColumns[j]]);
        e += diff * diff;
      }
      result += e;
    }
  }

  return result;
}
  void simpleTest()
  {
    srand(0);
    DelaunayInterpolator uut;

    shared_ptr<DataFrame> dfPtr(new DataFrame());
    DataFrame& df = *dfPtr;
    vector<string> labels;
    labels.push_back("x");
    labels.push_back("y");
    labels.push_back("h");
    vector<int> types(3, DataFrame::Numerical);
    df.setFactorLabels(labels);
    df.setFactorTypes(types);

    // is there a cleaner way?
    vector<double> d(3);

    for (size_t i = 0; i < 500; i++)
    {
      d[0] = Random::generateUniform() * 2 - 1;
      d[1] = Random::generateUniform() * 2 - 1;
      double e = sqrt(d[0] * d[0] + d[1] * d[1]);
      d[2] = Normal::normal(e, .5);
      df.addDataVector("", d);
    }

    uut.setData(dfPtr);
    vector<string> ind;
    ind.push_back("x");
    ind.push_back("y");
    uut.setIndependentColumns(ind);
    vector<string> dep;
    dep.push_back("h");
    uut.setDependentColumns(dep);

    CPPUNIT_ASSERT_DOUBLES_EQUAL(0.0028, uut.estimateError(), 0.0001);
  }