Ejemplo n.º 1
0
    void sanityCheck()
    {
      DataFrame df;
      populateSimpleDataFrame(df);

      CfsSubsetEvaluator uut;
      uut.setDataFrame(df);
      // these values seem reasonable, but have not been manually validated. If there is a 
      // discrepancy at some time these should be manually verified.
      ASSERT_DBL_EQL(.447214, uut.evaluateSubset(IntVec(0, 1, 2)));
      ASSERT_DBL_EQL(.707107, uut.evaluateSubset(IntVec(0, 1)));
      ASSERT_DBL_EQL(1, uut.evaluateSubset(IntVec(0)));
    }
Ejemplo n.º 2
0
  /**
   * Scores a candidate genome.
   *
   * The genome is evaluated once per data vector of the working copy and the results are
   * written into the working factor column. The working copy is then discretized and the
   * CFS merit of the subset containing every factor is computed. The returned fitness is that
   * merit minus _baseScore, with a penalty applied when the genome has more than
   * MAX_DESIRABLE_SIZE nodes.
   *
   * @param genome the candidate to score; must be a CalculatorGenome (the reference
   *   dynamic_cast throws std::bad_cast otherwise).
   * @return the penalized score; -1e9 if the genome evaluates too slowly or discretization
   *   reports all nulls; negative infinity if the size penalty multiplier drops to zero or
   *   below.
   */
  double CfsFitnessFunction::calculateFitness(const Genome& genome)
  {
    // Sentinel fitness returned for genomes that are rejected outright.
    const double REJECTED_FITNESS = -1e9;

    const CalculatorGenome& cg = dynamic_cast<const CalculatorGenome&>(genome);
    _workingCopy->setFactorType(_workingFactor, DataFrame::Numerical);
    double start = Time::getTime();
    //cout << cg.toString() << endl;
    // one tile per 3 minute
    // Floating-point literals are required here: 250000 / 180 with integer operands truncates
    // to 1388 and silently lowers the throughput floor.
    const double MIN_EVAL_PER_SECOND = 250000.0 / 180.0;
    for (unsigned int i = 0; i < _workingCopy->getNumDataVectors(); i++)
    {
      double elapsed = Time::getTime() - start;
      // Only judge throughput once at least half a second of timing data is available.
      if (elapsed >= 0.5)
      {
        double speed = i / elapsed;
        cout << "speed " << speed << "\r";
        // Give up on genomes that evaluate too slowly or that have already run for 10 time
        // units (presumably seconds -- confirm against Time::getTime()).
        if (speed < MIN_EVAL_PER_SECOND || elapsed >= 10)
        {
          //cout << endl << "Too slow: " << cg.toString() << endl;
          return REJECTED_FITNESS;
        }
        cout.flush();
      }
      const double value = cg.calculateValue(_workingUids[i]);
      _workingCopy->setDataElement(i, _workingFactor, value);
    }
    DataFrameDiscretizer dfd;
    try
    {
      dfd.discretize(*_workingCopy);
    }
    catch (const DataFrameDiscretizer::AllNullsException&)
    {
      // because of no valid values.
      return REJECTED_FITNESS;
    }

    CfsSubsetEvaluator cse;
    cse.setDataFrame(*_workingCopy);
    // Evaluate the subset that contains every factor index: 0 .. numFactors - 1.
    std::vector<int> allFactors;
    allFactors.resize(_workingCopy->getNumFactors());
    for (unsigned int i = 0; i < _workingCopy->getNumFactors(); i++)
    {
      allFactors[i] = static_cast<int>(i);
    }
    double score = cse.evaluateSubset(allFactors) - _baseScore;
    int nodeCount = cg.countNodes();
    // penalize for large trees
    const int MAX_DESIRABLE_SIZE = 10;
    if (nodeCount > MAX_DESIRABLE_SIZE)
    {
      // m starts just below 1 and shrinks as the tree grows past the desirable size.
      double m = 1 - pow(nodeCount - MAX_DESIRABLE_SIZE, 1.4) * .05;
      //cout << nodeCount << ", " << m << ", " << score << ", " << score * m << endl;
      if (m <= 0)
      {
        score = -numeric_limits<double>::infinity();
      }
      else if (score > 0.0)
      {
        // Shrink positive scores toward zero.
        score *= m;
      }
      else
      {
        // Dividing by m (0 < m < 1) pushes non-positive scores further from zero, so the
        // penalty always makes the result worse rather than better.
        score /= m;
      }
    }
    return score;
  }