コード例 #1
0
  void CfsSubsetEvaluator::setDataFrame(const DataFrame& dataFrame, TgsProgress* progress)
  {
    TgsProgress* discretizeProgress = NULL;
    TgsProgress* classCorrelationProgress = NULL;
    TgsProgress* correlationMatrixProgress = NULL;
    if (progress)
    {
      discretizeProgress = progress->createTgsChild("Discretizing Data Frame", .5);
      classCorrelationProgress = progress->createTgsChild("Calculating Class Correlation", .1);
      correlationMatrixProgress = progress->createTgsChild("Calculating Correlation Matrix", .4);
    }

    _df = dataFrame;
    DataFrameDiscretizer dfd;
    dfd.discretize(_df, discretizeProgress);
    _calculateClassCorrelations(classCorrelationProgress);
    _calculateCorrelationMatrix(correlationMatrixProgress);

//     cout << "class corr: " << _classCorr << endl;
//     cout << "corr matrix: " << endl;
//     for (unsigned int i = 0; i < _corrMatrix.size(); i++)
//     {
//       cout << _corrMatrix[i] << endl;
//     }
    _df.clear();
  }
コード例 #2
0
 double FeatureScoreFitnessFunction::calculateFitness(const Genome& genome)
 {
   const CalculatorGenome& cg = dynamic_cast<const CalculatorGenome&>(genome);
   _workingCopy->setFactorType(0, DataFrame::Numerical);
   for (unsigned int i = 0; i < _workingCopy->getNumDataVectors(); i++)
   {
     _workingCopy->setDataElement(i, 0, cg.calculateValue(i));
   }
   DataFrameDiscretizer dfd;
   try
   {
     dfd.discretize(*_workingCopy);
   }
   catch (const Tgs::Exception&)
   {
     // probably because of no valid values.
     return 0.0;
   }
   double score = _fsc->calculateScore(*_workingCopy, -1, *_workingCopy, 0);
   int nodeCount = cg.countNodes();
   if (nodeCount > 10)
   {
     score *= pow(.95, nodeCount - 10);
   }
   return score;
 }
コード例 #3
0
  void ConsistencySubsetEvaluator::setDataFrame(const DataFrame& dataFrame, TgsProgress* progress) 
  {
    _dataFrame = dataFrame;
    DataFrameDiscretizer dfd;
    dfd.discretize(_dataFrame, progress);

    _enumCnt = 0;
    for (unsigned int i = 0; i < _dataFrame.getNumDataVectors(); i++)
    {
      string className = _dataFrame.getTrainingLabel(i);
      if (_enumMap.find(className) == _enumMap.end())
      {
        _enumMap[className] = _enumCnt++;
      }
    }

    _putDataIntoBins();
  }
コード例 #4
0
  /**
   * Computes the fitness of a genome. The genome's value for every working uid is written
   * into factor _workingFactor of the working copy, the working copy is discretized, and a
   * CfsSubsetEvaluator scores the subset made of all factors. _baseScore is subtracted and
   * genomes with more than 10 nodes are penalized.
   *
   * @param genome Must be a CalculatorGenome; the reference dynamic_cast throws otherwise.
   * @return -1e9 when evaluation is aborted for being too slow or when discretization throws
   *   AllNullsException; negative infinity when the size penalty multiplier drops to zero or
   *   below; otherwise the (possibly penalized) score relative to _baseScore.
   */
  double CfsFitnessFunction::calculateFitness(const Genome& genome)
  {
    const CalculatorGenome& cg = dynamic_cast<const CalculatorGenome&>(genome);
    _workingCopy->setFactorType(_workingFactor, DataFrame::Numerical);
    // NOTE(review): Time::getTime() is assumed to return seconds, matching the
    // per-second threshold below -- confirm.
    double start = Time::getTime();
    //cout << cg.toString() << endl;
    // one tile per 3 minutes (presumably 250000 evaluations per tile -- confirm).
    // Floating-point literals are required here: 250000 / 180 is integer division and
    // silently truncates the threshold to 1388.
    const double MIN_EVAL_PER_SECOND = 250000.0 / 180.0;
    for (unsigned int i = 0; i < _workingCopy->getNumDataVectors(); i++)
    {
      // Only judge the speed once enough time has passed for the estimate to be meaningful.
      double elapsed = Time::getTime() - start;
      if (elapsed >= 0.5)
      {
        double speed = i / elapsed;
        cout << "speed " << speed << "\r";
        // Give up on genomes that evaluate too slowly or that exceed the hard time cap.
        if (speed < MIN_EVAL_PER_SECOND || elapsed >= 10)
        {
          //cout << endl << "Too slow: " << cg.toString() << endl;
          return -1e9;
        }
        cout.flush();
      }
      const double value = cg.calculateValue(_workingUids[i]);
      _workingCopy->setDataElement(i, _workingFactor, value);
    }
    DataFrameDiscretizer dfd;
    try
    {
      dfd.discretize(*_workingCopy);
    }
    catch (const DataFrameDiscretizer::AllNullsException&)
    {
      // because of no valid values.
      return -1e9;
    }

    CfsSubsetEvaluator cse;
    cse.setDataFrame(*_workingCopy);
    // Evaluate the subset that contains every factor of the working copy.
    std::vector<int> allFactors;
    allFactors.resize(_workingCopy->getNumFactors());
    for (unsigned int i = 0; i < _workingCopy->getNumFactors(); i++)
    {
      allFactors[i] = i;
    }
    double score = cse.evaluateSubset(allFactors) - _baseScore;
    int nodeCount = cg.countNodes();
    // penalize for large trees
    const int MAX_DESIRABLE_SIZE = 10;
    if (nodeCount > MAX_DESIRABLE_SIZE)
    {
      // Multiplier shrinks from 1 towards (and below) 0 as the tree grows.
      double m = 1 - pow(nodeCount - MAX_DESIRABLE_SIZE, 1.4) * .05;
      //cout << nodeCount << ", " << m << ", " << score << ", " << score * m << endl;
      if (m <= 0)
      {
        score = -numeric_limits<double>::infinity();
      }
      else if (score > 0.0)
      {
        score *= m;
      }
      else
      {
        // Dividing a non-positive score by m in (0, 1) pushes it further from zero,
        // so the penalty still makes the score worse.
        score /= m;
      }
    }
    return score;
  }