double InformationGainCalculator::calculateInformationGain(const DataFrame& df1, 
    int factorIndex1, const DataFrame& df2, int factorIndex2)
  {
    assert(df1.isNominal(factorIndex1));
    assert(df2.isNominal(factorIndex2));

    double hy = _calculateEntropy(df1, factorIndex1);
    double hyx = _calculateConditionalEntropy(df1, factorIndex1, df2, factorIndex2);

    double gain = hy - hyx;

    return gain;
  }  
  double SymmetricUncertaintyCalculator::calculateUncertainty(const DataFrame& df1, 
    int factorIndex1, const DataFrame& df2, int factorIndex2)
  {
    assert(df1.isNominal(factorIndex1));
    assert(df2.isNominal(factorIndex2));

    double hy = _calculateEntropy(df1, factorIndex1);
    double hyx = _calculateConditionalEntropy(df1, factorIndex1, df2, factorIndex2);

    double gain = hy - hyx;

    double hx = _calculateEntropy(df2, factorIndex2);

    if (hy + hx == 0.0)
    {
      return 1.0;
    }
    double result = 2.0 * (gain / (hy + hx));

    return result;
  }
  /**
   * Computes the eigenvectors of the covariance matrix of df and stores them
   * in _components, one vector per entry (_components[v][d] is element (d, v)
   * of the eigenvector matrix after _sortEigens has reordered it).
   *
   * The covariance is normalised by the number of data vectors (N, not N - 1).
   *
   * @param df data frame whose factors must all be numeric and non-null.
   * @throws Tgs::Exception if any factor is nominal, if df contains no data
   *   vectors, if any element is null, or if the matrix computation raises
   *   something that is not a std::exception.
   * @throws std::exception subclasses raised by the matrix computation are
   *   propagated unchanged.
   */
  void PrincipalComponentsAnalysis::compute(DataFrame& df)
  {
    const unsigned int numFactors = df.getNumFactors();
    const unsigned int numVectors = df.getNumDataVectors();

    if (numFactors > 2)
    {
      // see PrincipalComponentsAnalysisTest
      cout << "You realize this hasn't been tested, right?" << endl;
    }

    // Validate the input before allocating any matrices.
    for (unsigned int j = 0; j < numFactors; j++)
    {
      if (df.isNominal(j))
      {
        throw Tgs::Exception("Only numeric values are supported.");
      }
    }

    // Without data the mean and covariance normalisation would divide by zero
    // and hand a NaN matrix to the eigen solver.
    if (numVectors == 0)
    {
      throw Tgs::Exception("At least one data vector is required to calculate PCA.");
    }

    // Use double for the sample count everywhere; a float cast would round
    // counts above 2^24.
    const double vectorCount = static_cast<double>(numVectors);

    Matrix dataMat(numFactors, numVectors);
    Matrix deviates(numFactors, numVectors);
    SymmetricMatrix covar(numFactors);
    DiagonalMatrix eigenValues(numFactors);
    Matrix eigenVectors;
    ColumnVector means(numFactors);
    means = 0.0;
    // Row of ones; (means * h) replicates the mean column across every data vector.
    RowVector h(numVectors);
    h = 1.0;

    // Copy the data transposed (one row per factor, one column per data vector)
    // and accumulate the per-factor means in the same pass.
    for (unsigned int i = 0; i < numVectors; i++)
    {
      for (unsigned int j = 0; j < numFactors; j++)
      {
        const double v = df.getDataElement(i, j);
        if (df.isNull(v))
        {
          throw Tgs::Exception("Only non-null values are supported.");
        }
        dataMat.element(j, i) = v;
        means.element(j) += v / vectorCount;
      }
    }

    try
    {
      deviates = dataMat - (means * h);
      covar << (1.0 / vectorCount) * (deviates * deviates.t());
      Jacobi::jacobi(covar, eigenValues, eigenVectors);
    }
    catch (const std::exception&)
    {
      // Standard exceptions already carry a message; let them through untouched.
      throw;
    }
    catch (...)
    {
      // Anything else thrown by the matrix code is wrapped in a project exception.
      throw Tgs::Exception("Unknown error while calculating PCA");
    }

    _sortEigens(eigenVectors, eigenValues);

    // Store each eigenvector (a column of eigenVectors) as its own component.
    _components.resize(numFactors);
    for (unsigned int v = 0; v < numFactors; v++)
    {
      _components[v].resize(numFactors);
      for (unsigned int d = 0; d < numFactors; d++)
      {
        _components[v][d] = eigenVectors.element(d, v);
      }
    }
  }