void PCATestCase::decorrelation() { OpenANN::RandomNumberGenerator rng; const int N = 100; const int D = 2; Eigen::MatrixXd X(N, D); rng.fillNormalDistribution(X); // Linear transformation (correlation) Eigen::MatrixXd A(D, D); A << 1.0, 0.5, 0.5, 2.0; Eigen::MatrixXd Xt = X * A.transpose(); // Decorrelation (without dimensionality reduction) OpenANN::PCA pca(D); pca.fit(Xt); Eigen::MatrixXd Y = pca.transform(Xt); // Covariance matrix should be identity matrix Eigen::MatrixXd cov = Y.transpose() * Y; ASSERT_EQUALS_DELTA(cov(0, 0), (double) N, 1e-5); ASSERT_EQUALS_DELTA(cov(1, 1), (double) N, 1e-5); ASSERT_EQUALS_DELTA(cov(0, 1), 0.0, 1e-5); ASSERT_EQUALS_DELTA(cov(1, 0), 0.0, 1e-5); Eigen::VectorXd evr = pca.explainedVarianceRatio(); double evrSum = evr.sum(); ASSERT_EQUALS_DELTA(evrSum, 1.0, 1e-5); }
void KMeansTestCase::clustering() { const int N = 1000; const int D = 10; Eigen::MatrixXd X(N, D); OpenANN::RandomNumberGenerator rng; rng.fillNormalDistribution(X); OpenANN::KMeans kmeans(D, 5); const int batchSize = 200; double averageDistToCenter = std::numeric_limits<double>::max(); for(int i = 0; i < N/batchSize; i++) { // Data points will be closer to centers after each update Eigen::MatrixXd Y = kmeans.fitPartial(X.block(i*batchSize, 0, batchSize, D)).transform(X); const double newDistance = Y.array().rowwise().maxCoeff().sum(); ASSERT(newDistance < averageDistToCenter); averageDistToCenter = newDistance; } }
void PCATestCase::dimensionalityReduction() { OpenANN::RandomNumberGenerator rng; const int N = 100; const int D = 5; Eigen::MatrixXd X(N, D); rng.fillNormalDistribution(X); // Strong correlation Eigen::MatrixXd A = Eigen::MatrixXd::Identity(D, D) * 0.5 + Eigen::MatrixXd::Ones(D, D); Eigen::MatrixXd Xt = X * A.transpose(); // Dimensionality reduction OpenANN::PCA pca(1); pca.fit(Xt); Eigen::MatrixXd Y = pca.transform(Xt); ASSERT_EQUALS(Y.rows(), N); ASSERT_EQUALS(Y.cols(), 1); ASSERT_EQUALS(pca.explainedVarianceRatio().rows(), 1); ASSERT(pca.explainedVarianceRatio().sum() > 0.9); }