Example #1
0
/*
	Build a PCA from the 1-based matrix m [1..numberOfRows] [1..numberOfColumns].

	If byColumns is true the analysis is run on the transpose of m, otherwise on m itself.
	The input is never modified: all work is done on a private copy.

	Steps:
	  1. reject input with non-finite elements or with a zero Frobenius norm;
	  2. copy (or transpose) the data into a working matrix and warn when that
	     working matrix has fewer rows than columns;
	  3. centre the columns of the working matrix, keeping the column means in thy centroid;
	  4. let Eigen_initFromSquareRoot fill in the eigenstructure from the centred matrix;
	  5. divide the eigenvalues by (number of working rows - 1).

	Any error is rethrown with a "No PCA created from columns./rows." message appended.
*/
static autoPCA NUMdmatrix_to_PCA (double **m, long numberOfRows, long numberOfColumns, bool byColumns) {
	try {
		if (! NUMdmatrix_hasFiniteElements (m, 1, numberOfRows, 1, numberOfColumns)) {
			Melder_throw (U"At least one of the matrix elements is not finite or undefined.");
		}
		if (NUMfrobeniusnorm (numberOfRows, numberOfColumns, m) == 0.0) {
			Melder_throw (U"All values in your table are zero.");
		}

		// Shape of the working matrix: swapped with respect to the input when analysing by columns.
		long nobs = numberOfRows, ndim = numberOfColumns;
		if (byColumns) {
			nobs = numberOfColumns;
			ndim = numberOfRows;
		}
		if (nobs < ndim) {
			if (byColumns) {
				Melder_warning (U"The number of columns in your table is less than the number of rows. ");
			} else {
				Melder_warning (U"The number of rows in your table is less than the number of columns. ");
			}
		}

		autoNUMmatrix<double> work;
		work.reset (1, nobs, 1, ndim);
		if (byColumns) {
			// Transposed copy: input element (irow, icol) ends up at (icol, irow).
			for (long irow = 1; irow <= numberOfRows; irow ++) {
				for (long icol = 1; icol <= numberOfColumns; icol ++) {
					work [icol] [irow] = m [irow] [icol];
				}
			}
		} else {
			NUMmatrix_copyElements<double> (m, work.peek(), 1, nobs, 1, ndim);
		}

		autoPCA thee = Thing_new (PCA);
		thy centroid = NUMvector<double> (1, ndim);
		NUMcentreColumns (work.peek(), 1, nobs, 1, ndim, thy centroid);
		Eigen_initFromSquareRoot (thee.get(), work.peek(), nobs, ndim);
		thy labels = NUMvector<char32 *> (1, ndim);

		PCA_setNumberOfObservations (thee.get(), nobs);

		/*
			With A the centred working matrix and N its number of rows, the
			covariance matrix is C = A'A / (N-1). The eigenstructure above was
			computed for A'A: the eigenvectors are the same as those of C, but
			the eigenvalues still have to be divided by (N-1).
		*/
		for (long ieigen = 1; ieigen <= thy numberOfEigenvalues; ieigen ++) {
			thy eigenvalues [ieigen] /= (nobs - 1);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (U"No PCA created from ", byColumns ? U"columns." : U"rows.");
	}
}
Example #2
0
/*
    Create a Discriminant from a TableOfReal in which the row labels identify the groups.

    Requirements checked here: every element must be finite, every row must carry
    a label, and the labels must define at least two groups. Missing column labels
    are replaced by sequential ones in a sorted copy; the input table itself is left alone.

    The result holds the per-group SSCPs, the overall SSCP, a priori probabilities
    (group size divided by the number of rows), a cost matrix with 1.0 everywhere
    off the diagonal, and an Eigen initialised by Eigen_initFromSquareRootPair.

    Any error is rethrown with ": Discriminant not created." appended.
*/
autoDiscriminant TableOfReal_to_Discriminant (TableOfReal me) {
    try {
        autoDiscriminant thee = Thing_new (Discriminant);
        const long ncol = my numberOfColumns;

        if (! NUMdmatrix_hasFiniteElements (my data, 1, my numberOfRows, 1, my numberOfColumns)) {
            Melder_throw (U"At least one of the table's elements is not finite or undefined.");
        }
        if (! TableOfReal_hasRowLabels (me)) {
            Melder_throw (U"At least one of the rows has no label.");
        }

        // Work on a copy sorted by row label.
        autoTableOfReal sorted = TableOfReal_sortOnlyByRowLabels (me);
        if (! TableOfReal_hasColumnLabels (sorted.get())) {
            TableOfReal_setSequentialColumnLabels (sorted.get(), 0, 0, U"c", 1, 1);
        }

        thy groups = TableOfReal_to_SSCPList_byLabel (sorted.get());
        thy total = TableOfReal_to_SSCP (sorted.get(), 0, 0, 0, 0);

        thy numberOfGroups = thy groups -> size;
        if (thy numberOfGroups < 2) {
            Melder_throw (U"Number of groups must be greater than one.");
        }

        TableOfReal_centreColumns_byRowLabel (sorted.get());

        // Storage for the overall centroid, the between-groups square root, the priors and the costs.

        autoNUMvector<double> grandCentroid (1, ncol);
        autoNUMmatrix<double> betweenRoot (1, thy numberOfGroups, 1, ncol);
        thy aprioriProbabilities = NUMvector<double> (1, thy numberOfGroups);
        thy costs = NUMmatrix<double> (1, thy numberOfGroups, 1, thy numberOfGroups);

        // Overall centroid = average of the group centroids, weighted by group size.

        double totalCount = 0;
        for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
            SSCP group = thy groups->at [igroup];
            const double groupCount = SSCP_getNumberOfObservations (group);
            totalCount += groupCount;
            for (long icol = 1; icol <= ncol; icol ++) {
                grandCentroid [icol] += groupCount * group -> centroid [icol];
            }
        }
        for (long icol = 1; icol <= ncol; icol ++) {
            grandCentroid [icol] /= totalCount;
        }

        // Priors, and one row per group: sqrt (group size) * (group centroid - overall centroid).

        for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
            SSCP group = thy groups->at [igroup];
            const double groupCount = SSCP_getNumberOfObservations (group);
            thy aprioriProbabilities [igroup] = groupCount / my numberOfRows;
            const double rootCount = sqrt (groupCount);
            for (long icol = 1; icol <= ncol; icol ++) {
                betweenRoot [igroup] [icol] = rootCount * (group -> centroid [icol] - grandCentroid [icol]);
            }
        }

        // The problem to solve is B'B.x = lambda W'W.x, with B'B and W'W the between and within
        // covariance matrices. These matrices are not formed explicitly: the two square roots
        // (betweenRoot and the group-centred data) are passed on instead, so that the GSVD can
        // deliver the eigenvalues and eigenvectors.

        thy eigen = Thing_new (Eigen);
        Eigen_initFromSquareRootPair (thy eigen.get(), betweenRoot.peek(), thy numberOfGroups, ncol, sorted -> data, my numberOfRows);

        // Default costs: 1.0 for every pair of different groups (filled symmetrically).

        for (long irow = 1; irow <= thy numberOfGroups; irow ++) {
            for (long jcol = irow + 1; jcol <= thy numberOfGroups; jcol ++) {
                thy costs [jcol] [irow] = 1.0;
                thy costs [irow] [jcol] = 1.0;
            }
        }
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": Discriminant not created.");
    }
}
Example #3
0
/*
	Canonical correlation analysis of the columns of a TableOfReal.

	The first ny columns of the table form the dependent part (y), the remaining
	nx = numberOfColumns - ny columns form the independent part (x).

	Preconditions (each violation throws):
	  - 1 <= ny <= numberOfColumns - 1;
	  - ny <= nx;
	  - the number of rows is at least ny;
	  - all table elements are finite;
	  - neither part consists of zeros only.

	The table itself is not modified: both parts are copied into SVD objects,
	centred per column and decomposed there.

	Any error is rethrown with ": CCA not created." appended.
*/
autoCCA TableOfReal_to_CCA (TableOfReal me, long ny) {
	try {
		long n = my numberOfRows, nx = my numberOfColumns - ny;

		if (ny < 1 || ny > my numberOfColumns - 1) {
			Melder_throw (U"Dimension of first part not correct.");
		}
		if (ny > nx) {
			Melder_throw (U"The dimension of the dependent part (", ny, U") must be less than or equal to "
				"the dimension of the independent part (", nx, U").");
		}
		if (n < ny) {
			// The test accepts n == ny, so the message says "at least" rather than "larger than".
			Melder_throw (U"The number of observations must be at least ", ny, U".");
		}

		if (! NUMdmatrix_hasFiniteElements (my data, 1, my numberOfRows, 1, my numberOfColumns)) {
			Melder_throw (U"At least one of the table's elements is not finite or undefined.");
		}

		// Use svd as (temporary) storage, and copy data:
		// columns 1..ny go to svdy, columns ny+1..ny+nx go to svdx.

		autoSVD svdy = SVD_create (n, ny);
		autoSVD svdx = SVD_create (n, nx);

		for (long i = 1; i <= n; i++) {
			for (long j = 1; j <= ny; j++) {
				svdy -> u[i][j] = my data[i][j];
			}
			for (long j = 1; j <= nx; j++) {
				svdx -> u[i][j] = my data[i][ny + j];
			}
		}

		double **uy = svdy -> u;
		double **vy = svdy -> v;
		double **ux = svdx -> u;
		double **vx = svdx -> v;
		double fnormy = NUMfrobeniusnorm (n, ny, uy);
		double fnormx = NUMfrobeniusnorm (n, nx, ux);
		if (fnormy == 0.0 || fnormx == 0.0) {
			Melder_throw (U"One of the parts of the table contains only zeros.");
		}

		// Centre the data and svd it.

		NUMcentreColumns (uy, 1, n, 1, ny, nullptr);
		NUMcentreColumns (ux, 1, n, 1, nx, nullptr);

		SVD_compute (svdy.get());
		SVD_compute (svdx.get());

		// The returned counts limit the summations below to the singular values that were not zeroed.
		long numberOfZeroedy = SVD_zeroSmallSingularValues (svdy.get(), 0.0);
		long numberOfZeroedx = SVD_zeroSmallSingularValues (svdx.get(), 0.0);

		// Form the matrix C = ux' uy (use svd-object storage)

		autoSVD svdc = SVD_create (nx, ny);
		double **uc = svdc -> u;
		double **vc = svdc -> v;

		for (long i = 1; i <= nx; i ++) {
			for (long j = 1; j <= ny; j ++) {
				double t = 0.0;
				for (long q = 1; q <= n; q ++) {
					t += ux [q] [i] * uy [q] [j];
				}
				uc [i] [j] = t;
			}
		}

		SVD_compute (svdc.get());
		long numberOfZeroedc = SVD_zeroSmallSingularValues (svdc.get(), 0.0);
		long numberOfCoefficients = ny - numberOfZeroedc;

		autoCCA thee = CCA_create (numberOfCoefficients, ny, nx);
		thy yLabels = strings_to_Strings (my columnLabels, 1, ny);
		thy xLabels = strings_to_Strings (my columnLabels, ny + 1, my numberOfColumns);

		double **evecy = thy y -> eigenvectors;
		double **evecx = thy x -> eigenvectors;
		thy numberOfObservations = n;

		/*
			Y = Vy * inv(Dy) * Vc
			X = Vx * inv(Dx) * Uc
			For the eigenvectors we want a row representation:
			columns(Y) = rows(Y') = rows(Vc' * inv(Dy) * Vy')
			columns(X) = rows(X') = rows(Uc' * inv(Dx) * Vx')
			rows(Y') = evecy[i][j] = Vc[k][i] * Vy[j][k] / Dy[k]
			rows(X') = evecx[i][j] = Uc[k][i] * Vx[j][k] / Dx[k]
		*/

		for (long i = 1; i <= numberOfCoefficients; i ++) {
			// The eigenvalue stored for both parts is the square of the i-th singular value of C.
			double ccc = svdc -> d [i];
			thy y -> eigenvalues [i] = thy x -> eigenvalues [i] = ccc * ccc;
			for (long j = 1; j <= ny; j ++) {
				double t = 0.0;
				for (long q = 1; q <= ny - numberOfZeroedy; q ++) {
					t += vc [q] [i] * vy [j] [q] / svdy -> d [q];
				}
				evecy [i] [j] = t;
			}
			for (long j = 1; j <= nx; j ++) {
				double t = 0.0;
				for (long q = 1; q <= nx - numberOfZeroedx; q ++) {
					t += uc [q] [i] * vx [j] [q] / svdx -> d [q];
				}
				evecx [i] [j] = t;
			}
		}

		// Normalize eigenvectors.

		NUMnormalizeRows (thy y -> eigenvectors, numberOfCoefficients, ny, 1);
		NUMnormalizeRows (thy x -> eigenvectors, numberOfCoefficients, nx, 1);
		Melder_assert (thy x -> dimension == thy xLabels -> numberOfStrings &&
		               thy y -> dimension == thy yLabels -> numberOfStrings);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": CCA not created.");
	}
}