Example #1
0
/*
	Build a PCA object from the rows of a TableOfReal.

	The table is validated first (all cells defined, at least two rows, not
	identically zero); a table with fewer rows than columns only triggers a
	warning. A copy of the data is then centred column by column, the column
	means are kept as the centroid, and the eigenstructure is initialised from
	the centred copy. The column labels of the table become the PCA labels.

	Throws a MelderError ("PCA not created.") if any step fails.
*/
autoPCA TableOfReal_to_PCA (I) {
	iam (TableOfReal);
	try {
		const long nrow = my numberOfRows;
		const long ncol = my numberOfColumns;

		// Validation; the order of these checks determines which message the user sees.
		if (! TableOfReal_areAllCellsDefined (me, 0, 0, 0, 0)) {
			Melder_throw (U"Undefined cells.");
		}
		if (nrow < 2) {
			Melder_throw (U"There is not enough data to perform a PCA.\nYour table has less than 2 rows.");
		}
		if (nrow < ncol) {
			Melder_warning (U"The number of rows in your table is less than the \nnumber of columns. ");
		}
		if (NUMfrobeniusnorm (nrow, ncol, my data) == 0) {
			Melder_throw (U"All values in your table are zero.");
		}

		autoPCA thee = Thing_new (PCA);

		// Work on a private copy so that the caller's table is left untouched.
		autoNUMmatrix<double> centred (NUMmatrix_copy (my data, 1, nrow, 1, ncol), 1, 1);
		thy centroid = NUMvector<double> (1, ncol);

		for (long icol = 1; icol <= ncol; icol ++) {
			// Column mean: accumulate from the first row downwards, then divide.
			double mean = centred [1] [icol];
			for (long irow = 2; irow <= nrow; irow ++) {
				mean += centred [irow] [icol];
			}
			mean /= nrow;
			thy centroid [icol] = mean;

			// Remove the mean from every cell of this column.
			for (long irow = 1; irow <= nrow; irow ++) {
				centred [irow] [icol] -= mean;
			}
		}

		Eigen_initFromSquareRoot (thee.peek(), centred.peek(), nrow, ncol);

		thy labels = NUMvector<char32 *> (1, ncol);
		NUMstrings_copyElements (my columnLabels, thy labels, 1, ncol);

		PCA_setNumberOfObservations (thee.peek(), nrow);

		/*
			With A the centred data, the covariance matrix is C = A'A / (N-1),
			but the eigenstructure above was computed for A'A. The eigenvectors
			are the same for both; only the eigenvalues need rescaling by 1/(N-1).
		*/
		const long degreesOfFreedom = nrow - 1;
		for (long ieigen = 1; ieigen <= thy numberOfEigenvalues; ieigen ++) {
			thy eigenvalues [ieigen] = thy eigenvalues [ieigen] / degreesOfFreedom;
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": PCA not created.");
	}
}
Example #2
0
/*
	Canonical correlation analysis of a TableOfReal.

	The first `ny` columns of the table form the dependent part (y), the
	remaining `nx = numberOfColumns - ny` columns form the independent part (x).
	Both parts are centred and decomposed by SVD; the SVD of C = Ux' Uy then
	yields the canonical correlations, whose squares are stored as the
	eigenvalues of both the y and the x eigenstructure.

	Parameters:
		me  the table with one observation per row
		ny  the number of leading columns that make up the dependent part;
		    must satisfy 1 <= ny <= numberOfColumns - 1 and ny <= nx

	Returns the new CCA (ownership is transferred to the caller).
	Throws a MelderError ("CCA not created.") if the arguments are invalid,
	the table has undefined cells, one of the parts is identically zero,
	or one of the helpers fails.
*/
CCA TableOfReal_to_CCA (TableOfReal me, long ny) {
	try {
		long n = my numberOfRows, nx = my numberOfColumns - ny;

		if (ny < 1 || ny > my numberOfColumns - 1) {
			Melder_throw (U"Dimension of first part not correct.");
		}
		// The dependent part of the CCA should be the one with the smallest dimension.
		if (ny > nx) {
			Melder_throw (U"The dimension of the dependent part (", ny, U") must be less than or equal to "
				"the dimension of the independent part (", nx, U").");
		}
		if (n < ny) {
			Melder_throw (U"The number of observations must be larger then ", ny, U".");
		}

		// Do not ignore the outcome of this check: a table with undefined cells
		// must never reach the SVD computations below.
		if (! TableOfReal_areAllCellsDefined (me, 0, 0, 0, 0)) {
			Melder_throw (U"Undefined cells.");
		}

		// Use svd as (temporary) storage, and copy data

		autoSVD svdy = SVD_create (n, ny);
		autoSVD svdx = SVD_create (n, nx);

		for (long i = 1; i <= n; i++) {
			for (long j = 1; j <= ny; j++) {
				svdy -> u[i][j] = my data[i][j];
			}
			for (long j = 1; j <= nx; j++) {
				svdx -> u[i][j] = my data[i][ny + j];
			}
		}

		double **uy = svdy -> u;
		double **vy = svdy -> v;
		double **ux = svdx -> u;
		double **vx = svdx -> v;
		double fnormy = NUMfrobeniusnorm (n, ny, uy);
		double fnormx = NUMfrobeniusnorm (n, nx, ux);
		if (fnormy == 0.0 || fnormx == 0.0) {
			Melder_throw (U"One of the parts of the table contains only zeros.");
		}

		// Centre the data and svd it.

		NUMcentreColumns (uy, 1, n, 1, ny, nullptr);
		NUMcentreColumns (ux, 1, n, 1, nx, nullptr);

		SVD_compute (svdy.peek()); SVD_compute (svdx.peek());

		long numberOfZeroedy = SVD_zeroSmallSingularValues (svdy.peek(), 0.0);
		long numberOfZeroedx = SVD_zeroSmallSingularValues (svdx.peek(), 0.0);

		// Form the matrix C = ux' uy (use svd-object storage)

		autoSVD svdc = SVD_create (nx, ny);
		double **uc = svdc -> u;
		double **vc = svdc -> v;

		for (long i = 1; i <= nx; i++) {
			for (long j = 1; j <= ny; j++) {
				double t = 0;
				for (long q = 1; q <= n; q++) {
					t += ux[q][i] * uy[q][j];
				}
				uc[i][j] = t;
			}
		}

		SVD_compute (svdc.peek());
		long numberOfZeroedc = SVD_zeroSmallSingularValues (svdc.peek(), 0.0);
		// Only the singular values of C that were not zeroed give a coefficient.
		long numberOfCoefficients = ny - numberOfZeroedc;

		autoCCA thee = CCA_create (numberOfCoefficients, ny, nx);
		thy yLabels = strings_to_Strings (my columnLabels, 1, ny);
		thy xLabels = strings_to_Strings (my columnLabels, ny + 1, my numberOfColumns);

		double **evecy = thy y -> eigenvectors;
		double **evecx = thy x -> eigenvectors;
		thy numberOfObservations = n;

		/*
			Y = Vy * inv(Dy) * Vc
			X = Vx * inv(Dx) * Uc
			For the eigenvectors we want a row representation:
			columns(Y) = rows(Y') = rows(Vc' * inv(Dy) * Vy')
			columns(X) = rows(X') = rows(Uc' * inv(Dx) * Vx')
			rows(Y') = evecy[i][j] = Vc[k][i] * Vy[j][k] / Dy[k]
			rows(X') = evecx[i][j] = Uc[k][i] * Vx[j][k] / Dx[k]
			The sums over k skip the trailing singular values that were zeroed,
			so that no division by zero takes place.
		*/

		for (long i = 1; i <= numberOfCoefficients; i++) {
			double ccc = svdc -> d[i];
			thy y -> eigenvalues[i] = thy x -> eigenvalues[i] = ccc * ccc;
			for (long j = 1; j <= ny; j++) {
				double t = 0.0;
				for (long q = 1; q <= ny - numberOfZeroedy; q++) {
					t += vc[q][i] * vy[j][q] / svdy -> d[q];
				}
				evecy[i][j] = t;
			}
			for (long j = 1; j <= nx; j++) {
				double t = 0.0;
				for (long q = 1; q <= nx - numberOfZeroedx; q++) {
					t += uc[q][i] * vx[j][q] / svdx -> d[q];
				}
				evecx[i][j] = t;
			}
		}

		// Normalize eigenvectors.

		NUMnormalizeRows (thy y -> eigenvectors, numberOfCoefficients, ny, 1);
		NUMnormalizeRows (thy x -> eigenvectors, numberOfCoefficients, nx, 1);
		Melder_assert (thy x -> dimension == thy xLabels -> numberOfStrings &&
		               thy y -> dimension == thy yLabels -> numberOfStrings);
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": CCA not created.");
	}
}
Example #3
0
File: CCA.c Project: alekstorm/tala
/*
	Canonical correlation analysis of a TableOfReal (error-code version).

	The first `ny` columns of the table form the dependent part (y), the
	remaining `nx = numberOfColumns - ny` columns form the independent part (x).
	Both parts are centred and decomposed by SVD; the SVD of C = Ux' Uy then
	yields the canonical correlations, whose squares are stored as the
	eigenvalues of both the y and the x eigenstructure.

	Parameters:
		me  the table with one observation per row
		ny  the number of leading columns that make up the dependent part;
		    must satisfy 1 <= ny <= numberOfColumns - 1 and ny <= nx

	Returns the new CCA. After a failure inside the computation the result is
	NULL (any partially built CCA is forgotten); the early argument checks
	return the value of Melder_errorpN, presumably NULL as well.
*/
CCA TableOfReal_to_CCA (TableOfReal me, long ny)
{
	CCA thee = NULL;
	SVD svdy = NULL, svdx = NULL, svdc = NULL;
	double fnormy, fnormx, **uy, **vy, **ux, **vx, **uc, **vc;
	double **evecy, **evecx;
	long i, j, q, n = my numberOfRows;
	long numberOfZeroedy, numberOfZeroedx, numberOfZeroedc;
	long numberOfCoefficients;
	long nx = my numberOfColumns - ny;

	if (ny < 1 || ny > my numberOfColumns - 1) return Melder_errorp1 (L"Dimension of first part not correct.");

	if (! TableOfReal_areAllCellsDefined (me, 0, 0, 0, 0)) return NULL;

	/*
		The dependent 'part' of the CCA should be the smallest dimension.
	*/

	if (ny > nx) return Melder_errorp5 (L"The dimension of the dependent "
		"part (", Melder_integer (ny), L") must be less than or equal to the dimension of the "
		"independent part (", Melder_integer (nx), L").");

	if (n < ny) return Melder_errorp2 (L"The number of "
		"observations must be larger then ", Melder_integer (ny));

	/*
		Use svd as (temporary) storage, and copy data
	*/

	svdy = SVD_create (n, ny);
	if (svdy == NULL) goto end;
	svdx = SVD_create (n, nx);
	if (svdx == NULL) goto end;

	for (i = 1; i <= n; i++)
	{
		for (j = 1; j <= ny; j++)
		{
			svdy -> u[i][j] = my data[i][j];
		}
		for (j = 1; j <= nx; j++)
		{
			svdx -> u[i][j] = my data[i][ny + j];
		}
	}

	uy = svdy -> u; vy = svdy -> v;
	ux = svdx -> u; vx = svdx -> v;
	fnormy = NUMfrobeniusnorm (n, ny, uy);
	fnormx = NUMfrobeniusnorm (n, nx, ux);
	if (fnormy == 0 || fnormx == 0)
	{
		(void) Melder_errorp1 (L"One of the parts of the table contains only zeros.");
		goto end;
	}

	/*
		Centre the data and svd it.
	*/

	NUMcentreColumns (uy, 1, n, 1, ny, NULL);
	NUMcentreColumns (ux, 1, n, 1, nx, NULL);

	if (! SVD_compute (svdy) || ! SVD_compute (svdx)) goto end;

	numberOfZeroedy = SVD_zeroSmallSingularValues (svdy, 0);
	numberOfZeroedx = SVD_zeroSmallSingularValues (svdx, 0);

	/*
		Form the matrix C = ux' uy (use svd-object storage)
	*/

	svdc = SVD_create (nx, ny);
	if (svdc == NULL)  goto end;
	uc = svdc -> u; vc = svdc -> v;

	for (i = 1; i <= nx; i++)
	{
		for (j = 1; j <= ny; j++)
		{
			double t = 0;
			for (q = 1; q <= n; q++)
			{
				t += ux[q][i] * uy[q][j];
			}
			uc[i][j] = t;
		}
	}

	if (! SVD_compute (svdc)) goto end;
	numberOfZeroedc = SVD_zeroSmallSingularValues (svdc, 0);
	/* Only the singular values of C that were not zeroed give a coefficient. */
	numberOfCoefficients = ny - numberOfZeroedc;

	thee = CCA_create (numberOfCoefficients, ny, nx);
	if (thee == NULL) goto end;
	thy yLabels = strings_to_Strings (my columnLabels, 1, ny);
	if ( thy yLabels == NULL) goto end;
	thy xLabels = strings_to_Strings (my columnLabels, ny+1, my numberOfColumns);
	if ( thy xLabels == NULL) goto end;

	evecy = thy y -> eigenvectors;
	evecx = thy x -> eigenvectors;
	thy numberOfObservations = n;

	/*
		Y = Vy * inv(Dy) * Vc
		X = Vx * inv(Dx) * Uc
		For the eigenvectors we want a row representation:
		columns(Y) = rows(Y') = rows(Vc' * inv(Dy) * Vy')
		columns(X) = rows(X') = rows(Uc' * inv(Dx) * Vx')
		rows(Y') = evecy[i][j] = Vc[k][i] * Vy[j][k] / Dy[k]
		rows(X') = evecx[i][j] = Uc[k][i] * Vx[j][k] / Dx[k]
		The sums over k skip the trailing singular values that were zeroed,
		so that no division by zero takes place.
	*/

	for (i = 1; i <= numberOfCoefficients; i++)
	{
		double ccc = svdc -> d[i];
		thy y -> eigenvalues[i] = thy x -> eigenvalues[i] = ccc * ccc;
		for (j = 1; j <= ny; j++)
		{
			double t = 0;
			for (q = 1; q <= ny - numberOfZeroedy; q++)
			{
				t += vc[q][i] * vy[j][q] / svdy -> d[q];
			}
			evecy[i][j] = t;
		}
		for (j = 1; j <= nx; j++)
		{
			double t = 0;
			for (q = 1; q <= nx - numberOfZeroedx; q++)
			{
				t += uc[q][i] * vx[j][q] / svdx -> d[q];
			}
			evecx[i][j] = t;
		}
	}

	/*
		Normalize eigenvectors.
	*/
	NUMnormalizeRows (thy y -> eigenvectors, numberOfCoefficients, ny, 1);
	NUMnormalizeRows (thy x -> eigenvectors, numberOfCoefficients, nx, 1);

	/*
		Consistency check on the finished object. It must stay on the success
		path, i.e. before the `end` label: every `goto end` above arrives with
		`thee` or one of its label lists still NULL, and dereferencing them
		there would crash instead of reporting the queued error.
	*/
	Melder_assert (thy x -> dimension == thy xLabels -> numberOfStrings &&
		thy y -> dimension == thy yLabels -> numberOfStrings);

end:

	/* Shared clean-up for the success path and all failure paths. */
	forget (svdy);
	forget (svdx);
	forget (svdc);
	if (Melder_hasError ()) forget (thee);

	return thee;
}
Example #4
0
/*
	Build a Discriminant object from a TableOfReal whose row labels name the
	group each row belongs to.

	The table must have all cells defined, contain no infinities, have a label
	on every row, and contain at least two groups. Missing column labels are
	replaced by sequential ones on a working copy of the table. The a priori
	probability of a group is its number of observations divided by the number
	of rows; the default cost is 1 between different groups and is left at its
	initial value on the diagonal.

	Returns the new Discriminant (ownership is transferred to the caller).
	Throws a MelderError ("Discriminant not created.") if a precondition is
	violated or one of the helpers fails.
*/
Discriminant TableOfReal_to_Discriminant (TableOfReal me) {
	try {
		autoDiscriminant thee = Thing_new (Discriminant);
		long dimension = my numberOfColumns;

		// Do not ignore the outcome of this check: a table with undefined cells
		// must never reach the eigenvalue computations below.
		if (! TableOfReal_areAllCellsDefined (me, 0, 0, 0, 0)) {
			Melder_throw (U"Undefined cells.");
		}

		if (NUMdmatrix_hasInfinities (my data, 1, my numberOfRows, 1, dimension)) {
			Melder_throw (U"Table contains infinities.");
		}

		if (! TableOfReal_hasRowLabels (me)) {
			Melder_throw (U"At least one of the rows has no label.");
		}

		// All further work is done on `mew`, a separate table obtained from `me`
		// (presumably a copy with its rows ordered by label; see the helper).
		autoTableOfReal mew = TableOfReal_sortOnlyByRowLabels (me);
		if (! TableOfReal_hasColumnLabels (mew.peek())) {
			TableOfReal_setSequentialColumnLabels (mew.peek(), 0, 0, U"c", 1, 1);
		}

		thy groups = TableOfReal_to_SSCPs_byLabel (mew.peek());
		thy total = TableOfReal_to_SSCP (mew.peek(), 0, 0, 0, 0);

		if ( (thy numberOfGroups = thy groups -> size) < 2) {
			Melder_throw (U"Number of groups must be greater than one.");
		}

		TableOfReal_centreColumns_byRowLabel (mew.peek());

		// Overall centroid and apriori probabilities and costs.

		autoNUMvector<double> centroid (1, dimension);
		autoNUMmatrix<double> between (1, thy numberOfGroups, 1, dimension);
		thy aprioriProbabilities = NUMvector<double> (1, thy numberOfGroups);
		thy costs = NUMmatrix<double> (1, thy numberOfGroups, 1, thy numberOfGroups);

		// The overall centroid is the mean of the group centroids, weighted by
		// the number of observations in each group.
		double sum = 0, scale;
		for (long k = 1; k <= thy numberOfGroups; k++) {
			SSCP m = (SSCP) thy groups -> item[k];
			scale = SSCP_getNumberOfObservations (m);
			sum += scale;
			for (long j = 1; j <= dimension; j++) {
				centroid[j] += scale * m -> centroid[j];
			}
		}

		for (long j = 1; j <= dimension; j++) {
			centroid[j] /= sum;
		}

		// Row k of `between` is the deviation of group k's centroid from the
		// overall centroid, scaled by the square root of the group size.
		for (long k = 1; k <= thy numberOfGroups; k++) {
			SSCP m = (SSCP) thy groups -> item[k];
			scale = SSCP_getNumberOfObservations (m);
			thy aprioriProbabilities[k] = scale / my numberOfRows;
			for (long j = 1; j <= dimension; j++) {
				between[k][j] = sqrt (scale) * (m -> centroid[j] - centroid[j]);
			}
		}

		// We need to solve B'B.x = lambda W'W.x, where B'B and W'W are the between and within covariance matrices.
		// We do not calculate these covariance matrices directly from the data but instead use the GSVD to solve for
		// the eigenvalues and eigenvectors of the equation.

		Eigen_initFromSquareRootPair (thee.peek(), between.peek(), thy numberOfGroups, dimension, mew -> data, my numberOfRows);

		// Default costs: 1 for every pair of different groups (set symmetrically).

		for (long k = 1; k <= thy numberOfGroups; k++) {
			for (long j = k + 1; j <= thy numberOfGroups; j++) {
				thy costs[k][j] = thy costs[j][k] = 1;
			}
		}
		return thee.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": Discriminant not created.");
	}
}