예제 #1
0
파일: PCA.cpp 프로젝트: PaulBoersma/praat
/*
	Builds a TableOfReal with one row per row of the Configuration and one column per
	PCA dimension. Row i accumulates, over the components k that are used,
	(thy data [i] [k]) * (eigenvector k of the PCA).
	The number of components used is the number of Configuration columns, limited to
	the number of eigenvalues in the PCA.
	Column labels come from the PCA, row labels from the Configuration.
	Throws if the Configuration has more columns than the PCA has dimensions.
	NOTE(review): the sums are accumulated with += into the freshly created table,
	so this relies on TableOfReal_create delivering zeroed cells — confirm.
*/
autoTableOfReal PCA_and_Configuration_to_TableOfReal_reconstruct (PCA me, Configuration thee) {
	try {
		if (thy numberOfColumns > my dimension) {
			Melder_throw (U"The dimension of the Configuration must be less than or equal to the dimension of the PCA.");
		}

		// Components without an eigenvector cannot contribute.
		const long numberOfComponents =
			( thy numberOfColumns > my numberOfEigenvalues ? my numberOfEigenvalues : thy numberOfColumns );

		autoTableOfReal him = TableOfReal_create (thy numberOfRows, my dimension);
		NUMstrings_copyElements (my labels, his columnLabels, 1, my dimension);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, thy numberOfRows);

		for (long irow = 1; irow <= thy numberOfRows; irow ++) {
			double *target = his data [irow];
			const double *weights = thy data [irow];
			for (long icomp = 1; icomp <= numberOfComponents; icomp ++) {
				const double *eigenvector = my eigenvectors [icomp];
				const double weight = weights [icomp];
				for (long idim = 1; idim <= my dimension; idim ++) {
					target [idim] += weight * eigenvector [idim];
				}
			}
		}
		return him;
	} catch (MelderError) {
		Melder_throw (U"TableOfReal not reconstructed.");
	}
}
예제 #2
0
/*
	Copies the Confusion into a TableOfReal that has one extra row and one extra column.
	The extra column holds the row sums, the extra row holds the column sums, and the
	bottom-right cell holds the grand total.
	Only the original row and column labels are copied; the marginal row and column
	receive no label here.
*/
autoTableOfReal Confusion_to_TableOfReal_marginals (Confusion me) {
	try {
		const long nrow = my numberOfRows, ncol = my numberOfColumns;
		const long marginalRow = nrow + 1, marginalColumn = ncol + 1;
		autoTableOfReal thee = TableOfReal_create (marginalRow, marginalColumn);

		// Copy the cells while accumulating the row sums and the grand total.
		double grandTotal = 0.0;
		for (long irow = 1; irow <= nrow; irow ++) {
			double rowTotal = 0.0;
			for (long icol = 1; icol <= ncol; icol ++) {
				const double cell = my data [irow] [icol];
				thy data [irow] [icol] = cell;
				rowTotal += cell;
			}
			thy data [irow] [marginalColumn] = rowTotal;
			grandTotal += rowTotal;
		}
		thy data [marginalRow] [marginalColumn] = grandTotal;

		// Column sums go into the extra row.
		for (long icol = 1; icol <= ncol; icol ++) {
			double columnTotal = 0;
			for (long irow = 1; irow <= nrow; irow ++) {
				columnTotal += my data [irow] [icol];
			}
			thy data [marginalRow] [icol] = columnTotal;
		}

		NUMstrings_copyElements (my rowLabels, thy rowLabels, 1, nrow);
		NUMstrings_copyElements (my columnLabels, thy columnLabels, 1, ncol);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": table with marginals not created.");
	}
}
예제 #3
0
/*
	Creates a new Confusion after applying a search-and-replace to all row and column labels.
	The new table has one row per row of the Distributions made from the replaced row labels
	and one column per row of the Distributions made from the replaced column labels.
	Every cell of the original is added to the cell that its row and column index map to.
	Throws if the Confusion has no row labels or no column labels.
*/
autoConfusion Confusion_condense (Confusion me, const char32 *search, const char32 *replace,
	long maximumNumberOfReplaces, int use_regexp) {
	try {
		if (! my rowLabels || ! my columnLabels) {
			Melder_throw (U"No row or column labels.");
		}

		// strs_replace writes its match counts into these; they are not used any further here.
		long numberOfMatches, numberOfStringMatches;

		autostring32vector replacedRowLabels (strs_replace (my rowLabels, 1, my numberOfRows, search, replace,
			maximumNumberOfReplaces, & numberOfMatches, & numberOfStringMatches, use_regexp), 1, my numberOfRows);

		autostring32vector replacedColumnLabels (strs_replace (my columnLabels, 1, my numberOfColumns, search, replace,
			maximumNumberOfReplaces, & numberOfMatches, & numberOfStringMatches, use_regexp), 1, my numberOfColumns);

		// Hand the replaced labels over to Strings objects.
		autoStrings rowStrings = Thing_new (Strings);
		rowStrings -> numberOfStrings = my numberOfRows;
		rowStrings -> strings = replacedRowLabels.transfer();

		autoStrings columnStrings = Thing_new (Strings);
		columnStrings -> numberOfStrings = my numberOfColumns;
		columnStrings -> strings = replacedColumnLabels.transfer();

		// The sizes of the two Distributions give the dimensions of the new Confusion.
		autoDistributions columnDistribution = Strings_to_Distributions (columnStrings.get());
		const long numberOfResponses = columnDistribution -> numberOfRows;

		autoDistributions rowDistribution = Strings_to_Distributions (rowStrings.get());
		const long numberOfStimuli = rowDistribution -> numberOfRows;

		autoConfusion thee = Confusion_create (numberOfStimuli, numberOfResponses);

		NUMstrings_copyElements (rowDistribution -> rowLabels, thy rowLabels, 1, numberOfStimuli);
		NUMstrings_copyElements (columnDistribution -> rowLabels, thy columnLabels, 1, numberOfResponses);

		// NOTE(review): create_index presumably fills, for each old label, the position of the
		// equal label in the Distributions — confirm against its definition.
		autoNUMvector<long> newRowOf (1, my numberOfRows);
		create_index (rowStrings -> strings, 1, my numberOfRows, rowDistribution -> rowLabels, 1, numberOfStimuli, newRowOf.peek());
		autoNUMvector<long> newColumnOf (1, my numberOfColumns);
		create_index (columnStrings -> strings, 1, my numberOfColumns, columnDistribution -> rowLabels, 1, numberOfResponses, newColumnOf.peek());

		for (long irow = 1; irow <= my numberOfRows; irow ++) {
			for (long icol = 1; icol <= my numberOfColumns; icol ++) {
				thy data [newRowOf [irow]] [newColumnOf [icol]] += my data [irow] [icol];
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": not condensed.");
	}
}
예제 #4
0
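/*
	Coefficient of discriminant function i for variable j:
	raw r[j]: eigenvec[i][j]
	unstandardized u[j]: sqrt(N-g) * r[j]
	standardized s[j]: u[j] * sqrt (w[j][j] / (N-g))
	where N is the total number of observations, g the number of groups,
	and w the pooled within-groups SSCP.
*/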
/*
	Returns a table with one row per eigenvalue ("function_1", ...) and one column per
	dimension plus a final column labelled "constant".
	choice == 0 : cells are scaled by sqrt (N - g) and the constant is set to 0.
	choice == 2 : cell j is scaled by sqrt (within [j] [j]) of the pooled within-groups SSCP.
	otherwise   : cells are scaled by sqrt (N - g).
	For every choice except 0 the constant is minus the sum of (coefficient * total centroid).
	NOTE(review): with choice == 0 the cells are still multiplied by sqrt (N - g),
	so they are not the bare eigenvector elements — confirm that this is intended.
*/
autoTableOfReal Discriminant_extractCoefficients (Discriminant me, int choice) {
    try {
        const bool wantRaw = ( choice == 0 );
        const bool wantStandardized = ( choice == 2 );
        const long numberOfVariables = my eigen -> dimension;
        const long numberOfFunctions = my eigen -> numberOfEigenvalues;
        const long constantColumn = numberOfVariables + 1;

        SSCP total = my total.get();
        autoTableOfReal thee = TableOfReal_create (numberOfFunctions, constantColumn);
        NUMstrings_copyElements (total -> columnLabels, thy columnLabels, 1, numberOfVariables);

        // The pooled within-groups SSCP is only needed for the standardized coefficients.
        autoSSCP within;
        if (wantStandardized) {
            within = Discriminant_extractPooledWithinGroupsSSCP (me);
        }

        TableOfReal_setColumnLabel (thee.get(), constantColumn, U"constant");
        TableOfReal_setSequentialRowLabels (thee.get(), 1, numberOfFunctions, U"function_", 1, 1);

        const double commonScale = sqrt (total -> numberOfObservations - my numberOfGroups);
        const double *centroid = total -> centroid;
        for (long ifun = 1; ifun <= numberOfFunctions; ifun ++) {
            double weightedCentroidSum = 0.0;
            for (long ivar = 1; ivar <= numberOfVariables; ivar ++) {
                const double scale = ( wantStandardized ? sqrt (within -> data [ivar] [ivar]) : commonScale );
                const double coefficient = scale * my eigen -> eigenvectors [ifun] [ivar];
                thy data [ifun] [ivar] = coefficient;
                weightedCentroidSum += coefficient * centroid [ivar];
            }
            thy data [ifun] [constantColumn] = ( wantRaw ? 0.0 : - weightedCentroidSum );
        }
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": coefficients not extracted.");
    }
}
예제 #5
0
파일: CCA.c 프로젝트: alekstorm/tala
/*
	Creates a table with the rows of `thee` and 2 * numberOfFactors columns:
	columns 1..numberOfFactors ("y_") are filled from table columns 1..ny through `my y`,
	the remaining columns ("x_") from table columns ny+1.. through `my x`.
	numberOfFactors == 0 means: use my numberOfCoefficients.
	Returns NULL (after queueing an error message for the two argument checks)
	if an argument is invalid or if any of the construction steps fails.
*/
TableOfReal CCA_and_TableOfReal_scores (CCA me, TableOfReal thee, long numberOfFactors)
{
	long numberOfCases = thy numberOfRows;
	long nx = my x -> dimension, ny = my y -> dimension;
	TableOfReal him = NULL;
	int ok;

	if (thy numberOfColumns != ny + nx) return Melder_errorp7 (L"The number "
		"of columns in the table (", Melder_integer (thy numberOfColumns), L") does not agree with "
		"the dimensions of the CCA object (ny + nx = ", Melder_integer (ny), L" + ", Melder_integer (nx), L").");

	if (numberOfFactors == 0) numberOfFactors = my numberOfCoefficients;
	if (! (numberOfFactors >= 1 && numberOfFactors <= my numberOfCoefficients))
		return Melder_errorp3 (L"The number of factors must be in interval "
			"[1, ", Melder_integer (my numberOfCoefficients), L"].");

	him = TableOfReal_create (numberOfCases, 2 * numberOfFactors);
	if (! him) return NULL;

	/* The steps are chained with && so that the first failure stops the rest. */
	ok = NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, thy numberOfRows) &&
		Eigen_and_TableOfReal_project_into (my y, thee, 1, ny, &him, 1, numberOfFactors) &&
		Eigen_and_TableOfReal_project_into (my x, thee, ny + 1,
			thy numberOfColumns, &him, numberOfFactors + 1, his numberOfColumns) &&
		TableOfReal_setSequentialColumnLabels (him, 1, numberOfFactors, L"y_", 1, 1) &&
		TableOfReal_setSequentialColumnLabels (him, numberOfFactors + 1,
			his numberOfColumns, L"x_", 1, 1);
	if (! ok) forget (him);

	return him;
}
예제 #6
0
파일: CCA.cpp 프로젝트: eginhard/praat
/*
	Creates a table with the rows of `thee` and 2 * numberOfFactors columns:
	columns 1..numberOfFactors ("y_") are filled from table columns 1..ny through `my y`,
	the remaining columns ("x_") from table columns ny+1.. through `my x`.
	numberOfFactors == 0 means: use my numberOfCoefficients.
	Throws if the table width differs from ny + nx or if numberOfFactors is out of range.
	The caller receives the table through him.transfer().
*/
TableOfReal CCA_and_TableOfReal_scores (CCA me, TableOfReal thee, long numberOfFactors) {
	try {
		const long numberOfCases = thy numberOfRows;
		long nx = my x -> dimension, ny = my y -> dimension;

		if (thy numberOfColumns != ny + nx) {
			Melder_throw (U"The number of columns in the table (", thy numberOfColumns,
				U") does not agree with the dimensions of the CCA object (ny + nx = ", ny, U" + ", nx, U").");
		}
		if (numberOfFactors == 0) {
			numberOfFactors = my numberOfCoefficients;
		}
		if (! (numberOfFactors >= 1 && numberOfFactors <= my numberOfCoefficients)) {
			Melder_throw (U"The number of factors must be in interval [1, ", my numberOfCoefficients, U"].");
		}

		autoTableOfReal him = TableOfReal_create (numberOfCases, 2 * numberOfFactors);
		TableOfReal phim = him.peek();   // the projection routine wants the address of a plain pointer
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, thy numberOfRows);

		// y part into the first block of columns, x part into the second block.
		Eigen_and_TableOfReal_project_into (my y, thee, 1, ny, & phim, 1, numberOfFactors);
		Eigen_and_TableOfReal_project_into (my x, thee, ny + 1, thy numberOfColumns, & phim,
			numberOfFactors + 1, his numberOfColumns);

		TableOfReal_setSequentialColumnLabels (him.peek(), 1, numberOfFactors, U"y_", 1, 1);
		TableOfReal_setSequentialColumnLabels (him.peek(), numberOfFactors + 1, his numberOfColumns, U"x_", 1, 1);
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (me, U": no TableOfReal with scores created.");
	}
}
예제 #7
0
/*
	Merges the rows whose labels occur in the token list `labels` into one new row.
	The merged row gets the label `newLabel` and is placed at row `newpos`, which is
	clipped to the range 1 .. new number of rows. The cells of the merged rows are summed;
	all other rows are copied in their original order around the new row.
	For each token only the first row with an equal label is marked.
	Throws if no label matches; warns if the number of marked rows differs from the number of tokens.
*/
autoConfusion Confusion_groupStimuli (Confusion me, const char32 *labels, const char32 *newLabel, long newpos) {
	try {
		const long numberOfTokens = Melder_countTokens (labels);

		// keptRow [i] == 0 marks row i as part of the group; any other value means "keep".
		autoNUMvector<long> keptRow (1, my numberOfRows);
		for (long irow = 1; irow <= my numberOfRows; irow ++) {
			keptRow [irow] = irow;
		}

		char32 *token = Melder_firstToken (labels);
		while (token != nullptr) {
			for (long irow = 1; irow <= my numberOfRows; irow ++) {
				if (Melder_equ (token, my rowLabels [irow])) {
					keptRow [irow] = 0;
					break;
				}
			}
			token = Melder_nextToken ();
		}

		long numberOfGroupedRows = 0;
		for (long irow = 1; irow <= my numberOfRows; irow ++) {
			if (keptRow [irow] == 0) {
				numberOfGroupedRows ++;
			}
		}
		if (numberOfGroupedRows == 0) {
			Melder_throw (U"Invalid stimulus labels.");
		}
		if (numberOfGroupedRows != numberOfTokens) {
			Melder_warning (U"One or more of the given stimulus labels are suspect.");
		}

		const long newNumberOfRows = my numberOfRows - numberOfGroupedRows + 1;
		if (newpos < 1) {
			newpos = 1;
		}
		if (newpos > newNumberOfRows) {
			newpos = newNumberOfRows;
		}

		autoConfusion thee = Confusion_create (newNumberOfRows, my numberOfColumns);
		NUMstrings_copyElements (my columnLabels, thy columnLabels, 1, my numberOfColumns);
		TableOfReal_setRowLabel (thee.get(), newpos, newLabel);

		long nextFreeRow = 1;
		for (long irow = 1; irow <= my numberOfRows; irow ++) {
			long targetRow;
			if (keptRow [irow] > 0) {
				// Kept rows fill the free positions in order, skipping the slot of the new row.
				if (nextFreeRow == newpos) {
					nextFreeRow ++;
				}
				targetRow = nextFreeRow ++;
				TableOfReal_setRowLabel (thee.get(), targetRow, my rowLabels [irow]);
			} else {
				targetRow = newpos;
			}
			for (long icol = 1; icol <= my numberOfColumns; icol ++) {
				thy data [targetRow] [icol] += my data [irow] [icol];
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": stimuli not grouped.");
	}
}
예제 #8
0
/*
	Merges the columns whose labels occur in the token list `labels` into one new column.
	The merged column gets the label `newLabel` and is placed at column `newpos`, which is
	clipped to the range 1 .. new number of columns. The cells of the merged columns are summed;
	all other columns are copied in their original order around the new column.
	For each token only the first column with an equal label is marked.
	Throws if no label matches; warns if the number of marked columns differs from the number of tokens.
*/
autoConfusion Confusion_groupResponses (Confusion me, const char32 *labels, const char32 *newLabel, long newpos) {
	try {
		const long numberOfTokens = Melder_countTokens (labels);

		// keptColumn [i] == 0 marks column i as part of the group; any other value means "keep".
		autoNUMvector<long> keptColumn (1, my numberOfColumns);
		for (long icol = 1; icol <= my numberOfColumns; icol ++) {
			keptColumn [icol] = icol;
		}

		char32 *token = Melder_firstToken (labels);
		while (token != nullptr) {
			for (long icol = 1; icol <= my numberOfColumns; icol ++) {
				if (Melder_equ (token, my columnLabels [icol])) {
					keptColumn [icol] = 0;
					break;
				}
			}
			token = Melder_nextToken ();
		}

		long numberOfGroupedColumns = 0;
		for (long icol = 1; icol <= my numberOfColumns; icol ++) {
			if (keptColumn [icol] == 0) {
				numberOfGroupedColumns ++;
			}
		}
		if (numberOfGroupedColumns == 0) {
			Melder_throw (U"Invalid response labels.");
		}
		if (numberOfGroupedColumns != numberOfTokens) {
			Melder_warning (U"One or more of the given response labels are suspect.");
		}

		const long newNumberOfColumns = my numberOfColumns - numberOfGroupedColumns + 1;
		if (newpos < 1) {
			newpos = 1;
		}
		if (newpos > newNumberOfColumns) {
			newpos = newNumberOfColumns;
		}

		autoConfusion thee = Confusion_create (my numberOfRows, newNumberOfColumns);
		NUMstrings_copyElements (my rowLabels, thy rowLabels, 1, my numberOfRows);
		TableOfReal_setColumnLabel (thee.get(), newpos, newLabel);

		long nextFreeColumn = 1;
		for (long icol = 1; icol <= my numberOfColumns; icol ++) {
			long targetColumn;
			if (keptColumn [icol] > 0) {
				// Kept columns fill the free positions in order, skipping the slot of the new column.
				if (nextFreeColumn == newpos) {
					nextFreeColumn ++;
				}
				targetColumn = nextFreeColumn ++;
				TableOfReal_setColumnLabel (thee.get(), targetColumn, my columnLabels [icol]);
			} else {
				targetColumn = newpos;
			}
			for (long irow = 1; irow <= my numberOfRows; irow ++) {
				thy data [irow] [targetColumn] += my data [irow] [icol];
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": responses not grouped.");
	}
}
예제 #9
0
파일: PCA.cpp 프로젝트: PaulBoersma/praat
/*
	Creates a PCA from the cells of the table via NUMdmatrix_to_PCA and
	copies the table's column labels into the PCA's labels.
	NOTE(review): the meaning of the final `false` argument is defined by
	NUMdmatrix_to_PCA, which is not visible here — confirm.
*/
autoPCA TableOfReal_to_PCA_byRows (TableOfReal me) {
	try {
		const long numberOfTableRows = my numberOfRows;
		const long numberOfTableColumns = my numberOfColumns;
		autoPCA thee = NUMdmatrix_to_PCA (my data, numberOfTableRows, numberOfTableColumns, false);
		NUMstrings_copyElements (my columnLabels, thy labels, 1, numberOfTableColumns);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": PCA not created.");
	}
}
예제 #10
0
파일: PCA.cpp 프로젝트: ffostertw/praat
/*
	Creates a PCA from a table with cases in rows and variables in columns.
	The column means are stored as the PCA's centroid and subtracted from a copy of the data;
	the eigenstructure is then initialised from that centred copy, and the resulting
	eigenvalues are divided by (number of rows - 1).
	Throws on undefined cells, on fewer than 2 rows, or when the Frobenius norm of the data is zero.
	Warns (but continues) when there are fewer rows than columns.
*/
autoPCA TableOfReal_to_PCA (I) {
	iam (TableOfReal);
	try {
		const long numberOfCases = my numberOfRows, numberOfVariables = my numberOfColumns;

		if (! TableOfReal_areAllCellsDefined (me, 0, 0, 0, 0)) {
			Melder_throw (U"Undefined cells.");
		}
		if (numberOfCases < 2) {
			Melder_throw (U"There is not enough data to perform a PCA.\nYour table has less than 2 rows.");
		}
		if (numberOfCases < numberOfVariables) {
			Melder_warning (U"The number of rows in your table is less than the \nnumber of columns. ");
		}
		if (NUMfrobeniusnorm (numberOfCases, numberOfVariables, my data) == 0) {
			Melder_throw (U"All values in your table are zero.");
		}

		autoPCA thee = Thing_new (PCA);
		autoNUMmatrix<double> centred (NUMmatrix_copy (my data, 1, numberOfCases, 1, numberOfVariables), 1, 1);
		thy centroid = NUMvector<double> (1, numberOfVariables);

		// Centre every column of the working copy and remember its mean.
		for (long icol = 1; icol <= numberOfVariables; icol ++) {
			double columnSum = centred [1] [icol];
			for (long irow = 2; irow <= numberOfCases; irow ++) {
				columnSum += centred [irow] [icol];
			}
			const double columnMean = columnSum / numberOfCases;
			for (long irow = 1; irow <= numberOfCases; irow ++) {
				centred [irow] [icol] -= columnMean;
			}
			thy centroid [icol] = columnMean;
		}

		Eigen_initFromSquareRoot (thee.peek(), centred.peek(), numberOfCases, numberOfVariables);

		thy labels = NUMvector<char32 *> (1, numberOfVariables);
		NUMstrings_copyElements (my columnLabels, thy labels, 1, numberOfVariables);

		PCA_setNumberOfObservations (thee.peek(), numberOfCases);

		/*
			The covariance matrix is C = A'A / (N-1), but the eigenstructure was computed
			for A'A. The eigenvectors are the same for both; only the eigenvalues
			still have to be divided by (N-1).
		*/
		for (long ieigen = 1; ieigen <= thy numberOfEigenvalues; ieigen ++) {
			thy eigenvalues [ieigen] /= (numberOfCases - 1);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": PCA not created.");
	}
}
예제 #11
0
/*
	Creates a table with the rows (and row labels) of `thee` and `numberOfComponents` columns,
	and lets Eigen_and_TableOfReal_project_into fill it from table columns `from` up to the last one.
	numberOfComponents == 0 means: use all of my eigenvalues.
*/
autoTableOfReal Eigen_and_TableOfReal_project (Eigen me, TableOfReal thee, long from, long numberOfComponents) {
	try {
		const long numberOfOutputColumns =
			( numberOfComponents == 0 ? my numberOfEigenvalues : numberOfComponents );

		autoTableOfReal him = TableOfReal_create (thy numberOfRows, numberOfOutputColumns);
		Eigen_and_TableOfReal_project_into (me, thee, from, thy numberOfColumns,
			him.peek(), 1, numberOfOutputColumns);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, thy numberOfRows);
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (U"TableOfReal not created from projection.");
	}
}
예제 #12
0
파일: PCA.cpp 프로젝트: PaulBoersma/praat
/*
	Creates a Configuration with the rows (and row labels) of `thee` and one column per kept dimension,
	filled by Eigen_and_TableOfReal_into_TableOfReal_projectRows starting at table column 1.
	A requested number of 0, or one larger than my numberOfEigenvalues, means: keep all eigenvalues.
	The columns are labelled sequentially with the prefix "pc".
*/
autoConfiguration PCA_and_TableOfReal_to_Configuration (PCA me, TableOfReal thee, long numberOfDimensionsToKeep) {
	try {
		const bool keepAll =
			( numberOfDimensionsToKeep == 0 || numberOfDimensionsToKeep > my numberOfEigenvalues );
		const long numberOfDimensions = ( keepAll ? my numberOfEigenvalues : numberOfDimensionsToKeep );

		autoConfiguration him = Configuration_create (thy numberOfRows, numberOfDimensions);
		Eigen_and_TableOfReal_into_TableOfReal_projectRows (me, thee, 1, him.get(), 1, numberOfDimensions);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, thy numberOfRows);
		TableOfReal_setSequentialColumnLabels (him.get(), 0, 0, U"pc", 1, 1);
		return him;
	} catch (MelderError) {
		Melder_throw (U"Configuration not created from PCA & TableOfReal.");
	}
}
예제 #13
0
/*
	Returns a table with one row per group and one column per dimension of my eigen.
	Row i holds the centroid of group i and is labelled with that group's name.
	The column labels are taken from the last group.
*/
autoTableOfReal Discriminant_extractGroupCentroids (Discriminant me) {
    try {
        const long numberOfGroups = my groups -> size;
        const long numberOfVariables = my eigen -> dimension;
        autoTableOfReal thee = TableOfReal_create (numberOfGroups, numberOfVariables);

        for (long igroup = 1; igroup <= numberOfGroups; igroup ++) {
            SSCP group = my groups->at [igroup];
            TableOfReal_setRowLabel (thee.get(), igroup, Thing_getName (group));
            NUMvector_copyElements (group -> centroid, thy data [igroup], 1, numberOfVariables);
        }

        SSCP lastGroup = my groups->at [numberOfGroups];
        NUMstrings_copyElements (lastGroup -> columnLabels, thy columnLabels, 1, numberOfVariables);
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": group centroids not extracted.");
    }
}
예제 #14
0
/*
	Creates a table with 2 * my numberOfCoefficients rows and one column per column of `thee`.
	For every row i of the Correlation:
	  cell [j] [i]                         = sum over k of thy data [i] [k]      * (y eigenvector j) [k]
	  cell [numberOfCoefficients + j] [i]  = sum over k of thy data [i] [ny + k] * (x eigenvector j) [k]
	The first block of rows is labelled "dv", the second block "iv".
	Returns NULL if the number of columns differs from ny + nx (with an error message),
	if the table cannot be created, or if setting the labels fails.
*/
TableOfReal CCA_and_Correlation_factorLoadings (CCA me, Correlation thee)
{
	long ny = my y -> dimension, nx = my x -> dimension;
	long numberOfCoefficients = my numberOfCoefficients;
	double **evecy = my y -> eigenvectors, **evecx = my x -> eigenvectors;
	TableOfReal him;
	long irow, icoef, k;
	int labelsOk;

	if (thy numberOfColumns != ny + nx) return Melder_errorp1 (L"The number "
		"of columns in the Correlation object must equal the sum of the "
		"dimensions in the CCA object");

	him = TableOfReal_create (2 * numberOfCoefficients, thy numberOfColumns);
	if (! him) return NULL;

	/* Chained with && so that the first failing step stops the remaining ones. */
	labelsOk = NUMstrings_copyElements (thy columnLabels, his columnLabels,
			1, thy numberOfColumns) &&
		TableOfReal_setSequentialRowLabels (him, 1, numberOfCoefficients,
			L"dv", 1, 1) &&
		TableOfReal_setSequentialRowLabels (him, numberOfCoefficients + 1,
			2 * numberOfCoefficients, L"iv", 1, 1);
	if (! labelsOk)
	{
		forget (him);
		return NULL;
	}

	for (irow = 1; irow <= thy numberOfRows; irow++)
	{
		const double *correlations = thy data[irow];
		for (icoef = 1; icoef <= numberOfCoefficients; icoef++)
		{
			double ysum = 0, xsum = 0;
			for (k = 1; k <= ny; k++)
			{
				ysum += correlations[k] * evecy[icoef][k];
			}
			for (k = 1; k <= nx; k++)
			{
				xsum += correlations[ny + k] * evecx[icoef][k];
			}
			his data[icoef][irow] = ysum;
			his data[numberOfCoefficients + icoef][irow] = xsum;
		}
	}
	return him;
}
예제 #15
0
/*
	Returns a table with one row per group and one column per dimension of my eigen.
	Cell [i] [j] is sqrt (diagonal element j of group i's SSCP / (n - 1)), where n is the
	group's number of observations rounded down; when n - 1 is not positive the cell is undefined.
	Rows are labelled with the group names; the column labels are taken from the last group.
*/
autoTableOfReal Discriminant_extractGroupStandardDeviations (Discriminant me) {
    try {
        const long numberOfGroups = my groups->size;
        const long numberOfVariables = my eigen -> dimension;
        autoTableOfReal thee = TableOfReal_create (numberOfGroups, numberOfVariables);

        for (long igroup = 1; igroup <= numberOfGroups; igroup ++) {
            SSCP group = my groups->at [igroup];
            TableOfReal_setRowLabel (thee.get(), igroup, Thing_getName (group));

            const long degreesOfFreedom = (long) floor (group -> numberOfObservations) - 1;
            double *row = thy data [igroup];
            if (degreesOfFreedom > 0) {
                for (long ivar = 1; ivar <= numberOfVariables; ivar ++) {
                    row [ivar] = sqrt (group -> data [ivar] [ivar] / degreesOfFreedom);
                }
            } else {
                for (long ivar = 1; ivar <= numberOfVariables; ivar ++) {
                    row [ivar] = NUMundefined;
                }
            }
        }

        SSCP lastGroup = my groups->at [numberOfGroups];
        NUMstrings_copyElements (lastGroup -> columnLabels, thy columnLabels, 1, numberOfVariables);
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": group standard deviations not extracted.");
    }
}
예제 #16
0
/*
	Creates a table with the rows (and row labels) of `thee` and `numberOfComponents` columns,
	and lets Eigen_and_TableOfReal_project_into fill it from table columns `from` up to the last one.
	numberOfComponents == 0 means: use all of my eigenvalues.
	Returns NULL if the table cannot be created or if one of the two steps fails.
*/
TableOfReal Eigen_and_TableOfReal_project (I, thou, long from,
        long numberOfComponents)
{
    iam (Eigen);
    thouart (TableOfReal);
    TableOfReal him;

    if (numberOfComponents == 0) numberOfComponents = my numberOfEigenvalues;

    him = TableOfReal_create (thy numberOfRows, numberOfComponents);
    if (him == NULL) return him;

    /* The row labels are copied only when the projection has succeeded. */
    if (! (Eigen_and_TableOfReal_project_into (me, thee, from,
                thy numberOfColumns, & him, 1, numberOfComponents) &&
            NUMstrings_copyElements (thy rowLabels, his rowLabels,
                1, thy numberOfRows)))
    {
        forget (him);
    }
    return him;
}
예제 #17
0
파일: PCA.cpp 프로젝트: PaulBoersma/praat
/*
	Creates a table with the rows (and row labels) of `thee` and `numberOfDimensions` columns.
	Cell [i] [j] is the sum over k = 1 .. my dimension of
		eigenvector [j] [k] * (thy data [i] [k] - my centroid [k]) / sqrt (my eigenvalues [j]).
	A requested number of 0, or one larger than my numberOfEigenvalues, means: use all eigenvalues.
	The columns are labelled sequentially with the prefix "pc".

	Throws if the table has fewer columns than the PCA has dimensions, because the sum
	reads my dimension cells from every table row. Extra table columns are ignored.
	NOTE(review): an eigenvalue of zero makes sigma zero, so the division then yields
	non-finite cells; this is left as it was.
*/
autoTableOfReal PCA_and_TableOfReal_to_TableOfReal_zscores (PCA me, TableOfReal thee, long numberOfDimensions) {
	try {
		// Guard against reading past the end of a table row in the innermost loop below.
		if (thy numberOfColumns < my dimension) {
			Melder_throw (U"The number of columns in the table (", thy numberOfColumns,
				U") must not be less than the dimension of the PCA (", my dimension, U").");
		}
		if (numberOfDimensions == 0 || numberOfDimensions > my numberOfEigenvalues) {
			numberOfDimensions = my numberOfEigenvalues;
		}
		autoTableOfReal him = TableOfReal_create (thy numberOfRows, numberOfDimensions);
		for (long i = 1; i <= thy numberOfRows; i++) { /* row */
			for (long j = 1; j <= numberOfDimensions; j++) {
				double r = 0, sigma = sqrt (my eigenvalues[j]);
				for (long k = 1; k <= my dimension; k++) {
					// eigenvector in row, data in row
					// (the division stays inside the sum so that rounding is unchanged)
					r += my eigenvectors[j][k] * (thy data[i][k] - my centroid[k]) / sigma;
				}
				his data[i][j] = r;
			}
		}
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, thy numberOfRows);
		TableOfReal_setSequentialColumnLabels (him.get(), 0, 0, U"pc", 1, 1);
		return him;
	} catch (MelderError) {
		Melder_throw (U"TableOfReal (zscores) not created from PCA & TableOfReal.");
	}
}
예제 #18
0
/*
	Creates a ClassificationTable with one row per row of `thee` and one column per group.
	Cell [i] [j] is exp (pt_j - max pt) divided by the sum of those exponentials over all groups,
	where pt_j = log_apriori [j] - 0.5 * (ln_determinant [j] + squared Mahalanobis distance to group j).

	poolCovarianceMatrices: if nonzero, the pooled covariance matrix is used for every group;
		otherwise each group uses its own matrix, falling back on the pooled one
		when NUMlowerCholeskyInverse throws for that group.
	useAprioriProbabilities: if nonzero, log_apriori [j] = log (my aprioriProbabilities [j]);
		otherwise every group gets - log (g).

	Throws if the number of table columns differs from the dimension of the discriminant.
	NOTE(review): NUMvector_normalize1 is called on my aprioriProbabilities itself,
	so the Discriminant appears to be modified by this call — confirm.
*/
ClassificationTable Discriminant_and_TableOfReal_to_ClassificationTable (Discriminant me, TableOfReal thee,
        int poolCovarianceMatrices, int useAprioriProbabilities) {
	try {
		long g = Discriminant_getNumberOfGroups (me);
		long p = Eigen_getDimensionOfComponents (me);
		long m = thy numberOfRows;

		if (p != thy numberOfColumns) Melder_throw
			(U"The number of columns does not agree with the dimension of the discriminant.");

		autoNUMvector<double> log_p (1, g);
		autoNUMvector<double> log_apriori (1, g);
		autoNUMvector<double> ln_determinant (1, g);
		autoNUMvector<double> buf (1, p);
		autoNUMvector<SSCP> sscpvec (1, g);
		autoSSCP pool = SSCPs_to_SSCP_pool (my groups);
		autoClassificationTable him = ClassificationTable_create (m, g);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, m);

		// Scale the sscp to become a covariance matrix.

		for (long i = 1; i <= p; i++) {
			for (long k = i; k <= p; k++) {
				pool -> data[k][i] = (pool -> data[i][k] /= (pool -> numberOfObservations - g));
			}
		}

		// lnd receives the value that NUMlowerCholeskyInverse reports for the pooled matrix.
		double lnd;
		autoSSCPs agroups = 0; SSCPs groups;
		if (poolCovarianceMatrices) {
			/*
				Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
				L^-1 will be used later in the Mahalanobis distance calculation:
				v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).
			*/

			NUMlowerCholeskyInverse (pool -> data, p, &lnd);
			for (long j = 1; j <= g; j++) {
				ln_determinant[j] = lnd;
				sscpvec[j] = pool.peek();
			}
			groups = (SSCPs) my groups;
		} else {
			// Calculate the inverses of all group covariance matrices.
			// In case of a singular matrix, substitute inverse of pooled.
			// The work is done on a copy of the groups, so my groups keeps its SSCP data.

			agroups.reset (Data_copy ( (SSCPs) my groups)); groups = agroups.peek();
			long npool = 0;
			for (long j = 1; j <= g; j++) {
				SSCP t = (SSCP) groups -> item[j];
				long no = (long) floor (SSCP_getNumberOfObservations (t));
				for (long i = 1; i <= p; i++) {
					for (long k = i; k <= p; k++) {
						t -> data[k][i] = (t -> data[i][k] /= (no - 1));
					}
				}
				sscpvec[j] = (SSCP) groups -> item[j];
				try {
					NUMlowerCholeskyInverse (t -> data, p, &ln_determinant[j]);
				} catch (MelderError) {
					// Try the alternative: the pooled covariance matrix.
					// Clear the error.

					Melder_clearError ();
					// The pooled matrix is inverted only once, however many groups fall back on it.
					if (npool == 0) {
						NUMlowerCholeskyInverse (pool -> data, p, &lnd);
					}
					npool++;
					sscpvec[j] = pool.peek();
					ln_determinant[j] = lnd;
				}
			}
			if (npool > 0) {
				Melder_warning (npool, U" groups use pooled covariance matrix.");
			}
		}

		// Labels for columns in ClassificationTable

		for (long j = 1; j <= g; j++) {
			const char32 *name = Thing_getName ( (Thing) my groups -> item[j]);
			if (! name) {
				name = U"?";
			}
			TableOfReal_setColumnLabel (him.peek(), j, name);
		}

		// Normalize the sum of the apriori probabilities to 1.
		// Next take ln (p) because otherwise probabilities might be too small to represent.

		NUMvector_normalize1 (my aprioriProbabilities, g);
		double logg = log (g);
		for (long j = 1; j <= g; j++) {
			log_apriori[j] = useAprioriProbabilities ? log (my aprioriProbabilities[j]) : - logg;
		}

		// Generalized squared distance function:
		// D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)

		for (long i = 1; i <= m; i++) {
			double norm = 0, pt_max = -1e38;
			for (long j = 1; j <= g; j++) {
				SSCP t = (SSCP) groups -> item[j];
				double md = mahalanobisDistanceSq (sscpvec[j] -> data, p, thy data[i], t -> centroid, buf.peek());
				double pt = log_apriori[j] - 0.5 * (ln_determinant[j] + md);
				if (pt > pt_max) {
					pt_max = pt;
				}
				log_p[j] = pt;
			}
			// Subtract the largest value before exponentiating; log_p is reused to hold the exponentials.
			for (long j = 1; j <= g; j++) {
				norm += (log_p[j] = exp (log_p[j] - pt_max));
			}
			// Divide by the sum so that the cells of this row add up to 1.
			for (long j = 1; j <= g; j++) {
				his data[i][j] = log_p[j] / norm;
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (U"ClassificationTable from Discriminant & TableOfReal not created.");
	}
}
예제 #19
0
/*
	Creates a ClassificationTable like the plain Discriminant & TableOfReal classification,
	but classifies each row after adding a running displacement vector to it.

	For row i: x = thy data [i] + displacement. The row of the result is computed from x as
	exp (pt_j - max pt) normalized over the groups, where
	pt_j = log_apriori [j] - 0.5 * (ln_determinant [j] + squared Mahalanobis distance to group j).
	The displacement that was used for row i is stored in row i of the displacements table.
	If the winning group's value in the result exceeds minProb, the displacement is then
	changed by alpha * (centroid of the winning group - x) before the next row is processed.

	poolCovarianceMatrices: if nonzero, the pooled covariance matrix is used for every group;
		otherwise each group uses its own matrix, falling back on the pooled one
		when NUMlowerCholeskyInverse throws for that group.
	useAprioriProbabilities: if nonzero, log_apriori [j] = log (my aprioriProbabilities [j]);
		otherwise every group gets - log (g).
	displacements: output; it is dereferenced unconditionally, so it must not be null.

	Throws if the number of table columns differs from the dimension of the discriminant.
	NOTE(review): the first row is classified with whatever autoNUMvector puts into
	`displacement` on creation (presumably zeros) — confirm.
	NOTE(review): NUMvector_normalize1 is called on my aprioriProbabilities itself,
	so the Discriminant appears to be modified by this call — confirm.
*/
autoClassificationTable Discriminant_and_TableOfReal_to_ClassificationTable_dw (Discriminant me, TableOfReal thee, int poolCovarianceMatrices, int useAprioriProbabilities, double alpha, double minProb, autoTableOfReal *displacements) {
    try {
        long g = Discriminant_getNumberOfGroups (me);
        long p = Eigen_getDimensionOfComponents (my eigen.get());
        long m = thy numberOfRows;

        if (p != thy numberOfColumns) Melder_throw
            (U"The number of columns does not agree with the dimension of the discriminant.");

        autoNUMvector<double> log_p (1, g);
        autoNUMvector<double> log_apriori (1, g);
        autoNUMvector<double> ln_determinant (1, g);
        autoNUMvector<double> buf (1, p);
        autoNUMvector<double> displacement (1, p);
        autoNUMvector<double> x (1, p);
        autoNUMvector<SSCP> sscpvec (1, g);
        autoSSCP pool = SSCPList_to_SSCP_pool (my groups.get());
        autoClassificationTable him = ClassificationTable_create (m, g);
        NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, m);
        // Starts as a copy of the input table; every cell is overwritten in the main loop below.
        autoTableOfReal adisplacements = Data_copy (thee);

        // Scale the sscp to become a covariance matrix.

        for (long i = 1; i <= p; i ++) {
            for (long k = i; k <= p; k ++) {
                pool -> data [k] [i] = pool -> data [i] [k] /= pool -> numberOfObservations - g;
            }
        }

        // lnd receives the value that NUMlowerCholeskyInverse reports for the pooled matrix.
        double lnd;
        autoSSCPList agroups;
        SSCPList groups;
        if (poolCovarianceMatrices) {
            // Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
            // L^-1 will be used later in the Mahalanobis distance calculation:
            // v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).

            NUMlowerCholeskyInverse (pool -> data, p, & lnd);
            for (long j = 1; j <= g; j ++) {
                ln_determinant [j] = lnd;
                sscpvec [j] = pool.get();
            }
            groups = my groups.get();
        } else {
            //Calculate the inverses of all group covariance matrices.
            // In case of a singular matrix, substitute inverse of pooled.
            // The work is done on a copy of the groups, so my groups keeps its SSCP data.

            agroups = Data_copy (my groups.get());
            groups = agroups.get();
            long npool = 0;
            for (long j = 1; j <= g; j ++) {
                SSCP t = groups->at [j];
                long no = (long) floor (SSCP_getNumberOfObservations (t));
                for (long i = 1; i <= p; i ++) {
                    for (long k = i; k <= p; k ++) {
                        t -> data [k] [i] = t -> data [i] [k] /= no - 1;
                    }
                }
                sscpvec [j] = groups->at [j];
                try {
                    NUMlowerCholeskyInverse (t -> data, p, & ln_determinant [j]);
                } catch (MelderError) {
                    // Try the alternative: the pooled covariance matrix.
                    // Clear the error.

                    Melder_clearError ();
                    // The pooled matrix is inverted only once, however many groups fall back on it.
                    if (npool == 0) {
                        NUMlowerCholeskyInverse (pool -> data, p, & lnd);
                    }
                    npool ++;
                    sscpvec [j] = pool.get();
                    ln_determinant [j] = lnd;
                }
            }
            if (npool > 0) {
                Melder_warning (npool, U" groups use pooled covariance matrix.");
            }
        }

        // Labels for columns in ClassificationTable

        for (long j = 1; j <= g; j ++) {
            const char32 *name = Thing_getName (my groups->at [j]);
            if (! name) {
                name = U"?";
            }
            TableOfReal_setColumnLabel (him.get(), j, name);
        }

        // Normalize the sum of the apriori probabilities to 1.
        // Next take ln (p) because otherwise probabilities might be too small to represent.

        double logg = log (g);
        NUMvector_normalize1 (my aprioriProbabilities, g);
        for (long j = 1; j <= g; j ++) {
            log_apriori[j] = ( useAprioriProbabilities ? log (my aprioriProbabilities[j]) : - logg );
        }

        // Generalized squared distance function:
        // D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)

        for (long i = 1; i <= m; i ++) {
            SSCP winner;
            double norm = 0, pt_max = -1e308;
            long iwinner = 1;
            // Classify the displaced data point, not the raw table row.
            for (long k = 1; k <= p; k ++) {
                x [k] = thy data [i] [k] + displacement [k];
            }
            for (long j = 1; j <= g; j ++) {
                SSCP t = groups->at [j];
                double md = mahalanobisDistanceSq (sscpvec [j] -> data, p, x.peek(), t -> centroid, buf.peek());
                double pt = log_apriori [j] - 0.5 * (ln_determinant [j] + md);
                if (pt > pt_max) {
                    pt_max = pt;
                    iwinner = j;
                }
                log_p [j] = pt;
            }
            // Subtract the largest value before exponentiating; log_p is reused to hold the exponentials.
            for (long j = 1; j <= g; j ++) {
                norm += log_p [j] = exp (log_p [j] - pt_max);
            }

            // Divide by the sum so that the cells of this row add up to 1.
            for (long j = 1; j <= g; j ++) {
                his data [i] [j] = log_p [j] / norm;
            }

            // Save old displacement, calculate new displacement

            winner = groups->at [iwinner];
            for (long k = 1; k <= p; k ++) {
                adisplacements -> data [i] [k] = displacement [k];
                // The displacement is only adapted when the winner is probable enough.
                if (his data [i] [iwinner] > minProb) {
                    double delta_k = winner -> centroid [k] - x [k];
                    displacement [k] += alpha * delta_k;
                }
            }
        }
        *displacements = adisplacements.move();
        return him;
    } catch (MelderError) {
        Melder_throw (U"ClassificationTable for Weenink procedure not created.");
    }
}