/*
	Project the SSCP `sscp` (as `him`) into the eigenspace of `me`:
	for each pair of eigenvectors (e_i, e_j) of `me`,
		his data [i] [j] = e_i' . (thy data) . e_j,
	and project the centroid of `thee` likewise:
		his centroid [i] = e_i' . (thy centroid).
	The number of observations is copied over unchanged.
	NOTE(review): assumes thy data is (my dimension) x (my dimension) and that
	`him` has at least (my numberOfEigenvalues) rows/columns — not checked here.
*/
static void Eigen_and_SSCP_project_ (I, thou, Any sscp) {
	iam (Eigen);
	thouart (SSCP);
	SSCP him = (SSCP) sscp;
	for (long i = 1; i <= my numberOfEigenvalues; i++) {
		for (long j = i; j <= my numberOfEigenvalues; j++) {
			// tmp = e_i' . thy data . e_j
			double tmp = 0;
			for (long k = 1; k <= my dimension; k++) {
				for (long m = 1; m <= my dimension; m++) {
					tmp += my eigenvectors[i][k] * thy data[k][m] * my eigenvectors[j][m];
				}
			}
			// Only the upper triangle (j >= i) is computed; mirror it, since the
			// projected matrix is symmetric whenever thy data is symmetric.
			his data[i][j] = his data[j][i] = tmp;
		}
		// Project the centroid onto eigenvector i.
		double tmp = 0;
		for (long m = 1; m <= my dimension; m++) {
			tmp += thy centroid[m] * my eigenvectors[i][m];
		}
		his centroid[i] = tmp;
	}
	his numberOfObservations = SSCP_getNumberOfObservations (thee);
}
/*
	Create a Discriminant from a labelled TableOfReal.
	Rows are the data cases; the row labels define the group membership.
	Fills in: the per-group SSCPs, the total SSCP, the a-priori probabilities
	(proportional to group sizes), the (default, all-1.0) misclassification
	costs, and the discriminant eigen-decomposition obtained via the GSVD of
	the square roots of the between- and within-group matrices.
	Throws if the table contains non-finite cells, has unlabelled rows,
	or contains fewer than two groups.
*/
autoDiscriminant TableOfReal_to_Discriminant (TableOfReal me) {
	try {
		autoDiscriminant thee = Thing_new (Discriminant);
		long dimension = my numberOfColumns;

		if (! NUMdmatrix_hasFiniteElements(my data, 1, my numberOfRows, 1, my numberOfColumns)) {
			Melder_throw (U"At least one of the table's elements is not finite or undefined.");
		}

		if (! TableOfReal_hasRowLabels (me)) {
			Melder_throw (U"At least one of the rows has no label.");
		}

		// Work on a copy sorted by row label, so that cases of the same group are adjacent.
		autoTableOfReal mew = TableOfReal_sortOnlyByRowLabels (me);
		if (! TableOfReal_hasColumnLabels (mew.get())) {
			TableOfReal_setSequentialColumnLabels (mew.get(), 0, 0, U"c", 1, 1);
		}

		thy groups = TableOfReal_to_SSCPList_byLabel (mew.get());
		thy total = TableOfReal_to_SSCP (mew.get(), 0, 0, 0, 0);

		if ((thy numberOfGroups = thy groups -> size) < 2) {
			Melder_throw (U"Number of groups must be greater than one.");
		}

		// Centre each group on its own centroid; `mew -> data` then holds the
		// within-group deviations used as the square root of W'W below.
		TableOfReal_centreColumns_byRowLabel (mew.get());

		// Overall centroid and apriori probabilities and costs.
		autoNUMvector<double> centroid (1, dimension);
		autoNUMmatrix<double> between (1, thy numberOfGroups, 1, dimension);
		thy aprioriProbabilities = NUMvector<double> (1, thy numberOfGroups);
		thy costs = NUMmatrix<double> (1, thy numberOfGroups, 1, thy numberOfGroups);

		// Overall centroid: weighted average of the group centroids,
		// weighted by the number of observations in each group.
		double sum = 0, scale;
		for (long k = 1; k <= thy numberOfGroups; k ++) {
			SSCP m = thy groups->at [k];
			sum += scale = SSCP_getNumberOfObservations (m);
			for (long j = 1; j <= dimension; j ++) {
				centroid [j] += scale * m -> centroid [j];
			}
		}

		for (long j = 1; j <= dimension; j ++) {
			centroid [j] /= sum;
		}

		for (long k = 1; k <= thy numberOfGroups; k ++) {
			SSCP m = thy groups->at [k];
			scale = SSCP_getNumberOfObservations (m);
			thy aprioriProbabilities[k] = scale / my numberOfRows;
			// Row k of `between` is a scaled centroid deviation, so that
			// between' . between equals the between-groups SSCP matrix.
			for (long j = 1; j <= dimension; j ++) {
				between [k] [j] = sqrt (scale) * (m -> centroid [j] - centroid [j]);
			}
		}

		// We need to solve B'B.x = lambda W'W.x, where B'B and W'W are the
		// between and within covariance matrices.
		// We do not calculate these covariance matrices directly from the data
		// but instead use the GSVD to solve for the eigenvalues and
		// eigenvectors of the equation.
		thy eigen = Thing_new (Eigen);
		Eigen_initFromSquareRootPair (thy eigen.get(), between.peek(), thy numberOfGroups, dimension, mew -> data, my numberOfRows);

		// Default priors and costs
		for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
			for (long jgroup = igroup + 1; jgroup <= thy numberOfGroups; jgroup ++) {
				thy costs [igroup] [jgroup] = thy costs [jgroup] [igroup] = 1.0;
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": Discriminant not created.");
	}
}
/*
	Classify the rows of `thee` with discriminant `me`, using Weenink's
	adaptive-displacement procedure: after classifying row i, the data point
	is pulled a fraction `alpha` towards the centroid of the winning group
	(only when the winner's posterior probability exceeds `minProb`), and the
	accumulated displacement is applied to subsequent rows.
	Returns the table of posterior probabilities (one column per group); the
	per-row displacements that were in effect are returned via `displacements`.
	If `poolCovarianceMatrices` is true a single pooled covariance is used for
	all groups; otherwise each group uses its own covariance, falling back to
	the pooled one for groups whose covariance is singular.
*/
autoClassificationTable Discriminant_and_TableOfReal_to_ClassificationTable_dw (Discriminant me, TableOfReal thee, int poolCovarianceMatrices, int useAprioriProbabilities, double alpha, double minProb, autoTableOfReal *displacements) {
	try {
		long g = Discriminant_getNumberOfGroups (me);
		long p = Eigen_getDimensionOfComponents (my eigen.get());
		long m = thy numberOfRows;

		if (p != thy numberOfColumns) Melder_throw
			(U"The number of columns does not agree with the dimension of the discriminant.");

		autoNUMvector<double> log_p (1, g);
		autoNUMvector<double> log_apriori (1, g);
		autoNUMvector<double> ln_determinant (1, g);
		autoNUMvector<double> buf (1, p);
		autoNUMvector<double> displacement (1, p);
		autoNUMvector<double> x (1, p);
		autoNUMvector<SSCP> sscpvec (1, g);
		autoSSCP pool = SSCPList_to_SSCP_pool (my groups.get());
		autoClassificationTable him = ClassificationTable_create (m, g);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, m);
		autoTableOfReal adisplacements = Data_copy (thee);

		// Scale the sscp to become a covariance matrix.
		for (long i = 1; i <= p; i ++) {
			for (long k = i; k <= p; k ++) {
				pool -> data [k] [i] = pool -> data [i] [k] /= pool -> numberOfObservations - g;
			}
		}

		double lnd;
		autoSSCPList agroups;
		SSCPList groups;
		if (poolCovarianceMatrices) {
			// Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
			// L^-1 will be used later in the Mahalanobis distance calculation:
			// v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).
			NUMlowerCholeskyInverse (pool -> data, p, & lnd);
			for (long j = 1; j <= g; j ++) {
				ln_determinant [j] = lnd;
				sscpvec [j] = pool.get();   // every group shares the pooled inverse
			}
			groups = my groups.get();
		} else {
			// Calculate the inverses of all group covariance matrices.
			// In case of a singular matrix, substitute inverse of pooled.
			agroups = Data_copy (my groups.get());
			groups = agroups.get();
			long npool = 0;
			for (long j = 1; j <= g; j ++) {
				SSCP t = groups->at [j];
				long no = (long) floor (SSCP_getNumberOfObservations (t));
				// Scale this group's sscp to a covariance matrix (divide by no - 1).
				for (long i = 1; i <= p; i ++) {
					for (long k = i; k <= p; k ++) {
						t -> data [k] [i] = t -> data [i] [k] /= no - 1;
					}
				}
				sscpvec [j] = groups->at [j];
				try {
					NUMlowerCholeskyInverse (t -> data, p, & ln_determinant [j]);
				} catch (MelderError) {
					// Try the alternative: the pooled covariance matrix.
					// Clear the error.
					Melder_clearError ();
					if (npool == 0) {
						NUMlowerCholeskyInverse (pool -> data, p, & lnd);
					}
					npool ++;
					sscpvec [j] = pool.get();
					ln_determinant [j] = lnd;
				}
			}
			if (npool > 0) {
				Melder_warning (npool, U" groups use pooled covariance matrix.");
			}
		}

		// Labels for columns in ClassificationTable
		for (long j = 1; j <= g; j ++) {
			const char32 *name = Thing_getName (my groups->at [j]);
			if (! name) {
				name = U"?";
			}
			TableOfReal_setColumnLabel (him.get(), j, name);
		}

		// Normalize the sum of the apriori probabilities to 1.
		// Next take ln (p) because otherwise probabilities might be too small to represent.
		double logg = log (g);
		NUMvector_normalize1 (my aprioriProbabilities, g);
		for (long j = 1; j <= g; j ++) {
			log_apriori[j] = ( useAprioriProbabilities ? log (my aprioriProbabilities[j]) : - logg );
		}

		// Generalized squared distance function:
		// D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)
		for (long i = 1; i <= m; i ++) {
			SSCP winner;
			double norm = 0, pt_max = -1e308;
			long iwinner = 1;
			// Apply the current accumulated displacement to this data point.
			for (long k = 1; k <= p; k ++) {
				x [k] = thy data [i] [k] + displacement [k];
			}
			for (long j = 1; j <= g; j ++) {
				SSCP t = groups->at [j];
				double md = mahalanobisDistanceSq (sscpvec [j] -> data, p, x.peek(), t -> centroid, buf.peek());
				double pt = log_apriori [j] - 0.5 * (ln_determinant [j] + md);
				if (pt > pt_max) {
					pt_max = pt;
					iwinner = j;
				}
				log_p [j] = pt;
			}
			// Shift by the maximum before exponentiation to avoid underflow,
			// then normalize to posterior probabilities.
			for (long j = 1; j <= g; j ++) {
				norm += log_p [j] = exp (log_p [j] - pt_max);
			}
			for (long j = 1; j <= g; j ++) {
				his data [i] [j] = log_p [j] / norm;
			}

			// Save old displacement, calculate new displacement
			winner = groups->at [iwinner];
			for (long k = 1; k <= p; k ++) {
				adisplacements -> data [i] [k] = displacement [k];
				if (his data [i] [iwinner] > minProb) {
					double delta_k = winner -> centroid [k] - x [k];
					displacement [k] += alpha * delta_k;
				}
			}
		}
		*displacements = adisplacements.move();
		return him;
	} catch (MelderError) {
		Melder_throw (U"ClassificationTable for Weenink procedure not created.");
	}
}
/*
	Classify the rows of `thee` with discriminant `me` and return a
	ClassificationTable with one column of posterior probabilities per group.
	If `poolCovarianceMatrices` is true a single pooled covariance matrix is
	used for all groups; otherwise each group gets its own covariance, with the
	pooled one substituted for groups whose covariance is singular.
	If `useAprioriProbabilities` is true the group priors stored in `me` are
	used; otherwise uniform priors 1/g.
	Throws if thy numberOfColumns differs from the discriminant's dimension.
*/
ClassificationTable Discriminant_and_TableOfReal_to_ClassificationTable (Discriminant me, TableOfReal thee, int poolCovarianceMatrices, int useAprioriProbabilities) {
	try {
		long g = Discriminant_getNumberOfGroups (me);
		long p = Eigen_getDimensionOfComponents (me);
		long m = thy numberOfRows;

		if (p != thy numberOfColumns) Melder_throw
			(U"The number of columns does not agree with the dimension of the discriminant.");

		autoNUMvector<double> log_p (1, g);
		autoNUMvector<double> log_apriori (1, g);
		autoNUMvector<double> ln_determinant (1, g);
		autoNUMvector<double> buf (1, p);
		autoNUMvector<SSCP> sscpvec (1, g);
		autoSSCP pool = SSCPs_to_SSCP_pool (my groups);
		autoClassificationTable him = ClassificationTable_create (m, g);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, m);

		// Scale the sscp to become a covariance matrix.
		for (long i = 1; i <= p; i++) {
			for (long k = i; k <= p; k++) {
				pool -> data[k][i] = (pool -> data[i][k] /= (pool -> numberOfObservations - g));
			}
		}

		double lnd;
		autoSSCPs agroups = 0;
		SSCPs groups;
		if (poolCovarianceMatrices) {
			/*
				Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
				L^-1 will be used later in the Mahalanobis distance calculation:
				v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).
			*/
			NUMlowerCholeskyInverse (pool -> data, p, &lnd);
			for (long j = 1; j <= g; j++) {
				ln_determinant[j] = lnd;
				sscpvec[j] = pool.peek();   // every group shares the pooled inverse
			}
			groups = (SSCPs) my groups;
		} else {
			// Calculate the inverses of all group covariance matrices.
			// In case of a singular matrix, substitute inverse of pooled.
			agroups.reset (Data_copy ( (SSCPs) my groups));
			groups = agroups.peek();
			long npool = 0;
			for (long j = 1; j <= g; j++) {
				SSCP t = (SSCP) groups -> item[j];
				long no = (long) floor (SSCP_getNumberOfObservations (t));
				// Scale this group's sscp to a covariance matrix (divide by no - 1).
				for (long i = 1; i <= p; i++) {
					for (long k = i; k <= p; k++) {
						t -> data[k][i] = (t -> data[i][k] /= (no - 1));
					}
				}
				sscpvec[j] = (SSCP) groups -> item[j];
				try {
					NUMlowerCholeskyInverse (t -> data, p, &ln_determinant[j]);
				} catch (MelderError) {
					// Try the alternative: the pooled covariance matrix.
					// Clear the error.
					Melder_clearError ();
					if (npool == 0) {
						NUMlowerCholeskyInverse (pool -> data, p, &lnd);
					}
					npool++;
					sscpvec[j] = pool.peek();
					ln_determinant[j] = lnd;
				}
			}
			if (npool > 0) {
				Melder_warning (npool, U" groups use pooled covariance matrix.");
			}
		}

		// Labels for columns in ClassificationTable
		for (long j = 1; j <= g; j++) {
			const char32 *name = Thing_getName ( (Thing) my groups -> item[j]);
			if (! name) {
				name = U"?";
			}
			TableOfReal_setColumnLabel (him.peek(), j, name);
		}

		// Normalize the sum of the apriori probabilities to 1.
		// Next take ln (p) because otherwise probabilities might be too small to represent.
		NUMvector_normalize1 (my aprioriProbabilities, g);
		double logg = log (g);
		for (long j = 1; j <= g; j++) {
			log_apriori[j] = useAprioriProbabilities ? log (my aprioriProbabilities[j]) : - logg;
		}

		// Generalized squared distance function:
		// D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)
		for (long i = 1; i <= m; i++) {
			/*
				BUGFIX: the "minus infinity" sentinel used to be -1e38, which is far
				above DBL's lower range: when every group's score pt fell below
				-1e38 (large Mahalanobis distances), pt_max never updated, every
				exp (log_p[j] - pt_max) underflowed to 0, norm became 0, and the
				whole row turned into NaN.  Use -1e308 as in the _dw variant.
			*/
			double norm = 0, pt_max = -1e308;
			for (long j = 1; j <= g; j++) {
				SSCP t = (SSCP) groups -> item[j];
				double md = mahalanobisDistanceSq (sscpvec[j] -> data, p, thy data[i], t -> centroid, buf.peek());
				double pt = log_apriori[j] - 0.5 * (ln_determinant[j] + md);
				if (pt > pt_max) {
					pt_max = pt;
				}
				log_p[j] = pt;
			}
			// Shift by the maximum before exponentiation to avoid underflow,
			// then normalize to posterior probabilities.
			for (long j = 1; j <= g; j++) {
				norm += (log_p[j] = exp (log_p[j] - pt_max));
			}
			for (long j = 1; j <= g; j++) {
				his data[i][j] = log_p[j] / norm;
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (U"ClassificationTable from Discriminant & TableOfReal not created.");
	}
}