/*
	Create a PCA from the matrix m [1..numberOfRows] [1..numberOfColumns].

	If byColumns is true the matrix is transposed first, so that the columns of m
	play the role of the observations; otherwise the rows of m are the observations.
	In the working copy the rows are always the observations and the columns are
	the variables.

	Throws if an element of m is not finite, if all elements are zero, or if there
	are fewer than two observations. Any error is rethrown as
	"No PCA created from columns." / "No PCA created from rows.".
*/
static autoPCA NUMdmatrix_to_PCA (double **m, long numberOfRows, long numberOfColumns, bool byColumns) {
	try {
		if (! NUMdmatrix_hasFiniteElements (m, 1, numberOfRows, 1, numberOfColumns)) {
			Melder_throw (U"At least one of the matrix elements is not finite or undefined.");
		}
		if (NUMfrobeniusnorm (numberOfRows, numberOfColumns, m) == 0.0) {
			Melder_throw (U"All values in your table are zero.");
		}
		/*
			The eigenvalues are divided by (N - 1) at the end of this function, where N is
			the number of observations. With a single observation that would be a division
			by zero, so we refuse such input explicitly.
		*/
		const long numberOfObservations = byColumns ? numberOfColumns : numberOfRows;
		if (numberOfObservations < 2) {
			Melder_throw (U"The number of observations (", numberOfObservations, U") should be at least two.");
		}

		autoNUMmatrix<double> mcopy;
		long numberOfRows2, numberOfColumns2;
		if (byColumns) {
			if (numberOfColumns < numberOfRows) {
				Melder_warning (U"The number of columns in your table is less than the number of rows. ");
			}
			numberOfRows2 = numberOfColumns, numberOfColumns2 = numberOfRows;
			mcopy.reset (1, numberOfRows2, 1, numberOfColumns2);
			for (long i = 1; i <= numberOfRows2; i ++) {   // transpose
				for (long j = 1; j <= numberOfColumns2; j ++) {
					mcopy [i] [j] = m [j] [i];
				}
			}
		} else {
			if (numberOfRows < numberOfColumns) {
				Melder_warning (U"The number of rows in your table is less than the number of columns. ");
			}
			numberOfRows2 = numberOfRows, numberOfColumns2 = numberOfColumns;
			mcopy.reset (1, numberOfRows2, 1, numberOfColumns2);
			NUMmatrix_copyElements<double> (m, mcopy.peek(), 1, numberOfRows2, 1, numberOfColumns2);
		}

		autoPCA thee = Thing_new (PCA);
		thy centroid = NUMvector<double> (1, numberOfColumns2);
		// Centre the columns of the working copy; the column means are stored in thy centroid.
		NUMcentreColumns (mcopy.peek(), 1, numberOfRows2, 1, numberOfColumns2, thy centroid);
		Eigen_initFromSquareRoot (thee.get(), mcopy.peek(), numberOfRows2, numberOfColumns2);
		thy labels = NUMvector<char32 *> (1, numberOfColumns2);

		PCA_setNumberOfObservations (thee.get(), numberOfRows2);

		/*
			The covariance matrix C = A'A / (N-1). However, we have calculated
			the eigenstructure for A'A. This has no consequences for the
			eigenvectors, but the eigenvalues have to be divided by (N-1).
		*/
		for (long i = 1; i <= thy numberOfEigenvalues; i ++) {
			thy eigenvalues [i] /= (numberOfRows2 - 1);
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (U"No PCA created from ", byColumns ? U"columns." : U"rows.");
	}
}
/*
	Create a Discriminant from a TableOfReal whose row labels identify the groups.

	Throws if a table element is not finite, if a row has no label, or if fewer
	than two groups are found. Any error is rethrown as "<me>: Discriminant not created.".
*/
autoDiscriminant TableOfReal_to_Discriminant (TableOfReal me) {
	try {
		autoDiscriminant thee = Thing_new (Discriminant);
		const long numberOfDimensions = my numberOfColumns;

		if (! NUMdmatrix_hasFiniteElements (my data, 1, my numberOfRows, 1, my numberOfColumns)) {
			Melder_throw (U"At least one of the table's elements is not finite or undefined.");
		}
		if (! TableOfReal_hasRowLabels (me)) {
			Melder_throw (U"At least one of the rows has no label.");
		}

		// Work on a copy that is ordered by row label; give it column labels if it has none.
		autoTableOfReal sorted = TableOfReal_sortOnlyByRowLabels (me);
		if (! TableOfReal_hasColumnLabels (sorted.get())) {
			TableOfReal_setSequentialColumnLabels (sorted.get(), 0, 0, U"c", 1, 1);
		}

		// The per-group and total SSCPs are taken before the columns are centred per group.
		thy groups = TableOfReal_to_SSCPList_byLabel (sorted.get());
		thy total = TableOfReal_to_SSCP (sorted.get(), 0, 0, 0, 0);

		thy numberOfGroups = thy groups -> size;
		if (thy numberOfGroups < 2) {
			Melder_throw (U"Number of groups must be greater than one.");
		}

		TableOfReal_centreColumns_byRowLabel (sorted.get());

		// Overall centroid and apriori probabilities and costs.

		autoNUMvector<double> grandCentroid (1, numberOfDimensions);
		autoNUMmatrix<double> between (1, thy numberOfGroups, 1, numberOfDimensions);
		thy aprioriProbabilities = NUMvector<double> (1, thy numberOfGroups);
		thy costs = NUMmatrix<double> (1, thy numberOfGroups, 1, thy numberOfGroups);

		// Accumulate the group centroids, weighted by the number of observations in each group.
		double totalWeight = 0;
		for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
			SSCP group = thy groups->at [igroup];
			const double groupSize = SSCP_getNumberOfObservations (group);
			totalWeight += groupSize;
			for (long icol = 1; icol <= numberOfDimensions; icol ++) {
				grandCentroid [icol] += groupSize * group -> centroid [icol];
			}
		}
		for (long icol = 1; icol <= numberOfDimensions; icol ++) {
			grandCentroid [icol] /= totalWeight;
		}

		// Per group: the apriori probability, and one row of the between-groups square root matrix.
		for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
			SSCP group = thy groups->at [igroup];
			const double groupSize = SSCP_getNumberOfObservations (group);
			thy aprioriProbabilities [igroup] = groupSize / my numberOfRows;
			const double rootOfGroupSize = sqrt (groupSize);
			for (long icol = 1; icol <= numberOfDimensions; icol ++) {
				between [igroup] [icol] = rootOfGroupSize * (group -> centroid [icol] - grandCentroid [icol]);
			}
		}

		/*
			We need to solve B'B.x = lambda W'W.x, where B'B and W'W are the between
			and within covariance matrices.
			We do not calculate these covariance matrices directly from the data but
			instead use the GSVD to solve for the eigenvalues and eigenvectors of the equation.
		*/
		thy eigen = Thing_new (Eigen);
		Eigen_initFromSquareRootPair (thy eigen.get(), between.peek(), thy numberOfGroups, numberOfDimensions,
			sorted -> data, my numberOfRows);

		// Default priors and costs: every off-diagonal cost becomes 1.0, the diagonal is not touched.
		for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
			for (long jgroup = igroup + 1; jgroup <= thy numberOfGroups; jgroup ++) {
				thy costs [jgroup] [igroup] = 1.0;
				thy costs [igroup] [jgroup] = 1.0;
			}
		}
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": Discriminant not created.");
	}
}
/*
	Create a CCA (canonical correlation analysis) from a TableOfReal.

	The first ny columns of the table form the dependent (y) part, the remaining
	nx = numberOfColumns - ny columns form the independent (x) part.

	Throws if ny is not in [1, numberOfColumns - 1], if ny > nx, if the number of
	rows is smaller than ny, if a table element is not finite, or if one of the
	two parts contains only zeros. Any error is rethrown as "<me>: CCA not created.".

	NOTE(review): the code below indexes the singular values and vectors of the
	n x nx matrix up to nx, which looks like it assumes n >= nx; only n >= ny is
	checked here -- confirm against SVD_create.
*/
autoCCA TableOfReal_to_CCA (TableOfReal me, long ny) {
	try {
		long n = my numberOfRows, nx = my numberOfColumns - ny;

		if (ny < 1 || ny > my numberOfColumns - 1) {
			Melder_throw (U"Dimension of first part not correct.");
		}
		if (ny > nx) {
			Melder_throw (U"The dimension of the dependent part (", ny, U") must be less than or equal to "
				"the dimension of the independent part (", nx, U").");
		}
		if (n < ny) {
			// The message states what the condition actually enforces: n >= ny.
			Melder_throw (U"The number of observations must be at least ", ny, U".");
		}
		if (! NUMdmatrix_hasFiniteElements (my data, 1, my numberOfRows, 1, my numberOfColumns)) {
			Melder_throw (U"At least one of the table's elements is not finite or undefined.");
		}

		// Use svd as (temporary) storage, and copy data

		autoSVD svdy = SVD_create (n, ny);
		autoSVD svdx = SVD_create (n, nx);

		for (long i = 1; i <= n; i ++) {
			for (long j = 1; j <= ny; j ++) {
				svdy -> u [i] [j] = my data [i] [j];
			}
			for (long j = 1; j <= nx; j ++) {
				svdx -> u [i] [j] = my data [i] [ny + j];
			}
		}

		double **uy = svdy -> u;
		double **vy = svdy -> v;
		double **ux = svdx -> u;
		double **vx = svdx -> v;
		double fnormy = NUMfrobeniusnorm (n, ny, uy);
		double fnormx = NUMfrobeniusnorm (n, nx, ux);
		if (fnormy == 0.0 || fnormx == 0.0) {
			Melder_throw (U"One of the parts of the table contains only zeros.");
		}

		// Centre the data and svd it.

		NUMcentreColumns (uy, 1, n, 1, ny, nullptr);
		NUMcentreColumns (ux, 1, n, 1, nx, nullptr);

		SVD_compute (svdy.get());
		SVD_compute (svdx.get());

		long numberOfZeroedy = SVD_zeroSmallSingularValues (svdy.get(), 0.0);
		long numberOfZeroedx = SVD_zeroSmallSingularValues (svdx.get(), 0.0);

		// Form the matrix C = ux' uy (use svd-object storage)

		autoSVD svdc = SVD_create (nx, ny);
		double **uc = svdc -> u;
		double **vc = svdc -> v;

		for (long i = 1; i <= nx; i ++) {
			for (long j = 1; j <= ny; j ++) {
				double t = 0.0;
				for (long q = 1; q <= n; q ++) {
					t += ux [q] [i] * uy [q] [j];
				}
				uc [i] [j] = t;
			}
		}

		SVD_compute (svdc.get());
		long numberOfZeroedc = SVD_zeroSmallSingularValues (svdc.get(), 0.0);
		long numberOfCoefficients = ny - numberOfZeroedc;

		autoCCA thee = CCA_create (numberOfCoefficients, ny, nx);
		thy yLabels = strings_to_Strings (my columnLabels, 1, ny);
		thy xLabels = strings_to_Strings (my columnLabels, ny + 1, my numberOfColumns);

		double **evecy = thy y -> eigenvectors;
		double **evecx = thy x -> eigenvectors;
		thy numberOfObservations = n;

		/*
			Y = Vy * inv(Dy) * Vc
			X = Vx * inv(Dx) * Uc
			For the eigenvectors we want a row representation:
			columns(Y) = rows(Y') = rows(Vc' * inv(Dy) * Vy')
			columns(X) = rows(X') = rows(Uc' * inv(Dx) * Vx')
			rows(Y') = evecy[i][j] = Vc[k][i] * Vy[j][k] / Dy[k]
			rows(X') = evecx[i][j] = Uc[k][i] * Vx[j][k] / Dx[k]
		*/

		for (long i = 1; i <= numberOfCoefficients; i ++) {
			// The eigenvalue is the square of the i-th singular value of C.
			double ccc = svdc -> d [i];
			thy y -> eigenvalues [i] = thy x -> eigenvalues [i] = ccc * ccc;
			for (long j = 1; j <= ny; j ++) {
				double t = 0.0;
				// Only the singular values that were not zeroed take part (avoids dividing by zero).
				for (long q = 1; q <= ny - numberOfZeroedy; q ++) {
					t += vc [q] [i] * vy [j] [q] / svdy -> d [q];
				}
				evecy [i] [j] = t;
			}
			for (long j = 1; j <= nx; j ++) {
				double t = 0.0;
				for (long q = 1; q <= nx - numberOfZeroedx; q ++) {
					t += uc [q] [i] * vx [j] [q] / svdx -> d [q];
				}
				evecx [i] [j] = t;
			}
		}

		// Normalize eigenvectors.

		NUMnormalizeRows (thy y -> eigenvectors, numberOfCoefficients, ny, 1);
		NUMnormalizeRows (thy x -> eigenvectors, numberOfCoefficients, nx, 1);
		Melder_assert (thy x -> dimension == thy xLabels -> numberOfStrings &&
			thy y -> dimension == thy yLabels -> numberOfStrings);
		return thee;
	} catch (MelderError) {
		Melder_throw (me, U": CCA not created.");
	}
}