Ejemplo n.º 1
0
/*
	Project the SSCP 'thee' onto the eigenvectors of the Eigen 'me' and write the result into 'sscp'.

	For every pair of eigenvectors (e_i, e_j) with i <= j the quadratic form e_i' . S . e_j is
	evaluated, where S is thy data, and stored symmetrically in his data.
	The centroid is projected as the inner product of thy centroid with each eigenvector.
	Finally the number of observations is taken over from 'thee'.

	'sscp' must already have room for my numberOfEigenvalues rows, columns and centroid elements;
	nothing is allocated here.
*/
static void Eigen_and_SSCP_project_ (I, thou, Any sscp) {
	iam (Eigen);
	thouart (SSCP);
	SSCP him = (SSCP) sscp;

	const long numberOfComponents = my numberOfEigenvalues;
	const long numberOfDimensions = my dimension;

	for (long irow = 1; irow <= numberOfComponents; irow ++) {
		// Projected centroid: inner product of the centroid with eigenvector irow.
		double projectedCentroid = 0;
		for (long idim = 1; idim <= numberOfDimensions; idim ++) {
			projectedCentroid += thy centroid[idim] * my eigenvectors[irow][idim];
		}
		his centroid[irow] = projectedCentroid;

		// Only the upper triangle is computed; the lower triangle is filled by symmetry.
		for (long icol = irow; icol <= numberOfComponents; icol ++) {
			double quadraticForm = 0;
			for (long r = 1; r <= numberOfDimensions; r ++) {
				for (long c = 1; c <= numberOfDimensions; c ++) {
					quadraticForm += my eigenvectors[irow][r] * thy data[r][c] * my eigenvectors[icol][c];
				}
			}
			his data[icol][irow] = quadraticForm;
			his data[irow][icol] = quadraticForm;
		}
	}
	his numberOfObservations = SSCP_getNumberOfObservations (thee);
}
Ejemplo n.º 2
0
/*
    Build a Discriminant from a TableOfReal whose row labels identify the group of each row.

    Steps:
    1. Reject tables with non-finite cells or with unlabelled rows.
    2. Take the table returned by TableOfReal_sortOnlyByRowLabels, give it sequential column
       labels ("c1", ...) if it has none, and derive from it the per-group SSCPs and the total SSCP.
    3. Require at least two groups.
    4. Centre the columns of the sorted table per row label, and build the "between" matrix
       whose row k is sqrt (n_k) * (centroid_k - overall centroid).
    5. Initialise the eigen structure from the pair (between, centred data).
    6. Set the a priori probabilities to n_k / numberOfRows and all off-diagonal costs to 1.

    Throws (with the message ": Discriminant not created." appended) if any step fails.
*/
autoDiscriminant TableOfReal_to_Discriminant (TableOfReal me) {
    try {
        autoDiscriminant thee = Thing_new (Discriminant);
        const long numberOfDimensions = my numberOfColumns;

        if (! NUMdmatrix_hasFiniteElements (my data, 1, my numberOfRows, 1, my numberOfColumns)) {
            Melder_throw (U"At least one of the table's elements is not finite or undefined.");
        }
        if (! TableOfReal_hasRowLabels (me)) {
            Melder_throw (U"At least one of the rows has no label.");
        }

        // All further work is done on the row-label-sorted table, not on 'me'.
        autoTableOfReal sorted = TableOfReal_sortOnlyByRowLabels (me);
        if (! TableOfReal_hasColumnLabels (sorted.get())) {
            TableOfReal_setSequentialColumnLabels (sorted.get(), 0, 0, U"c", 1, 1);
        }

        // The SSCPs have to be computed before the columns are centred below.
        thy groups = TableOfReal_to_SSCPList_byLabel (sorted.get());
        thy total = TableOfReal_to_SSCP (sorted.get(), 0, 0, 0, 0);

        thy numberOfGroups = thy groups -> size;
        if (thy numberOfGroups < 2) {
            Melder_throw (U"Number of groups must be greater than one.");
        }

        TableOfReal_centreColumns_byRowLabel (sorted.get());

        // Working storage plus the per-group priors and the cost matrix.
        autoNUMvector<double> overallCentroid (1, numberOfDimensions);
        autoNUMmatrix<double> between (1, thy numberOfGroups, 1, numberOfDimensions);
        thy aprioriProbabilities = NUMvector<double> (1, thy numberOfGroups);
        thy costs = NUMmatrix<double> (1, thy numberOfGroups, 1, thy numberOfGroups);

        // Overall centroid: the group centroids weighted by their number of observations.
        double totalNumberOfObservations = 0;
        for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
            SSCP group = thy groups->at [igroup];
            const double groupSize = SSCP_getNumberOfObservations (group);
            totalNumberOfObservations += groupSize;
            for (long icol = 1; icol <= numberOfDimensions; icol ++) {
                overallCentroid [icol] += groupSize * group -> centroid [icol];
            }
        }
        for (long icol = 1; icol <= numberOfDimensions; icol ++) {
            overallCentroid [icol] /= totalNumberOfObservations;
        }

        // A priori probabilities and the square root of the between-groups matrix.
        for (long igroup = 1; igroup <= thy numberOfGroups; igroup ++) {
            SSCP group = thy groups->at [igroup];
            const double groupSize = SSCP_getNumberOfObservations (group);
            const double weight = sqrt (groupSize);
            thy aprioriProbabilities [igroup] = groupSize / my numberOfRows;
            for (long icol = 1; icol <= numberOfDimensions; icol ++) {
                between [igroup] [icol] = weight * (group -> centroid [icol] - overallCentroid [icol]);
            }
        }

        // We need to solve B'B.x = lambda W'W.x, where B'B and W'W are the between and within
        // covariance matrices. These are never formed explicitly: the eigen structure is
        // initialised directly from the square-root pair (between, centred data).

        thy eigen = Thing_new (Eigen);
        Eigen_initFromSquareRootPair (thy eigen.get(), between.peek(), thy numberOfGroups, numberOfDimensions, sorted -> data, my numberOfRows);

        // Default costs: 1 for every misclassification; the diagonal is left untouched.
        for (long irow = 1; irow <= thy numberOfGroups; irow ++) {
            for (long icol = 1; icol <= thy numberOfGroups; icol ++) {
                if (irow != icol) {
                    thy costs [irow] [icol] = 1.0;
                }
            }
        }
        return thee;
    } catch (MelderError) {
        Melder_throw (me, U": Discriminant not created.");
    }
}
Ejemplo n.º 3
0
/*
    Classify the rows of 'thee' with the Discriminant 'me', while adapting a running
    displacement vector that is added to every row before it is classified.

    Parameters:
        me                       the Discriminant; note that its aprioriProbabilities vector is
                                 passed to NUMvector_normalize1 below, so 'me' is modified.
        thee                     table with one item per row; its number of columns must equal
                                 the dimension of the discriminant's eigen components.
        poolCovarianceMatrices   non-zero: use the pooled covariance matrix for every group;
                                 zero: use each group's own matrix, falling back to the pooled
                                 one for groups whose matrix cannot be inverted.
        useAprioriProbabilities  non-zero: use the discriminant's priors; zero: use equal priors 1/g.
        alpha                    fraction of (winner centroid - displaced row) that is added to
                                 the displacement after each row.
        minProb                  the displacement is only updated when the winning group's
                                 probability exceeds this value.
        displacements            output: receives a copy of 'thee' in which row i holds the
                                 displacement that was in effect when row i was classified.
                                 It is dereferenced unconditionally, so it must not be null.

    Returns a ClassificationTable with one row per row of 'thee' and one column per group;
    each row contains the normalised group probabilities.

    Throws "ClassificationTable for Weenink procedure not created." on any failure.
*/
autoClassificationTable Discriminant_and_TableOfReal_to_ClassificationTable_dw (Discriminant me, TableOfReal thee, int poolCovarianceMatrices, int useAprioriProbabilities, double alpha, double minProb, autoTableOfReal *displacements) {
    try {
        // g: number of groups, p: dimension of the data, m: number of rows to classify.
        long g = Discriminant_getNumberOfGroups (me);
        long p = Eigen_getDimensionOfComponents (my eigen.get());
        long m = thy numberOfRows;

        if (p != thy numberOfColumns) Melder_throw
            (U"The number of columns does not agree with the dimension of the discriminant.");

        autoNUMvector<double> log_p (1, g);
        autoNUMvector<double> log_apriori (1, g);
        autoNUMvector<double> ln_determinant (1, g);
        autoNUMvector<double> buf (1, p);
        // The displacement is read before it is first written (first row of the main loop).
        // NOTE(review): this relies on autoNUMvector zero-initialising its elements -- confirm.
        autoNUMvector<double> displacement (1, p);
        autoNUMvector<double> x (1, p);
        // sscpvec [j] is a non-owning pointer to the matrix used for group j:
        // either the group's own SSCP or the pooled one.
        autoNUMvector<SSCP> sscpvec (1, g);
        autoSSCP pool = SSCPList_to_SSCP_pool (my groups.get());
        autoClassificationTable him = ClassificationTable_create (m, g);
        NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, m);
        // Copy of 'thee' that will carry the displacements; every cell is overwritten in the main loop.
        autoTableOfReal adisplacements = Data_copy (thee);

        // Scale the sscp to become a covariance matrix.
        // Only the upper triangle is divided; the lower triangle is then copied from it.

        for (long i = 1; i <= p; i ++) {
            for (long k = i; k <= p; k ++) {
                pool -> data [k] [i] = pool -> data [i] [k] /= pool -> numberOfObservations - g;
            }
        }

        // lnd: log-determinant returned for the pooled matrix (set on first inversion of the pool).
        double lnd;
        // agroups owns the working copy of the groups in the non-pooled case;
        // groups is the (non-owning) list whose centroids are used during classification.
        autoSSCPList agroups;
        SSCPList groups;
        if (poolCovarianceMatrices) {
            // Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
            // L^-1 will be used later in the Mahalanobis distance calculation:
            // v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).

            NUMlowerCholeskyInverse (pool -> data, p, & lnd);
            for (long j = 1; j <= g; j ++) {
                ln_determinant [j] = lnd;
                sscpvec [j] = pool.get();
            }
            groups = my groups.get();
        } else {
            // Calculate the inverses of all group covariance matrices.
            // In case of a singular matrix, substitute inverse of pooled.
            // The group matrices are overwritten, hence the work is done on a copy of my groups.

            agroups = Data_copy (my groups.get());
            groups = agroups.get();
            // npool counts the groups that had to fall back on the pooled matrix.
            long npool = 0;
            for (long j = 1; j <= g; j ++) {
                SSCP t = groups->at [j];
                long no = (long) floor (SSCP_getNumberOfObservations (t));
                // Scale this group's sscp to a covariance matrix (divide by no - 1).
                for (long i = 1; i <= p; i ++) {
                    for (long k = i; k <= p; k ++) {
                        t -> data [k] [i] = t -> data [i] [k] /= no - 1;
                    }
                }
                sscpvec [j] = groups->at [j];
                try {
                    NUMlowerCholeskyInverse (t -> data, p, & ln_determinant [j]);
                } catch (MelderError) {
                    // Try the alternative: the pooled covariance matrix.
                    // Clear the error.

                    Melder_clearError ();
                    // The pooled matrix is inverted in place, so this must happen only once;
                    // lnd keeps its value for later groups that also fall back.
                    if (npool == 0) {
                        NUMlowerCholeskyInverse (pool -> data, p, & lnd);
                    }
                    npool ++;
                    sscpvec [j] = pool.get();
                    ln_determinant [j] = lnd;
                }
            }
            if (npool > 0) {
                Melder_warning (npool, U" groups use pooled covariance matrix.");
            }
        }

        // Labels for columns in ClassificationTable: the group names, or "?" for unnamed groups.

        for (long j = 1; j <= g; j ++) {
            const char32 *name = Thing_getName (my groups->at [j]);
            if (! name) {
                name = U"?";
            }
            TableOfReal_setColumnLabel (him.get(), j, name);
        }

        // Normalize the sum of the apriori probabilities to 1.
        // Next take ln (p) because otherwise probabilities might be too small to represent.
        // Without a priori probabilities every group gets ln (1/g) = - ln (g).

        double logg = log (g);
        NUMvector_normalize1 (my aprioriProbabilities, g);
        for (long j = 1; j <= g; j ++) {
            log_apriori[j] = ( useAprioriProbabilities ? log (my aprioriProbabilities[j]) : - logg );
        }

        // Generalized squared distance function:
        // D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)

        for (long i = 1; i <= m; i ++) {
            SSCP winner;
            double norm = 0, pt_max = -1e308;
            long iwinner = 1;
            // x is row i shifted by the displacement accumulated from the previous rows.
            for (long k = 1; k <= p; k ++) {
                x [k] = thy data [i] [k] + displacement [k];
            }
            // Log-domain score per group (-0.5 * D^2); remember the largest one and its group.
            for (long j = 1; j <= g; j ++) {
                SSCP t = groups->at [j];
                double md = mahalanobisDistanceSq (sscpvec [j] -> data, p, x.peek(), t -> centroid, buf.peek());
                double pt = log_apriori [j] - 0.5 * (ln_determinant [j] + md);
                if (pt > pt_max) {
                    pt_max = pt;
                    iwinner = j;
                }
                log_p [j] = pt;
            }
            // Subtract the maximum before exponentiating so that the largest term is exp (0) = 1;
            // from here on log_p holds unnormalised probabilities, not logarithms.
            for (long j = 1; j <= g; j ++) {
                norm += log_p [j] = exp (log_p [j] - pt_max);
            }

            for (long j = 1; j <= g; j ++) {
                his data [i] [j] = log_p [j] / norm;
            }

            // Save old displacement, calculate new displacement.
            // The displacement only moves (by alpha times the distance from x to the winning
            // centroid) when the winner's probability exceeds minProb.

            winner = groups->at [iwinner];
            for (long k = 1; k <= p; k ++) {
                adisplacements -> data [i] [k] = displacement [k];
                if (his data [i] [iwinner] > minProb) {
                    double delta_k = winner -> centroid [k] - x [k];
                    displacement [k] += alpha * delta_k;
                }
            }
        }
        // Hand ownership of the displacement table to the caller.
        *displacements = adisplacements.move();
        return him;
    } catch (MelderError) {
        Melder_throw (U"ClassificationTable for Weenink procedure not created.");
    }
}
Ejemplo n.º 4
0
/*
	Classify the rows of 'thee' with the Discriminant 'me'.

	Parameters:
		me                       the Discriminant; note that its aprioriProbabilities vector is
		                         passed to NUMvector_normalize1 below, so 'me' is modified.
		thee                     table with one item per row; its number of columns must equal
		                         the dimension of the discriminant.
		poolCovarianceMatrices   non-zero: use the pooled covariance matrix for every group;
		                         zero: use each group's own matrix, falling back to the pooled
		                         one for groups whose matrix cannot be inverted.
		useAprioriProbabilities  non-zero: use the discriminant's priors; zero: use equal priors 1/g.

	Returns a ClassificationTable (ownership is transferred to the caller) with one row per row
	of 'thee' and one column per group; each row contains the normalised group probabilities.

	Throws "ClassificationTable from Discriminant & TableOfReal not created." on any failure.
*/
ClassificationTable Discriminant_and_TableOfReal_to_ClassificationTable (Discriminant me, TableOfReal thee,
        int poolCovarianceMatrices, int useAprioriProbabilities) {
	try {
		// g: number of groups, p: dimension of the data, m: number of rows to classify.
		long g = Discriminant_getNumberOfGroups (me);
		long p = Eigen_getDimensionOfComponents (me);
		long m = thy numberOfRows;

		if (p != thy numberOfColumns) Melder_throw
			(U"The number of columns does not agree with the dimension of the discriminant.");

		autoNUMvector<double> log_p (1, g);
		autoNUMvector<double> log_apriori (1, g);
		autoNUMvector<double> ln_determinant (1, g);
		autoNUMvector<double> buf (1, p);
		// sscpvec[j] is a non-owning pointer to the matrix used for group j:
		// either the group's own SSCP or the pooled one.
		autoNUMvector<SSCP> sscpvec (1, g);
		autoSSCP pool = SSCPs_to_SSCP_pool (my groups);
		autoClassificationTable him = ClassificationTable_create (m, g);
		NUMstrings_copyElements (thy rowLabels, his rowLabels, 1, m);

		// Scale the sscp to become a covariance matrix.
		// Only the upper triangle is divided; the lower triangle is then copied from it.

		for (long i = 1; i <= p; i++) {
			for (long k = i; k <= p; k++) {
				pool -> data[k][i] = (pool -> data[i][k] /= (pool -> numberOfObservations - g));
			}
		}

		// lnd: log-determinant returned for the pooled matrix (set on first inversion of the pool).
		double lnd;
		// agroups owns the working copy of the groups in the non-pooled case;
		// groups is the (non-owning) list whose centroids are used during classification.
		autoSSCPs agroups = 0; SSCPs groups;
		if (poolCovarianceMatrices) {
			/*
				Covariance matrix S can be decomposed as S = L.L'. Calculate L^-1.
				L^-1 will be used later in the Mahalanobis distance calculation:
				v'.S^-1.v == v'.L^-1'.L^-1.v == (L^-1.v)'.(L^-1.v).
			*/

			NUMlowerCholeskyInverse (pool -> data, p, &lnd);
			for (long j = 1; j <= g; j++) {
				ln_determinant[j] = lnd;
				sscpvec[j] = pool.peek();
			}
			groups = (SSCPs) my groups;
		} else {
			// Calculate the inverses of all group covariance matrices.
			// In case of a singular matrix, substitute inverse of pooled.
			// The group matrices are overwritten, hence the work is done on a copy of my groups.

			agroups.reset (Data_copy ( (SSCPs) my groups)); groups = agroups.peek();
			// npool counts the groups that had to fall back on the pooled matrix.
			long npool = 0;
			for (long j = 1; j <= g; j++) {
				SSCP t = (SSCP) groups -> item[j];
				long no = (long) floor (SSCP_getNumberOfObservations (t));
				// Scale this group's sscp to a covariance matrix (divide by no - 1).
				for (long i = 1; i <= p; i++) {
					for (long k = i; k <= p; k++) {
						t -> data[k][i] = (t -> data[i][k] /= (no - 1));
					}
				}
				sscpvec[j] = (SSCP) groups -> item[j];
				try {
					NUMlowerCholeskyInverse (t -> data, p, &ln_determinant[j]);
				} catch (MelderError) {
					// Try the alternative: the pooled covariance matrix.
					// Clear the error.

					Melder_clearError ();
					// The pooled matrix is inverted in place, so this must happen only once;
					// lnd keeps its value for later groups that also fall back.
					if (npool == 0) {
						NUMlowerCholeskyInverse (pool -> data, p, &lnd);
					}
					npool++;
					sscpvec[j] = pool.peek();
					ln_determinant[j] = lnd;
				}
			}
			if (npool > 0) {
				Melder_warning (npool, U" groups use pooled covariance matrix.");
			}
		}

		// Labels for columns in ClassificationTable: the group names, or "?" for unnamed groups.

		for (long j = 1; j <= g; j++) {
			const char32 *name = Thing_getName ( (Thing) my groups -> item[j]);
			if (! name) {
				name = U"?";
			}
			TableOfReal_setColumnLabel (him.peek(), j, name);
		}

		// Normalize the sum of the apriori probabilities to 1.
		// Next take ln (p) because otherwise probabilities might be too small to represent.
		// Without a priori probabilities every group gets ln (1/g) = - ln (g).

		NUMvector_normalize1 (my aprioriProbabilities, g);
		double logg = log (g);
		for (long j = 1; j <= g; j++) {
			log_apriori[j] = useAprioriProbabilities ? log (my aprioriProbabilities[j]) : - logg;
		}

		// Generalized squared distance function:
		// D^2(x) = (x - mu)' S^-1 (x - mu) + ln (determinant(S)) - 2 ln (apriori)

		for (long i = 1; i <= m; i++) {
			// The start value of the running maximum has to lie below every score that can occur.
			// The scores are doubles, so a float-range sentinel such as -1e38 is not low enough:
			// if all scores were smaller than the sentinel, every exp () below would underflow
			// to 0, norm would stay 0 and the row would become 0/0. Hence a value near the most
			// negative finite double is used.
			double norm = 0, pt_max = -1e308;
			// Log-domain score per group (-0.5 * D^2); remember the largest one.
			for (long j = 1; j <= g; j++) {
				SSCP t = (SSCP) groups -> item[j];
				double md = mahalanobisDistanceSq (sscpvec[j] -> data, p, thy data[i], t -> centroid, buf.peek());
				double pt = log_apriori[j] - 0.5 * (ln_determinant[j] + md);
				if (pt > pt_max) {
					pt_max = pt;
				}
				log_p[j] = pt;
			}
			// Subtract the maximum before exponentiating so that the largest term is exp (0) = 1;
			// from here on log_p holds unnormalised probabilities, not logarithms.
			for (long j = 1; j <= g; j++) {
				norm += (log_p[j] = exp (log_p[j] - pt_max));
			}
			for (long j = 1; j <= g; j++) {
				his data[i][j] = log_p[j] / norm;
			}
		}
		return him.transfer();
	} catch (MelderError) {
		Melder_throw (U"ClassificationTable from Discriminant & TableOfReal not created.");
	}
}