SVD SVD_create_d (double **m, long numberOfRows, long numberOfColumns) {
	try {
		autoSVD me = SVD_create (numberOfRows, numberOfColumns);
		SVD_svd_d (me.peek(), m);
		return me.transfer();
	} catch (MelderError) {
		Melder_throw ("SVD not created from vector.");
	}
}
static void NUMmaximizeCongruence (double **b, double **a, long nr, long nc, double **t, long maximumNumberOfIterations, double tolerance) {
	long numberOfIterations = 0;
	Melder_assert (nr > 0 && nc > 0);
	Melder_assert (t);

	if (nc == 1) {
		t[1][1] = 1; return;
	}
	autoNUMmatrix<double> c (1, nc, 1, nc);
	autoNUMmatrix<double> w (1, nc, 1, nc);
	autoNUMmatrix<double> u (1, nc, 1, nc);
	autoNUMvector<double> evec (1, nc);
	autoSVD svd = SVD_create (nc, nc);

	// Steps 1 & 2: C = A'A and W = A'B

	double checkc = 0, checkw = 0;
	for (long i = 1; i <= nc; i++) {
		for (long j = 1; j <= nc; j++) {
			for (long k = 1; k <= nr; k++) {
				c[i][j] += a[k][i] * a[k][j];
				w[i][j] += a[k][i] * b[k][j];
			}
			checkc += c[i][j]; checkw += w[i][j];
		}
	}

	if (checkc == 0.0 || checkw == 0.0) {
		Melder_throw (U"NUMmaximizeCongruence: we cannot rotate a zero matrix.");
	}

	// Scale W by (diag(B'B))^-1/2

	for (long j = 1; j <= nc; j++) {
		double scale = 0.0;
		for (long k = 1; k <= nr; k++) {
			scale += b[k][j] * b[k][j];
		}
		if (scale > 0.0) {
			scale = 1.0 / sqrt (scale);
		}
		for (long i = 1; i <= nc; i++) {
			w[i][j] *= scale;
		}
	}

	// Step 3: largest eigenvalue of C

	evec[1] = 1.0;
	double rho, f, f_old;
	NUMdominantEigenvector (c.peek(), nc, evec.peek(), &rho, 1.0e-6);

	do_steps45 (w.peek(), t, c.peek(), nc, &f);
	do {
		for (long j = 1; j <= nc; j++) {
			// Step 7.a

			double p = 0.0;
			for (long k = 1; k <= nc; k++) {
				for (long i = 1; i <= nc; i++) {
					p += t[k][j] * c[k][i] * t[i][j];
				}
			}

			// Step 7.b

			double q = 0.0;
			for (long k = 1; k <= nc; k++) {
				q += w[k][j] * t[k][j];
			}

			// Step 7.c

			if (q == 0.0) {
				for (long i = 1; i <= nc; i++) {
					u[i][j] = 0.0;
				}
			} else {
				double ww = 0.0;
				for (long k = 1; k <= nc; k++) {
					ww += w[k][j] * w[k][j];
				}
				for (long i = 1; i <= nc; i++) {
					double ct = 0.0;
					for (long k = 1; k <= nc; k++) {
						ct += c[i][k] * t[k][j];
					}
					u[i][j] = (q * (ct - rho * t[i][j]) / p - 2.0 * ww * t[i][j] / q - w[i][j]) / sqrt (p);
				}
			}
		}

		// Step 8

		SVD_svd_d (svd.get(), u.peek());

		// Step 9

		for (long i = 1; i <= nc; i++) {
			for (long j = 1; j <= nc; j++) {
				t[i][j] = 0.0;
				for (long  k = 1; k <= nc; k++) {
					t[i][j] -= svd -> u[i][k] * svd -> v[j][k];
				}
			}
		}

		numberOfIterations++;
		f_old = f;

		// Steps 10 & 11 equal steps 4 & 5

		do_steps45 (w.peek(), t, c.peek(), nc, &f);

	} while (fabs (f_old - f) / f_old > tolerance && numberOfIterations < maximumNumberOfIterations);
}