void
AAKR::normalize(Math::Matrix& mean, Math::Matrix& std)
{
  // Resize mean and standard deviation variables.
  mean.resizeAndFill(1, sampleSize(), 0);
  std.resizeAndFill(1, sampleSize(), 0);

  // Compute mean.
  for (unsigned i = 0; i < sampleSize(); i++)
    mean(i) = sum(m_data.get(0, m_num_values - 1, i, i)) / m_num_values;

  // Compute standard deviation.
  for (unsigned j = 0; j < sampleSize(); j++)
  {
    double sum = 0;

    // Sum of the squared differences between the values and the mean.
    for (unsigned i = 0; i < m_num_values; i++)
      sum += std::pow(m_data(i, j) - mean(j), 2);

    // Standard deviation.
    std(j) = std::sqrt(sum / m_num_values);

    // Normalize each member of the data set.
    for (unsigned i = 0; i < m_num_values; i++)
    {
      if (std(j))
        m_norm(i, j) = (m_data(i, j) - mean(j)) / std(j);
      else
        m_norm(i, j) = 0;
    }
  }
}
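For reference, here is a minimal standalone sketch of the same column-wise z-score normalization, written against plain C arrays instead of the Math::Matrix API. The function name zscore_normalize and the row-major layout (n_rows samples of n_cols variables) are assumptions for illustration only; note it uses the population standard deviation (divide by N) and the same zero-std guard as normalize() above.

/* Illustrative sketch only: zscore_normalize and the array layout
 * are assumptions, not part of the AAKR class. */
#include <math.h>
#include <stdio.h>

static void zscore_normalize(const double *data, double *norm,
                             int n_rows, int n_cols)
{
    for ( int j = 0; j < n_cols; j++ ) {
        // mean of column j
        double mean = 0;
        for ( int i = 0; i < n_rows; i++ )
            mean += data[i * n_cols + j];
        mean /= n_rows;

        // population standard deviation (1/N, as in normalize() above)
        double sum = 0;
        for ( int i = 0; i < n_rows; i++ ) {
            double d = data[i * n_cols + j] - mean;
            sum += d * d;
        }
        double std = sqrt(sum / n_rows);

        // guard against constant columns, matching the std(j) == 0 branch
        for ( int i = 0; i < n_rows; i++ )
            norm[i * n_cols + j] =
                (std != 0) ? (data[i * n_cols + j] - mean) / std : 0;
    }
}

int main(void)
{
    double data[6] = { 1, 10, 2, 10, 3, 10 };  // 3 samples, 2 variables
    double norm[6];

    zscore_normalize(data, norm, 3, 2);

    // first column maps to -1.2247, 0, +1.2247; constant column maps to 0
    for ( int i = 0; i < 3; i++ )
        printf("%+.4f %+.4f\n", norm[i * 2], norm[i * 2 + 1]);
    return 0;
}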
/**
 * Compute the "angle" between two matrices.
 *
 * A and B are treated as column vectors, so that
 * the angle between them is:
 *
 *   acos(-A * B / (||A|| * ||B||))
 *
 * @param A  pointer to matrix
 * @param B  pointer to matrix
 * @return angle between A and B
 */
precision_t m_angle(matrix_t *A, matrix_t *B)
{
    assert(A->rows == B->rows && A->cols == B->cols);

    // compute A * B
    precision_t a_dot_b = 0;

    int i, j;
    for ( i = 0; i < A->rows; i++ ) {
        for ( j = 0; j < A->cols; j++ ) {
            a_dot_b += elem(A, i, j) * elem(B, i, j);
        }
    }

    return acos(-a_dot_b / (m_norm(A) * m_norm(B)));
}
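One consequence of the negated dot product is worth spelling out: identical inputs give acos(-1) = pi, i.e. 180 degrees, and orthogonal inputs give 90 degrees. The following self-contained check illustrates this sign convention on plain arrays; vec_angle and its signature are illustrative assumptions, not part of the library.

/* Illustrative sketch only: vec_angle is an assumption used to
 * demonstrate the sign convention of m_angle(). */
#include <math.h>
#include <stdio.h>

static double vec_angle(const double *a, const double *b, int n)
{
    double dot = 0, na = 0, nb = 0;
    for ( int i = 0; i < n; i++ ) {
        dot += a[i] * b[i];
        na  += a[i] * a[i];
        nb  += b[i] * b[i];
    }
    // same sign convention as m_angle(): identical inputs map to pi
    return acos(-dot / (sqrt(na) * sqrt(nb)));
}

int main(void)
{
    double a[2] = { 1, 0 };
    double b[2] = { 0, 1 };

    printf("angle(a, a) = %.1f deg\n", 180 * vec_angle(a, a, 2) / M_PI);  // 180.0
    printf("angle(a, b) = %.1f deg\n", 180 * vec_angle(a, b, 2) / M_PI);  //  90.0
    return 0;
}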
/**
 * Implementation of the learning rule described in Bell & Sejnowski,
 * Vision Research, in press for 1997, that contained the natural
 * gradient (W' * W).
 *
 * Bell & Sejnowski hold the patent for this learning rule.
 *
 * SEP goes once through the mixed signals X in batch blocks of size B,
 * adjusting weights W at the end of each block.
 *
 * sepout is called every F counts.
 *
 * I suggest a learning rate (L) of 0.006, and a block size (B) of
 * 300, at least for 2->2 separation. When annealing to the right
 * solution for 10->10, however, L < 0.0001 and B = 10 were most
 * successful.
 *
 * @param X  "sphered" input matrix
 * @param W  weight matrix
 * @param B  block size
 * @param L  learning rate
 * @param F  interval to print training stats
 */
void sep96(matrix_t *X, matrix_t *W, int B, double L, int F)
{
    matrix_t *BI = m_identity(X->rows);
    m_elem_mult(BI, B);

    int t;
    for ( t = 0; t < X->cols; t += B ) {
        int end = (t + B < X->cols) ? t + B : X->cols;

        matrix_t *W0 = m_copy(W);
        matrix_t *X_batch = m_copy_columns(X, t, end);
        matrix_t *U = m_product(W0, X_batch);

        // compute Y' = 1 - 2 * f(U), f(u) = 1 / (1 + e^(-u))
        matrix_t *Y_p = m_initialize(U->rows, U->cols);

        int i, j;
        for ( i = 0; i < Y_p->rows; i++ ) {
            for ( j = 0; j < Y_p->cols; j++ ) {
                elem(Y_p, i, j) = 1 - 2 * (1 / (1 + exp(-elem(U, i, j))));
            }
        }

        // compute dW = L * (BI + Y' * U') * W0
        matrix_t *U_tr = m_transpose(U);
        matrix_t *dW_temp1 = m_product(Y_p, U_tr);
        m_add(dW_temp1, BI);

        matrix_t *dW = m_product(dW_temp1, W0);
        m_elem_mult(dW, L);

        // compute W = W0 + dW
        m_add(W, dW);

        // print training stats
        if ( t % F == 0 ) {
            precision_t norm = m_norm(dW);
            precision_t angle = m_angle(W0, W);
            printf("*** norm(dW) = %.4lf, angle(W0, W) = %.1lf deg\n",
                   norm, 180 * angle / M_PI);
        }

        // cleanup
        m_free(W0);
        m_free(X_batch);
        m_free(U);
        m_free(Y_p);
        m_free(U_tr);
        m_free(dW_temp1);
        m_free(dW);
    }

    // cleanup (BI was previously leaked)
    m_free(BI);
}
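To make the block update concrete, here is a minimal sketch of a single sep96-style step on raw 2x2 arrays, without the matrix_t helpers. All array names, sizes, and the toy batch values are illustrative assumptions; it only demonstrates the update dW = L * (B*I + Y'*U^T) * W0 with Y' = 1 - 2*logistic(U).

/* Illustrative sketch only: one block update of the Bell & Sejnowski
 * natural-gradient rule on toy data; nothing here is library API. */
#include <math.h>
#include <stdio.h>

#define N 2   // number of channels
#define B 3   // block size (columns in this batch)

int main(void)
{
    double W[N][N] = { { 1.0, 0.1 }, { -0.1, 1.0 } };             // current weights
    double X[N][B] = { { 0.5, -1.2, 0.3 }, { 1.1, 0.4, -0.7 } };  // sphered batch
    double L = 0.006;                                             // learning rate

    // U = W * X (W has not been updated yet, so it plays the role of W0)
    double U[N][B] = { { 0 } };
    for ( int i = 0; i < N; i++ )
        for ( int k = 0; k < B; k++ )
            for ( int j = 0; j < N; j++ )
                U[i][k] += W[i][j] * X[j][k];

    // G = B*I + Y'*U^T, with Y' = 1 - 2 / (1 + exp(-U))
    double G[N][N] = { { 0 } };
    for ( int i = 0; i < N; i++ ) {
        G[i][i] = B;
        for ( int j = 0; j < N; j++ )
            for ( int k = 0; k < B; k++ )
                G[i][j] += (1 - 2 / (1 + exp(-U[i][k]))) * U[j][k];
    }

    // W = W0 + L * G * W0
    double W0[N][N];
    for ( int i = 0; i < N; i++ )
        for ( int j = 0; j < N; j++ )
            W0[i][j] = W[i][j];
    for ( int i = 0; i < N; i++ )
        for ( int j = 0; j < N; j++ )
            for ( int k = 0; k < N; k++ )
                W[i][j] += L * G[i][k] * W0[k][j];

    for ( int i = 0; i < N; i++ )
        printf("%+.6f %+.6f\n", W[i][0], W[i][1]);
    return 0;
}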