// computes gradient for the whole training set static void gradientForWholeBatch( DataSet training_set, FeatureType* gradient) { memset(gradient, 0, training_set.num_features * sizeof(FeatureType)); float* probabilities_of_positive = new float[training_set.num_data_points]; // computes logistc function for each data point in the training set size_t idx = 0; for (size_t i = 0; i < training_set.num_data_points; i++) { idx = i * training_set.num_features; probabilities_of_positive[i] = logisticFunction( training_set.parameter_vector, &training_set.data_points[idx], training_set.num_features); } // computes difference between // predicted probability and actual label: (PI - Y) addVectors(probabilities_of_positive, training_set.labels, training_set.num_data_points, -1); // finishes computation of gradient: (1/n) * X^T * (PI(theta, X) - YI) float factor = 1.0f / training_set.num_data_points; matrixVectorMultiply(training_set.data_points, probabilities_of_positive, factor, training_set.num_data_points, training_set.num_features, gradient); delete[] probabilities_of_positive; }
/*
    Tridiagonalize a symmetric matrix using Householder transformations.
    Transformations are performed in place with Householder vectors and
    coefficients, rather than explicitly forming matrices and explicitly
    computing their products.

    Returns T = Q^t * A * Q

    Source: Golub and Van Loan, "Matrix Computations" p. 415

    Input:
        Matrix A    Matrix to tridiagonalize. Its entries (A.values) are
                    overwritten by this routine.

    Output:
        Matrix T (returned)   Tridiagonal matrix similar to input matrix.
                              This is the same matrix object that was passed
                              in, after modification.

    Notes:
        - Matrices with fewer than 3 rows need no work and are returned
          untouched, without allocating scratch storage.
        - If scratch storage cannot be allocated the process is terminated
          with abort() rather than dereferencing a NULL pointer.
*/
matrix tridiagonalize(matrix A){
    int n = A.n ;
    double *v ;
    double *p ;
    double *w ;
    double beta ;
    double temp ;
    int currentVectorLength = n - 1 ;
    int iVector, jVector ;
    int i, j, k ;

    /* For n < 3 neither the reduction loop (k < n-2) nor the clean-up loop
       (j starts at i+2) executes, so there is nothing to do. Returning here
       also keeps a NULL result from malloc(0) from being treated as an
       allocation failure below. */
    if(n < 3){
        return A ;
    }

    /* Scratch vectors. Cast kept so the file also builds as C++. */
    v = (double *) malloc((size_t) n * sizeof *v) ;
    p = (double *) malloc((size_t) n * sizeof *p) ;
    w = (double *) malloc((size_t) n * sizeof *w) ;
    if(v == NULL || p == NULL || w == NULL){
        free(v) ;
        free(p) ;
        free(w) ;
        abort() ;
    }

    for(k=0; k < n-2; k++){
        /* Householder vector and coefficient for column k below the
           diagonal; v is indexed from 0 relative to row k+1
           (presumably holding currentVectorLength entries -- see helper). */
        beta = householder(A, k+1, k, v) ;

        /* p = beta * A(k+1:n, k+1:n) * v  (per Golub & Van Loan; the exact
           contract is defined by matrixVectorMultiply). */
        matrixVectorMultiply(A, k+1, k+1, v, beta, p) ;

        /* w = p - (beta * p^T v / 2) * v */
        temp = - 0.5 * beta * innerProduct(p, v, currentVectorLength ) ;
        vectorPlusConstantByVector(w, p, temp, v, currentVectorLength ) ;

        /* The new sub/super-diagonal entry is the 2-norm of column k
           below the diagonal. */
        temp = 0.0 ;
        for(j=k+1; j<n; j++){
            temp += A.values[j][k] * A.values[j][k] ;
        }
        temp = sqrt(temp) ;
        A.values[k+1][k] = temp ;
        A.values[k][k+1] = temp ;

        /* Rank-2 update of the trailing block:
           A(k+1:n, k+1:n) -= v w^T + w v^T.
           i/j index the matrix; iVector/jVector index the 0-based
           scratch vectors. */
        for(i=k+1, iVector=0; i<n; i++, iVector++){
            for(j=k+1, jVector=0; j<n; j++, jVector++){
                A.values[i][j] -= (v[iVector]*w[jVector] + v[jVector]*w[iVector]) ;
            }
        }

        currentVectorLength-- ;
    }

    // zero off diagonal multipliers
    for(i=0; i<n-1; i++){
        for(j=i+2; j<n; j++){
            A.values[i][j] = 0.0 ;
            A.values[j][i] = 0.0 ;
        }
    }

    free(v) ;
    free(p) ;
    free(w) ;

    return A ;
}