// computes the gradient over the whole training set
static void gradientForWholeBatch(
    DataSet training_set,
    FeatureType* gradient) {

    memset(gradient, 0, training_set.num_features * sizeof(FeatureType));

    float* probabilities_of_positive = new float[training_set.num_data_points];

    // computes the logistic function for each data point in the training set
    size_t idx = 0;
    for (size_t i = 0; i < training_set.num_data_points; i++) {

        idx = i * training_set.num_features;

        probabilities_of_positive[i] = logisticFunction(
                                                training_set.parameter_vector,
                                                &training_set.data_points[idx],
                                                training_set.num_features);
    }

    // computes difference between
    // predicted probability and actual label: (PI - Y)
    addVectors(probabilities_of_positive,
               training_set.labels,
               training_set.num_data_points,
               -1);

    // finishes computation of the gradient: (1/n) * X^T * (PI(theta, X) - Y)
    float factor = 1.0f / training_set.num_data_points;
    matrixVectorMultiply(training_set.data_points,
                         probabilities_of_positive,
                         factor,
                         training_set.num_data_points,
                         training_set.num_features,
                         gradient);

    delete[] probabilities_of_positive;
}
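
/*
 * Minimal sketches of the helper routines assumed above, with signatures
 * inferred from the call sites; the actual project may define
 * logisticFunction, addVectors, and matrixVectorMultiply differently
 * (e.g. with a separate LabelType for the labels). Assumes <cmath> and
 * <cstring> are included and FeatureType is float.
 */

// sigmoid(theta . x): predicted probability that a data point is positive
static float logisticFunction(FeatureType* parameter_vector,
                              FeatureType* data_point,
                              size_t num_features) {
    float dot = 0.0f;
    for (size_t j = 0; j < num_features; j++)
        dot += parameter_vector[j] * data_point[j];
    return 1.0f / (1.0f + expf(-dot));
}

// a <- a + scale * b, elementwise over `size` entries
static void addVectors(float* a, const float* b, size_t size, float scale) {
    for (size_t i = 0; i < size; i++)
        a[i] += scale * b[i];
}

// result <- scale * X^T * v for a row-major (rows x cols) matrix X;
// this is the overload used by gradientForWholeBatch above
static void matrixVectorMultiply(const FeatureType* matrix, const float* vec,
                                 float scale, size_t rows, size_t cols,
                                 FeatureType* result) {
    for (size_t j = 0; j < cols; j++)
        result[j] = 0;
    for (size_t i = 0; i < rows; i++)
        for (size_t j = 0; j < cols; j++)
            result[j] += scale * matrix[i * cols + j] * vec[i];
}
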
matrix tridiagonalize(matrix A){
	/*
	Tridiagonalize a symmetric matrix using Householder transformations.
	The transformations are applied in place via Householder vectors and
	coefficients, rather than by explicitly forming the transformation
	matrices and computing their products.

	Returns T = Q^T * A * Q.

	Source: Golub and Van Loan, "Matrix Computations", p. 415.

	Input:
	    Matrix A                  Symmetric matrix to tridiagonalize.

	Output:
	    Matrix T (returned)       Tridiagonal matrix similar to the input matrix.
	*/


	double *v = (double *) malloc(A.n * sizeof(double)) ;
	double *p = (double *) malloc(A.n * sizeof(double)) ;
	double *w = (double *) malloc(A.n * sizeof(double)) ;
	double beta ;
	double temp ;

	double t1, t2 ;

	int currentVectorLength = A.n - 1 ;
	int n = A.n ;

	int iVector, jVector ;

	int i, j, k ;
	for(k=0; k < n-2; k++){
		/* Householder vector v (v[0] = 1) and coefficient beta that
		   annihilate A[k+2..n-1][k] */
		beta = householder(A, k+1, k, v) ;

		/* p = beta * A[k+1:,k+1:] * v */
		matrixVectorMultiply(A, k+1, k+1, v, beta, p) ;

		/* w = p - (beta/2) * (p^T v) * v */
		temp = - 0.5 * beta * innerProduct(p, v, currentVectorLength ) ;
		vectorPlusConstantByVector(w, p, temp, v, currentVectorLength ) ;

		/* the norm of the eliminated subcolumn becomes the new
		   sub-/super-diagonal entry in column/row k */
		temp = 0.0 ;
		for(j=k+1; j<n; j++){
			temp += A.values[j][k] * A.values[j][k] ;
		}
		temp = sqrt(temp) ;
		A.values[k+1][k] = temp ;
		A.values[k][k+1] = temp ;

		/* symmetric rank-two update of the trailing submatrix:
		   A <- A - v*w^T - w*v^T (iVector/jVector are the 0-based
		   indices into v and w) */
		for(i=k+1, iVector=0; i<n; i++, iVector++){
			for(j=k+1, jVector=0; j<n; j++, jVector++){
				t1 = v[iVector]*w[jVector] ;
				t2 = v[jVector]*w[iVector] ;
				A.values[i][j] -= (t1 + t2) ;
			}
		}

		currentVectorLength-- ;
	}

	// zero the entries outside the tridiagonal band, which still hold
	// the Householder multipliers
	for(i=0; i<n-1; i++){
		for(j=i+2; j<n; j++){
			A.values[i][j] = 0.0 ;
			A.values[j][i] = 0.0 ;
		}
	}

	free(v) ;
	free(p) ;
	free(w) ;

	return A;
}
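
/*
 * Minimal sketches, under stated assumptions, of the helpers that
 * tridiagonalize() relies on. The `matrix` struct (fields n and values[][])
 * is taken from the surrounding code; the project's real routines may
 * differ, and this matrixVectorMultiply is a separate overload from the one
 * sketched above. householder() follows Algorithm 5.1.1 of Golub and Van
 * Loan: given the subcolumn x = A[row..n-1][col], it fills v (with v[0] = 1)
 * and returns beta such that (I - beta*v*v^T) * x = ||x|| * e1.
 */
double householder(matrix A, int row, int col, double *v){
	int len = A.n - row ;          /* length of the subcolumn x */
	double x0 = A.values[row][col] ;
	double sigma = 0.0 ;           /* sigma = x[1:]^T * x[1:] */
	double mu, v0, beta ;
	int i ;

	for(i = 1; i < len; i++){
		v[i] = A.values[row + i][col] ;
		sigma += v[i] * v[i] ;
	}
	v[0] = 1.0 ;

	if(sigma == 0.0)
		return 0.0 ;               /* x is already a multiple of e1 */

	mu = sqrt(x0 * x0 + sigma) ;
	if(x0 <= 0.0)
		v0 = x0 - mu ;
	else
		v0 = -sigma / (x0 + mu) ;  /* avoids cancellation when x0 > 0 */

	beta = 2.0 * v0 * v0 / (sigma + v0 * v0) ;
	for(i = 1; i < len; i++)
		v[i] /= v0 ;               /* scale so that v[0] = 1 */
	return beta ;
}

/* p = beta * A[row..n-1][col..n-1] * v */
void matrixVectorMultiply(matrix A, int row, int col, double *v, double beta, double *p){
	int i, j ;
	for(i = row; i < A.n; i++){
		p[i - row] = 0.0 ;
		for(j = col; j < A.n; j++)
			p[i - row] += A.values[i][j] * v[j - col] ;
		p[i - row] *= beta ;
	}
}

/* returns a^T * b over len entries */
double innerProduct(double *a, double *b, int len){
	double sum = 0.0 ;
	int i ;
	for(i = 0; i < len; i++)
		sum += a[i] * b[i] ;
	return sum ;
}

/* result = a + c * b, elementwise over len entries */
void vectorPlusConstantByVector(double *result, double *a, double c, double *b, int len){
	int i ;
	for(i = 0; i < len; i++)
		result[i] = a[i] + c * b[i] ;
}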