std::vector<std::vector<IMatrix<float>*>> NeuralNetAnalyzer::approximate_weight_hessian(NeuralNet &net)
{
	//setup output
	std::vector<std::vector<IMatrix<float>*>> output(net.layers.size());
	for (int i = 0; i < output.size(); ++i)
	{
		output[i] = std::vector<IMatrix<float>*>(net.layers[i]->recognition_weights.size());
		for (int j = 0; j < output[i].size(); ++j)
			output[i][j] = net.layers[i]->recognition_weights[j]->clone();
	}

	//find error for current network
	net.discriminate();
	float original_error = net.global_error();

	//begin evaluating derivatives of the error numerically for only one weight at a time, this requires the network to be ran for every single weight
	for (int l = 0; l < net.layers.size(); ++l)
	{
		for (int d = 0; d < net.layers[l]->recognition_weights.size(); ++d)
		{
			for (int i = 0; i < net.layers[l]->recognition_weights[d]->rows(); ++i)
			{
				for (int j = 0; j < net.layers[l]->recognition_weights[d]->cols(); ++j)
				{
					//adjust current weight
					net.layers[l]->recognition_weights[d]->at(i, j) -= .001f;

					//evaluate network with adjusted weight
					net.discriminate();
					float h_minus = net.global_error();

					//adjust current weight
					net.layers[l]->recognition_weights[d]->at(i, j) += .002f;

					//evaluate network with adjusted weight
					net.discriminate();
					float h = net.global_error();

					//approximate with derivative
					output[l][d]->at(i, j) = (h - 2 * original_error + h_minus) / (.001f * .001f);

					//reset weight
					net.layers[l]->recognition_weights[d]->at(i, j) -= .001f;
				}
			}
		}
	}

	return output;
}
std::vector<std::vector<IMatrix<float>*>> NeuralNetAnalyzer::approximate_bias_gradient(NeuralNet &net)
{
	//setup output
	std::vector<std::vector<IMatrix<float>*>> output(net.layers.size());
	for (int i = 0; i < output.size(); ++i)
	{
		output[i] = std::vector<IMatrix<float>*>(net.layers[i]->biases.size());
		for (int f = 0; f < output[i].size(); ++f)
			output[i][f] = net.layers[i]->biases[f]->clone();
	}

	//find error for current network
	net.discriminate();
	float original_error = net.global_error();

	//begin evaluating derivatives of the error numerically for only one bias at a time, this requires the network to be ran for every single bias
	for (int l = 0; l < net.layers.size(); ++l)
	{
		for (int f = 0; f < net.layers[l]->biases.size(); ++f)
		{
			for (int i = 0; i < net.layers[l]->biases[f]->rows(); ++i)
			{
				for (int j = 0; j < net.layers[l]->biases[f]->cols(); ++j)
				{
					//adjust current weight
					net.layers[l]->biases[f]->at(i, j) += .001f;

					//evaluate network with adjusted weight and approximate derivative
					net.discriminate();
					float adjusted_error = net.global_error();
					float diff = (adjusted_error - original_error) / .001f;
					output[l][f]->at(i, j) = diff;

					//reset weight
					net.layers[l]->biases[f]->at(i, j) -= .001f;
				}
			}
		}
	}

	return output;
}