#include <algorithm> // std::reverse
#include <vector>

#include <armadillo>

// The NeuralNetwork and NNTrainer class declarations are assumed to come from
// the project's own headers, which are not part of this listing.

void NeuralNetwork::backprop(const arma::mat& input, const arma::mat& output)
{
    std::vector<arma::mat> gradients;

    // Error term for the output layer: delta_L = a_L - y.
    gradients.push_back(m_activationOnLayer[m_numLayers - 1] - output);

    // Backpropagate the error through the hidden layers. Column 0 of each
    // theta matrix holds the bias weights, which receive no error term, so it
    // is skipped when propagating:
    unsigned int prevErrorIndex = 0;
    for (int layer = m_numLayers - 2; layer > 0; --layer)
    {
        arma::mat error = gradients[prevErrorIndex++] * m_theta[layer].cols(1, m_theta[layer].n_cols - 1);
        error = error % sigmoidGradient(m_partialOnLayer[layer - 1]); // % = element-wise product
        gradients.push_back(error);
    }

    // Turn each error term into a gradient, averaged over the batch:
    // dJ/dTheta[layer] = (1/m) * error^T * activation[layer].
    int errorIndex = 0;
    for (int layer = m_numLayers - 2; layer >= 0; --layer)
    {
        gradients[errorIndex] = (1.0 / input.n_rows) * (gradients[errorIndex].t() * m_activationOnLayer[layer]);
        ++errorIndex;
    }

    // The errors were pushed from the output layer backwards; reverse so that
    // gradients[l] lines up with m_theta[l].
    std::reverse(gradients.begin(), gradients.end());

    // L2 regularization: penalize every weight except the bias column (column 0).
    for (unsigned int layer = 0; layer < m_numLayers - 1; ++layer)
    {
        const int lastCol = gradients[layer].n_cols - 1;
        gradients[layer].cols(1, lastCol) += (m_regFactor / input.n_rows) * m_theta[layer].cols(1, lastCol);
    }

    //checkGradients(input, output, gradients);
    gradientDescent(gradients);
}
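
// Neither sigmoidGradient() nor gradientDescent() is defined in this listing.
// Minimal sketches of what they are assumed to do follow; the member
// signatures (and the m_learningRate name below) are assumptions, not the
// class's actual interface.

// Assuming m_partialOnLayer stores the pre-activation values z, the derivative
// of the logistic sigmoid is sigma'(z) = sigma(z) .* (1 - sigma(z)):
arma::mat NeuralNetwork::sigmoidGradient(const arma::mat& z)
{
    const arma::mat s = 1.0 / (1.0 + arma::exp(-z)); // element-wise logistic sigmoid
    return s % (1.0 - s);                            // % is Armadillo's element-wise product
}

// Assuming a plain batch update with a fixed learning rate:
void NeuralNetwork::gradientDescent(const std::vector<arma::mat>& gradients)
{
    for (unsigned int layer = 0; layer < m_numLayers - 1; ++layer)
        m_theta[layer] -= m_learningRate * gradients[layer];
}
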
std::vector<double> NNTrainer::gradient(const double lambda)
{
    int numExamples = trainingSet->size();

    //-- Create one accumulator per weight matrix, with matching dimensions:
    std::vector<Matrix> Delta;

    for (int i = 0; i < (int) nn->getWeights().size(); i++)
        Delta.push_back(Matrix(nn->getWeights().at(i)->getNumRows(), nn->getWeights().at(i)->getNumCols()));

    //-- Iterate over all training examples:
    for (int i = 0; i < numExamples; i++)
    {
        //-- Forward-propagate the network:
        nn->setInput(trainingSet->at(i).x);

        //-- Vector to store the error terms ("increments").
        //-- Increments are stored in reverse order (i.e. the last layer's increment comes first):
        std::vector<Matrix> inc;

        //-- Increment for the output layer: the difference between the network
        //-- output and the target, as column vectors.
        Matrix output = Matrix(nn->getOutput(), nn->getOutput().size(), 1);
        Matrix y = Matrix(trainingSet->at(i).y, trainingSet->at(i).y.size(), 1);

        inc.push_back(output - y);

        //-- Increments for the hidden layers. Row 0 of aux1 corresponds to the
        //-- bias unit, which has no error term of its own, so it is skipped:
        for (int l = nn->getL() - 2; l > 0; l--)
        {
            Matrix aux1 = nn->getWeights().at(l)->transpose() * inc.back();
            Matrix aux2(aux1.getNumRows() - 1, aux1.getNumCols());

            for (int j = 0; j < aux2.getNumCols(); j++)
                for (int k = 0; k < aux2.getNumRows(); k++)
                    aux2.set(k, j, aux1.get(k + 1, j) * sigmoidGradient(nn->getActivation(l).at(k)));

            inc.push_back(aux2);
        }

        //-- The input layer has no error term (no weights lead into it).

        //-- Accumulate this example's gradient contribution. inc is stored in
        //-- reverse order, hence the inc.size() - l - 1 index. Column 0 holds
        //-- the bias term (activation 1); the remaining columns hold the outer
        //-- product of the error term and the layer's activations:
        for (int l = 0; l < (int) Delta.size(); l++)
        {
            Matrix aux1(Delta.at(l).getNumRows(), Delta.at(l).getNumCols());

            for (int j = 0; j < aux1.getNumRows(); j++)
                aux1.set(j, 0, inc.at(inc.size() - l - 1).get(j, 0));

            for (int j = 0; j < aux1.getNumRows(); j++)
                for (int k = 1; k < aux1.getNumCols(); k++)
                    aux1.set(j, k, inc.at(inc.size() - l - 1).get(j, 0) * nn->getActivation(l).at(k - 1));

            Delta.at(l) += aux1;
        }
    }

    //-- Divide by the number of training examples:
    for (int l = 0; l < (int) Delta.size(); l++)
        Delta.at(l) /= numExamples;

    //-- Regularization: add (lambda / m) * weight to every gradient entry,
    //-- skipping the bias column (column 0).
    //------------------------------------------------------------------------
    if (lambda != 0)
    {
        for (int l = 0; l < (int) Delta.size(); l++)
        {
            Matrix aux(nn->getWeights().at(l)->getNumRows(), nn->getWeights().at(l)->getNumCols());

            for (int j = 0; j < aux.getNumRows(); j++)
                for (int k = 1; k < aux.getNumCols(); k++)
                    aux.set(j, k, nn->getWeights().at(l)->get(j, k) * lambda / numExamples);

            Delta.at(l) += aux;
        }
    }
    //-- Unroll the gradient matrices into a single flat vector:
    //------------------------------------------------------------------------
    std::vector<double> unrolled = Delta.front().unroll();

    for (int l = 1; l < (int) Delta.size(); l++)
        for (int j = 0; j < Delta.at(l).getNumRows(); j++)
            for (int k = 0; k < Delta.at(l).getNumCols(); k++)
                unrolled.push_back(Delta.at(l).get(j, k));

    return unrolled;
}
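
// The commented-out checkGradients() call in NeuralNetwork::backprop() above
// suggests the analytic gradients were verified numerically. That checker is
// not part of this listing; the sketch below only illustrates the usual
// central-difference check. The free function, and the names costFn, theta,
// and eps, are hypothetical, not part of either class above.
#include <cstddef>
#include <functional>
#include <vector>

std::vector<double> numericalGradient(const std::function<double(const std::vector<double>&)>& costFn,
                                      std::vector<double> theta,
                                      const double eps = 1e-4)
{
    std::vector<double> grad(theta.size());

    for (std::size_t i = 0; i < theta.size(); ++i)
    {
        const double saved = theta[i];

        theta[i] = saved + eps;
        const double jPlus = costFn(theta);        // J(theta + eps * e_i)

        theta[i] = saved - eps;
        const double jMinus = costFn(theta);       // J(theta - eps * e_i)

        theta[i] = saved;                          // restore the parameter
        grad[i] = (jPlus - jMinus) / (2.0 * eps);  // central difference
    }
    return grad;
}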