void NeuralNetwork::backprop(const arma::mat& input, const arma::mat& output)
{
    std::vector<arma::mat> gradients;

    //-- Error term for the output layer:
    gradients.push_back(m_activationOnLayer[m_numLayers - 1] - output);

    //-- Backpropagate the error through the hidden layers, skipping the
    //-- bias column of each weight matrix:
    unsigned int prevErrorIndex = 0;
    for (int layer = m_numLayers - 2; layer > 0; --layer)
    {
        arma::mat error = gradients[prevErrorIndex++]
                        * m_theta[layer].cols(1, m_theta[layer].n_cols - 1);
        error = error % sigmoidGradient(m_partialOnLayer[layer - 1]);
        gradients.push_back(error);
    }

    //-- Turn the error terms into gradients, averaged over the training examples:
    int errorIndex = 0;
    for (int layer = m_numLayers - 2; layer >= 0; --layer)
    {
        gradients[errorIndex] = (1.0 / input.n_rows)
                              * (gradients[errorIndex].t() * m_activationOnLayer[layer]);
        ++errorIndex;
    }

    //-- Error terms were stored from the output layer backwards; put them in layer order:
    std::reverse(gradients.begin(), gradients.end());

    //-- Add the regularization term (the bias column is not regularized):
    for (unsigned int layer = 0; layer < m_numLayers - 1; ++layer)
    {
        int lastCol = gradients[layer].n_cols - 1;
        gradients[layer].cols(1, lastCol) += (m_regFactor / input.n_rows)
                                           * m_theta[layer].cols(1, lastCol);
    }

    //checkGradients(input, output, gradients);
    gradientDescent(gradients);
}
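//-- A note on sigmoidGradient(): its definition is not shown here. A minimal
//-- sketch, assuming m_partialOnLayer stores the pre-activation values z, so
//-- that the derivative is g'(z) = g(z) * (1 - g(z)) applied element-wise:

arma::mat sigmoidGradient(const arma::mat& z)
{
    const arma::mat s = 1.0 / (1.0 + arma::exp(-z));  // logistic sigmoid g(z)
    return s % (1.0 - s);                             // % is Armadillo's element-wise product
}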
std::vector<double> NNTrainer::gradient(const double lambda)
{
    int numExamples = trainingSet->size();

    //-- Create n matrices with the dimensions of the weight matrices:
    std::vector<Matrix> Delta;
    for (int i = 0; i < (int) nn->getWeights().size(); i++)
        Delta.push_back(Matrix(nn->getWeights().at(i)->getNumRows(),
                               nn->getWeights().at(i)->getNumCols()));

    //-- Iterate over all training examples
    for (int i = 0; i < numExamples; i++)
    {
        //-- Forward-propagate the network:
        nn->setInput(trainingSet->at(i).x);

        //-- Create vector to store the increments:
        //-- Increments are stored in reverse order (i.e. the last increment first)
        std::vector<Matrix> inc;

        //-- Increment for output layer
        Matrix output = Matrix(nn->getOutput(), nn->getOutput().size(), 1);
        Matrix y = Matrix(trainingSet->at(i).y, trainingSet->at(i).y.size(), 1);
        inc.push_back(output - y);

        //-- Increment for hidden layers (the derivative is taken at the
        //-- stored activation; the bias row of the backpropagated error is skipped)
        for (int l = nn->getL() - 2; l > 0; l--)
        {
            Matrix aux1 = nn->getWeights().at(l)->transpose() * inc.back();
            Matrix aux2(aux1.getNumRows() - 1, aux1.getNumCols());

            for (int j = 0; j < aux2.getNumCols(); j++)
                for (int k = 0; k < aux2.getNumRows(); k++)
                    aux2.set(k, j, aux1.get(k + 1, j)
                                   * sigmoidGradient(nn->getActivation(l).at(k)));

            inc.push_back(aux2);
        }
        //-- The input layer has no error associated (it has no weights associated with it)

        //-- Accumulate error:
        for (int l = 0; l < (int) Delta.size(); l++)
        {
            Matrix aux1(Delta.at(l).getNumRows(), Delta.at(l).getNumCols());

            for (int j = 0; j < aux1.getNumRows(); j++)
                aux1.set(j, 0, inc.at(inc.size() - l - 1).get(j, 0));

            for (int j = 0; j < aux1.getNumRows(); j++)
                for (int k = 1; k < aux1.getNumCols(); k++)
                    aux1.set(j, k, inc.at(inc.size() - l - 1).get(j, 0)
                                   * nn->getActivation(l).at(k - 1));

            Delta.at(l) += aux1;
        }
    }

    //-- Divide by number of training examples:
    for (int l = 0; l < (int) Delta.size(); l++)
        Delta.at(l) /= numExamples;

    //-- Regularization (the bias weights in column 0 are not regularized):
    //------------------------------------------------------------------------
    if (lambda != 0)
    {
        for (int l = 0; l < (int) Delta.size(); l++)
        {
            Matrix aux(nn->getWeights().at(l)->getNumRows(),
                       nn->getWeights().at(l)->getNumCols());

            for (int j = 0; j < aux.getNumRows(); j++)
                for (int k = 1; k < aux.getNumCols(); k++)
                    aux.set(j, k, nn->getWeights().at(l)->get(j, k) * lambda / numExamples);

            Delta.at(l) += aux;
        }
    }

    //-- Unroll gradient:
    //------------------------------------------------------------------------
    std::vector<double> unrolled = Delta.front().unroll();
    for (int l = 1; l < (int) Delta.size(); l++)
        for (int j = 0; j < Delta.at(l).getNumRows(); j++)
            for (int k = 0; k < Delta.at(l).getNumCols(); k++)
                unrolled.push_back(Delta.at(l).get(j, k));

    return unrolled;
}
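//-- The commented-out checkGradients() call in backprop() hints at validating
//-- the analytic gradient numerically. A self-contained sketch using central
//-- differences; numericalGradient and its parameter J are hypothetical names,
//-- with J assumed to evaluate the cost for an unrolled weight vector such as
//-- the one returned by gradient() above:

#include <cstddef>
#include <functional>
#include <vector>

std::vector<double> numericalGradient(
    const std::function<double(const std::vector<double>&)>& J,
    std::vector<double> theta,
    const double eps = 1e-4)
{
    std::vector<double> grad(theta.size());
    for (std::size_t i = 0; i < theta.size(); ++i)
    {
        const double saved = theta[i];
        theta[i] = saved + eps;                    // J(theta + eps * e_i)
        const double plus = J(theta);
        theta[i] = saved - eps;                    // J(theta - eps * e_i)
        const double minus = J(theta);
        theta[i] = saved;                          // restore the weight
        grad[i] = (plus - minus) / (2.0 * eps);
    }
    return grad;
}

//-- If the backpropagation is correct, each entry should agree with the
//-- unrolled analytic gradient to several significant digits. This is far too
//-- slow for training (one full cost evaluation per weight per direction), so
//-- it is only run once as a sanity check and then disabled.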