void OnlineRNNet::weightedParameterDerivative(RealMatrix const& pattern, RealMatrix const& coefficients, RealVector& gradient){
	SIZE_CHECK(pattern.size1() == 1);//we can only process a single input at a time.
	SIZE_CHECK(coefficients.size1() == 1);
	SIZE_CHECK(pattern.size2() == inputSize());
	SIZE_CHECK(pattern.size2() == coefficients.size2());
	gradient.resize(mpe_structure->parameters());

	std::size_t numNeurons = mpe_structure->numberOfNeurons();
	std::size_t numUnits = mpe_structure->numberOfUnits();

	//first check whether this is the first call of the derivative after a change of the internal structure.
	//In this case we have to allocate A LOT of memory for the derivative and set it to zero.
	if(m_unitGradient.size1() != mpe_structure->parameters() || m_unitGradient.size2() != numNeurons){
		m_unitGradient.resize(mpe_structure->parameters(), numNeurons);
		m_unitGradient.clear();
	}

	//for the next steps see Kenji Doya, "Recurrent Networks: Learning Algorithms"

	//calculate the derivative f' for all neurons
	RealVector neuronDerivatives(numNeurons);
	for(std::size_t i = 0; i != numNeurons; ++i){
		neuronDerivatives(i) = mpe_structure->neuronDerivative(m_activation(i + inputSize() + 1));
	}

	//calculate the derivative for every weight using the derivative of the last time step
	auto hiddenWeights = columns(
		mpe_structure->weights(),
		inputSize() + 1, numUnits
	);

	//update the gradient with the effect of the last time step
	noalias(m_unitGradient) = prod(m_unitGradient, trans(hiddenWeights));

	//add the effect of the current time step
	std::size_t param = 0;
	for(std::size_t i = 0; i != numNeurons; ++i){
		for(std::size_t j = 0; j != numUnits; ++j){
			if(mpe_structure->connection(i, j)){
				m_unitGradient(param, i) += m_lastActivation(j);
				++param;
			}
		}
	}
	//sanity check: every connection corresponds to exactly one parameter
	SIZE_CHECK(param == mpe_structure->parameters());

	//multiply with the outer derivative of the neurons
	for(std::size_t i = 0; i != m_unitGradient.size1(); ++i){
		noalias(row(m_unitGradient, i)) = element_prod(row(m_unitGradient, i), neuronDerivatives);
	}

	//and formula 4 (the gradient itself)
	noalias(gradient) = prod(
		columns(m_unitGradient, numNeurons - outputSize(), numNeurons),
		row(coefficients, 0)
	);
}
void CMACMap::eval(RealMatrix const& patterns, RealMatrix& output) const{
	SIZE_CHECK(patterns.size2() == m_inputSize);
	std::size_t numPatterns = patterns.size1();
	output.resize(numPatterns, m_outputShape.numElements());
	output.clear();
	for(std::size_t i = 0; i != numPatterns; ++i){
		std::vector<std::size_t> indizes = getIndizes(row(patterns, i));
		for(std::size_t o = 0; o != output.size2(); ++o){
			for(std::size_t j = 0; j != m_tilings; ++j){
				output(i, o) += m_parameters(indizes[j] + o * m_parametersPerTiling);
			}
		}
	}
}
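// The flat indexing above is easy to get wrong, so here is a minimal stand-alone sketch of the
// lookup scheme (not Shark API; cmacOutput, parameters, indizes and parametersPerTiling are
// illustrative names): each output component o owns a contiguous block of parametersPerTiling
// weights, and tiling j contributes the weight stored at offset indizes[j] inside that block.
#include <cstddef>
#include <vector>

double cmacOutput(
	std::vector<double> const& parameters,   // flat weight vector, one block per output
	std::vector<std::size_t> const& indizes, // one active cell index per tiling
	std::size_t o,                           // index of the output component
	std::size_t parametersPerTiling          // size of one output's weight block
){
	double sum = 0.0;
	for(std::size_t j = 0; j != indizes.size(); ++j)
		sum += parameters[indizes[j] + o * parametersPerTiling];
	return sum;
}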
// subtract the column-wise mean (the mean row) from each row, so that afterwards every column of mat sums to zero
void ZeroSum(RealMatrix& mat)
{
	RealVector sum(mat.size2(), 0.0);
	for(size_t j = 0; j < mat.size1(); j++)
		sum += row(mat, j);
	RealVector mean = (1.0 / mat.size1()) * sum;
	for(size_t j = 0; j < mat.size1(); j++)
		row(mat, j) -= mean;
}
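// Minimal usage sketch for ZeroSum, assuming the function is visible in the current translation
// unit; the header path and the data values are illustrative, not part of the code above.
#include <shark/LinAlg/Base.h>
using namespace shark;

void zeroSumExample(){
	RealMatrix data(3, 2);
	data(0, 0) = 1.0; data(0, 1) = 2.0;
	data(1, 0) = 3.0; data(1, 1) = 4.0;
	data(2, 0) = 5.0; data(2, 1) = 6.0;
	ZeroSum(data);
	// the column means were (3, 4); the rows are now (-2,-2), (0,0), (2,2),
	// so each column sums to zero
}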
double evalDerivative(
	RealMatrix const&,            // labels are ignored
	RealMatrix const& prediction,
	RealMatrix& gradient
) const{
	//both the loss value and its derivative are identically zero
	gradient.resize(prediction.size1(), prediction.size2());
	gradient.clear();
	return 0;
}
void CMACMap::weightedParameterDerivative(
	RealMatrix const& patterns,
	RealMatrix const& coefficients,
	State const&,//not needed
	RealVector& gradient
) const{
	SIZE_CHECK(patterns.size2() == m_inputSize);
	SIZE_CHECK(coefficients.size2() == m_outputSize);
	SIZE_CHECK(coefficients.size1() == patterns.size1());
	std::size_t numPatterns = patterns.size1();
	gradient.resize(m_parameters.size());
	gradient.clear();
	for(std::size_t i = 0; i != numPatterns; ++i){
		std::vector<std::size_t> indizes = getIndizes(row(patterns, i));
		for(std::size_t o = 0; o != m_outputSize; ++o){
			for(std::size_t j = 0; j != m_tilings; ++j){
				gradient(indizes[j] + o * m_parametersPerTiling) += coefficients(i, o);
			}
		}
	}
}
void DiscreteLoss::defineCostMatrix(RealMatrix const& cost){
	// check validity: the matrix must be square, all entries must be non-negative,
	// and correct predictions (the diagonal) must incur zero cost
	std::size_t size = cost.size1();
	SHARK_ASSERT(cost.size2() == size);
	for(std::size_t i = 0; i != size; i++){
		for(std::size_t j = 0; j != size; j++){
			SHARK_ASSERT(cost(i, j) >= 0.0);
		}
		SHARK_ASSERT(cost(i, i) == 0.0);
	}
	m_cost = cost;
}
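// Hedged usage sketch for defineCostMatrix: an asymmetric 2x2 cost matrix in which predicting
// class 0 for a true class-1 example is five times as costly as the opposite error. The header
// path follows the Shark source tree layout and may need adjusting; how the DiscreteLoss object
// itself is constructed is left open here.
#include <shark/ObjectiveFunctions/Loss/DiscreteLoss.h>

void setupAsymmetricLoss(shark::DiscreteLoss& loss){
	shark::RealMatrix cost(2, 2, 0.0);
	cost(0, 1) = 1.0; // true label 0 predicted as 1
	cost(1, 0) = 5.0; // true label 1 predicted as 0
	loss.defineCostMatrix(cost); // passes the checks above: square, non-negative, zero diagonal
}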
//! Returns a model mapping encoded data from the
//! m-dimensional PCA coordinate system back to the
//! n-dimensional original coordinate system.
void PCA::decoder(LinearModel<>& model, std::size_t m){
	if(!m) m = std::min(m_n, m_l);
	//without dimensionality reduction and whitening, the decoder is just the full eigenbasis
	if(m == m_n && !m_whitening){
		model.setStructure(m_eigenvectors, m_mean);
		return;
	}
	RealMatrix A = columns(m_eigenvectors, 0, m);
	if(m_whitening){
		for(std::size_t i = 0; i < A.size2(); i++){
			//take care of numerical difficulties for very small eigenvalues.
			if(m_eigenvalues(i) / m_eigenvalues(0) < 1.e-15){
				column(A, i).clear();
			}
			else{
				column(A, i) = column(A, i) * std::sqrt(m_eigenvalues(i));
			}
		}
	}
	model.setStructure(A, m_mean);
}
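// Hedged usage sketch for encoder()/decoder(), following the pattern of the Shark PCA tutorial;
// 'inputs' and 'm' are placeholders, and the header paths may need adjusting for your Shark version.
#include <shark/Algorithms/Trainers/PCA.h>
#include <shark/Models/LinearModel.h>
using namespace shark;

void pcaRoundTrip(UnlabeledData<RealVector> const& inputs, std::size_t m){
	PCA pca(inputs);                         // computes mean, eigenvectors and eigenvalues
	LinearModel<> enc, dec;
	pca.encoder(enc, m);                     // original space -> m PCA coordinates
	pca.decoder(dec, m);                     // m PCA coordinates -> original space
	Data<RealVector> encoded = enc(inputs);
	Data<RealVector> reconstructed = dec(encoded);
}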
RealMatrix NormalizeComponentsWhitening::createWhiteningMatrix(
	RealMatrix& covariance
){
	SIZE_CHECK(covariance.size1() == covariance.size2());
	std::size_t m = covariance.size1();
	//we use the inverse cholesky decomposition for whitening.
	//since we have to assume that covariance does not have full rank, we use
	//the generalized decomposition
	RealMatrix whiteningMatrix(m, m, 0.0);

	//do a pivoting cholesky decomposition;
	//this destroys the covariance matrix, as it is not needed anymore afterwards.
	PermutationMatrix permutation(m);
	std::size_t rank = pivotingCholeskyDecompositionInPlace(covariance, permutation);
	//only take the nonzero columns as C
	auto C = columns(covariance, 0, rank);

	//full rank means that we can use the typical cholesky inverse with pivoting,
	//so U is P C^-1 P^T
	if(rank == m){
		noalias(whiteningMatrix) = identity_matrix<double>(m);
		solveTriangularSystemInPlace<SolveXAB, upper>(trans(C), whiteningMatrix);
		swap_full_inverted(permutation, whiteningMatrix);
		return whiteningMatrix;
	}
	//rank-deficient case:
	//A' = P C(C^TC)^-1(C^TC)^-1 C^T P^T
	//=> P^T U P = C(C^TC)^-1
	//<=> P^T U P (C^TC) = C
	RealMatrix CTC = prod(trans(C), C);
	auto submat = columns(whiteningMatrix, 0, rank);
	solveSymmPosDefSystem<SolveXAB>(CTC, submat, C);
	swap_full_inverted(permutation, whiteningMatrix);
	return whiteningMatrix;
}
void OnlineRNNet::eval(RealMatrix const& pattern, RealMatrix& output){
	SIZE_CHECK(pattern.size1() == 1);//we can only process a single input at a time.
	SIZE_CHECK(pattern.size2() == inputSize());

	std::size_t numUnits = mpe_structure->numberOfUnits();
	if(m_lastActivation.size() != numUnits){
		m_activation.resize(numUnits);
		m_lastActivation.resize(numUnits);
		zero(m_activation);
		zero(m_lastActivation);
	}
	swap(m_lastActivation, m_activation);

	//we want to treat input and bias neurons exactly as hidden or output neurons, so we copy the current
	//pattern to the beginning of the last activation pattern and set the bias neuron to 1,
	//so m_lastActivation has the format (input|1|lastNeuronActivation)
	noalias(subrange(m_lastActivation, 0, mpe_structure->inputs())) = row(pattern, 0);
	m_lastActivation(mpe_structure->bias()) = 1;
	m_activation(mpe_structure->bias()) = 1;

	//the activation of the hidden neurons is now just a matrix-vector multiplication
	fast_prod(
		mpe_structure->weights(),
		m_lastActivation,
		subrange(m_activation, inputSize() + 1, numUnits)
	);

	//now apply the sigmoid function
	for(std::size_t i = inputSize() + 1; i != numUnits; i++){
		m_activation(i) = mpe_structure->neuron(m_activation(i));
	}

	//copy the result to the output
	output.resize(1, outputSize());
	noalias(row(output, 0)) = subrange(m_activation, numUnits - outputSize(), numUnits);
}