// subtract the mean from each row
void ZeroSum(RealMatrix& mat)
{
	RealVector sum(mat.size2(), 0.0);
	for (size_t j = 0; j < mat.size1(); j++)
		sum += row(mat, j);
	RealVector mean = (1.0 / mat.size1()) * sum;
	for (size_t j = 0; j < mat.size1(); j++)
		row(mat, j) -= mean;
}
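// Hedged usage sketch for ZeroSum (assumes Shark's dense RealMatrix/RealVector
// and the row() proxy used above; the values are made up). After the call,
// every column of data sums to zero.
RealMatrix data(3, 2);
data(0,0) = 1.0; data(0,1) = 2.0;
data(1,0) = 3.0; data(1,1) = 4.0;
data(2,0) = 5.0; data(2,1) = 6.0;
ZeroSum(data); // rows become (-2,-2), (0,0), (2,2)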
// adds a constant value c to every component of the input
void eval(RealMatrix const& patterns, RealMatrix& output, State& state)const{
	output.resize(patterns.size1(), m_dim);
	for(std::size_t p = 0; p != patterns.size1(); ++p){
		for (size_t i = 0; i != m_dim; ++i)
			output(p,i) = patterns(p,i) + m_c;
	}
}
void OnlineRNNet::weightedParameterDerivative(
	RealMatrix const& pattern, const RealMatrix& coefficients, RealVector& gradient
){
	SIZE_CHECK(pattern.size1() == 1);//we can only process a single input at a time.
	SIZE_CHECK(coefficients.size1() == 1);
	SIZE_CHECK(pattern.size2() == inputSize());
	SIZE_CHECK(coefficients.size2() == outputSize());
	gradient.resize(mpe_structure->parameters());

	std::size_t numNeurons = mpe_structure->numberOfNeurons();
	std::size_t numUnits = mpe_structure->numberOfUnits();

	//first check whether this is the first call of the derivative after a change of the
	//internal structure. In this case we have to allocate A LOT of memory for the
	//derivative and set it to zero.
	if(m_unitGradient.size1() != mpe_structure->parameters()
		|| m_unitGradient.size2() != numNeurons){
		m_unitGradient.resize(mpe_structure->parameters(), numNeurons);
		m_unitGradient.clear();
	}

	//for the next steps see Kenji Doya, "Recurrent Networks: Learning Algorithms"

	//calculate the derivative f' for all neurons
	RealVector neuronDerivatives(numNeurons);
	for(std::size_t i = 0; i != numNeurons; ++i){
		neuronDerivatives(i) = mpe_structure->neuronDerivative(m_activation(i + inputSize() + 1));
	}

	//calculate the derivative for every weight using the derivative of the last time step
	auto hiddenWeights = columns(
		mpe_structure->weights(),
		inputSize() + 1, numUnits
	);

	//update the new gradient with the effect of the last time step
	noalias(m_unitGradient) = prod(m_unitGradient, trans(hiddenWeights));

	//add the effect of the current time step
	std::size_t param = 0;
	for(std::size_t i = 0; i != numNeurons; ++i){
		for(std::size_t j = 0; j != numUnits; ++j){
			if(mpe_structure->connection(i,j)){
				m_unitGradient(param,i) += m_lastActivation(j);
				++param;
			}
		}
	}

	//multiply with the outer derivative of the neurons
	for(std::size_t i = 0; i != m_unitGradient.size1(); ++i){
		noalias(row(m_unitGradient,i)) = element_prod(row(m_unitGradient,i), neuronDerivatives);
	}
	//and formula 4 (the gradient itself)
	noalias(gradient) = prod(
		columns(m_unitGradient, numNeurons - outputSize(), numNeurons),
		row(coefficients,0)
	);
	//sanity check
	SIZE_CHECK(param == mpe_structure->parameters());
}
virtual void weightedParameterDerivative(
	RealMatrix const& input, RealMatrix const& coefficients, State const& state, RealVector& derivative
)const{
	derivative.resize(1);
	derivative(0) = 0;
	for (size_t p = 0; p < coefficients.size1(); p++){
		derivative(0) += sum(row(coefficients,p));
	}
}
double evalDerivative(
	RealMatrix const&, RealMatrix const& prediction, RealMatrix& gradient
) const{
	gradient.resize(prediction.size1(), prediction.size2());
	gradient.clear();
	return 0;
}
void CMACMap::weightedParameterDerivative(
	RealMatrix const& patterns,
	RealMatrix const& coefficients,
	State const&,//not needed
	RealVector& gradient
) const{
	SIZE_CHECK(patterns.size2() == m_inputSize);
	SIZE_CHECK(coefficients.size2() == m_outputSize);
	SIZE_CHECK(coefficients.size1() == patterns.size1());
	std::size_t numPatterns = patterns.size1();

	gradient.resize(m_parameters.size());
	gradient.clear();
	for(std::size_t i = 0; i != numPatterns; ++i){
		std::vector<std::size_t> indizes = getIndizes(row(patterns,i));
		for (std::size_t o = 0; o != m_outputSize; ++o){
			for (std::size_t j = 0; j != m_tilings; ++j){
				gradient(indizes[j] + o*m_parametersPerTiling) += coefficients(i,o);
			}
		}
	}
}
void DiscreteLoss::defineCostMatrix(RealMatrix const& cost){
	// check validity
	std::size_t size = cost.size1();
	SHARK_ASSERT(cost.size2() == size);
	for (std::size_t i = 0; i != size; i++){
		for (std::size_t j = 0; j != size; j++){
			SHARK_ASSERT(cost(i, j) >= 0.0);
		}
		SHARK_ASSERT(cost(i, i) == 0.0);
	}
	m_cost = cost;
}
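// Hedged usage sketch for defineCostMatrix: an asymmetric 2-class cost matrix,
// read here as cost(true label, prediction). The diagonal must be zero and all
// entries non-negative, as checked above; the construction from a cost matrix
// and the values are illustrative assumptions.
RealMatrix cost(2, 2, 0.0);
cost(0, 1) = 1.0; // mild penalty: true class 0 predicted as 1
cost(1, 0) = 5.0; // severe penalty: true class 1 predicted as 0
DiscreteLoss loss(cost);     // assuming construction from a cost matrix
loss.defineCostMatrix(cost); // defineCostMatrix can replace the matrix later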
RealMatrix NormalizeComponentsWhitening::createWhiteningMatrix(
	RealMatrix& covariance
){
	SIZE_CHECK(covariance.size1() == covariance.size2());
	std::size_t m = covariance.size1();
	//we use the inverse Cholesky factor for whitening.
	//since we cannot assume that the covariance matrix has full rank, we use
	//the generalized (pivoting) decomposition
	RealMatrix whiteningMatrix(m, m, 0.0);

	//do a pivoting Cholesky decomposition
	//this destroys the covariance matrix, as it is no longer needed afterwards.
	PermutationMatrix permutation(m);
	std::size_t rank = pivotingCholeskyDecompositionInPlace(covariance, permutation);
	//only take the nonzero columns as C
	auto C = columns(covariance, 0, rank);

	//full rank means that we can use the typical Cholesky inverse with pivoting,
	//so U is P C^-1 P^T
	if(rank == m){
		noalias(whiteningMatrix) = identity_matrix<double>(m);
		solveTriangularSystemInPlace<SolveXAB, upper>(trans(C), whiteningMatrix);
		swap_full_inverted(permutation, whiteningMatrix);
		return whiteningMatrix;
	}
	//rank-deficient case:
	//A' = P C(C^TC)^-1(C^TC)^-1 C^T P^T
	//=> P^T U P = C(C^TC)^-1
	//<=> P^T U P (C^TC) = C
	RealMatrix CTC = prod(trans(C), C);
	auto submat = columns(whiteningMatrix, 0, rank);
	solveSymmPosDefSystem<SolveXAB>(CTC, submat, C);
	swap_full_inverted(permutation, whiteningMatrix);
	return whiteningMatrix;
}
void CMACMap::eval(RealMatrix const& patterns, RealMatrix& output) const{
	SIZE_CHECK(patterns.size2() == m_inputSize);
	std::size_t numPatterns = patterns.size1();
	output.resize(numPatterns, m_outputSize);
	output.clear();
	for(std::size_t i = 0; i != numPatterns; ++i){
		std::vector<std::size_t> indizes = getIndizes(row(patterns,i));
		for (std::size_t o = 0; o != m_outputSize; ++o){
			for (std::size_t j = 0; j != m_tilings; ++j){
				output(i,o) += m_parameters(indizes[j] + o*m_parametersPerTiling);
			}
		}
	}
}
//! Returns a model mapping the original data to the
//! m-dimensional PCA coordinate system.
void PCA::encoder(LinearModel<>& model, std::size_t m){
	if(!m) m = std::min(m_n, m_l);
	RealMatrix A = trans(columns(m_eigenvectors, 0, m));
	RealVector offset = -prod(A, m_mean);
	if(m_whitening){
		for(std::size_t i = 0; i < A.size1(); i++){
			//take care of numerical difficulties for very small eigenvalues.
			if(m_eigenvalues(i) / m_eigenvalues(0) < 1.e-15){
				row(A,i).clear();
				offset(i) = 0;
			}
			else{
				row(A,i) /= std::sqrt(m_eigenvalues(i));
				offset(i) /= std::sqrt(m_eigenvalues(i));
			}
		}
	}
	model.setStructure(A, offset);
}
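// Hedged usage sketch for PCA::encoder (assumes Shark's PCA trainer,
// UnlabeledData and createDataFromRange; the toy data is illustrative and
// includes are omitted as in the snippets above).
std::vector<RealVector> points(10, RealVector(3));
// ... fill points with samples ...
UnlabeledData<RealVector> data = createDataFromRange(points);
PCA pca(data);                        // eigen-decomposition of the data covariance
LinearModel<> enc;
pca.encoder(enc, 2);                  // projection onto the first two principal components
Data<RealVector> encoded = enc(data); // map the data into PCA coordinates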
void OnlineRNNet::eval(RealMatrix const& pattern, RealMatrix& output){
	SIZE_CHECK(pattern.size1() == 1);//we can only process a single input at a time.
	SIZE_CHECK(pattern.size2() == inputSize());

	std::size_t numUnits = mpe_structure->numberOfUnits();
	if(m_lastActivation.size() != numUnits){
		m_activation.resize(numUnits);
		m_lastActivation.resize(numUnits);
		zero(m_activation);
		zero(m_lastActivation);
	}
	swap(m_lastActivation, m_activation);

	//we want to treat input and bias neurons exactly like hidden or output neurons, so we copy
	//the current pattern to the beginning of the last activation pattern and set the bias neuron to 1
	//so m_lastActivation has the format (input|1|lastNeuronActivation)
	noalias(subrange(m_lastActivation, 0, mpe_structure->inputs())) = row(pattern,0);
	m_lastActivation(mpe_structure->bias()) = 1;
	m_activation(mpe_structure->bias()) = 1;

	//activation of the hidden neurons is now just a matrix-vector multiplication
	fast_prod(
		mpe_structure->weights(),
		m_lastActivation,
		subrange(m_activation, inputSize()+1, numUnits)
	);
	//now apply the sigmoid function
	for (std::size_t i = inputSize()+1; i != numUnits; i++){
		m_activation(i) = mpe_structure->neuron(m_activation(i));
	}
	//copy the result to the output
	output.resize(1, outputSize());
	noalias(row(output,0)) = subrange(m_activation, numUnits-outputSize(), numUnits);
}
void LDA::train(LinearClassifier<>& model, WeightedLabeledData<RealVector, unsigned int> const& dataset){
	if(dataset.empty()){
		throw SHARKEXCEPTION("[LDA::train] the dataset must not be empty");
	}
	std::size_t dim = inputDimension(dataset);
	std::size_t classes = numberOfClasses(dataset);

	//required statistics
	RealMatrix means(classes, dim, 0.0);
	RealMatrix covariance(dim, dim, 0.0);
	double weightSum = sumOfWeights(dataset);
	RealVector classWeight(classes, 0.0);

	//we process the data batch-wise
	for(auto const& batch: dataset.batches()){
		UIntVector const& labels = batch.data.label;
		RealMatrix points = batch.data.input;
		RealVector const& weights = batch.weight;

		//load batch and update mean
		std::size_t currentBatchSize = points.size1();
		for (std::size_t e = 0; e != currentBatchSize; e++){
			//update mean and class weight for this sample
			std::size_t c = labels(e);
			classWeight(c) += weights(e);
			noalias(row(means,c)) += weights(e)*row(points,e);
			row(points,e) *= std::sqrt(weights(e));
		}
		//update second moment matrix
		noalias(covariance) += prod(trans(points), points);
	}
	covariance /= weightSum;

	//calculate the means and the covariance matrix from the second moment
	for (std::size_t c = 0; c != classes; c++){
		if (classWeight(c) == 0.0)
			throw SHARKEXCEPTION("[LDA::train] LDA can not handle a class without examples");
		row(means,c) /= classWeight(c);
		double factor = classWeight(c) / weightSum;
		noalias(covariance) -= factor*outer_prod(row(means,c), row(means,c));
	}

	//add regularization to the diagonal
	for(std::size_t i = 0; i != dim; ++i)
		covariance(i,i) += m_regularization;

	//the decision rule of the linear classifier is
	// arg max_i log(P(x|i) * P(i))
	//= arg max_i log(P(x|i)) + log(P(i))
	//= arg max_i -1/2 (x-m_i)^T C^-1 (x-m_i) + log(P(i))
	//= arg max_i x^T C^-1 m_i - 1/2 m_i^T C^-1 m_i + log(P(i))
	//so we first compute C^-1 m_i and then the bias term

	// compute z = m_i^T C^-1 <=> z C = m_i
	// this is the expensive step of the calculation.
	RealMatrix transformedMeans = means;
	blas::solveSymmSemiDefiniteSystemInPlace<blas::SolveXAB>(covariance, transformedMeans);

	//compute bias terms -1/2 m_i^T C^-1 m_i + log(P(i))
	RealVector bias(classes);
	for(std::size_t c = 0; c != classes; ++c){
		double prior = std::log(classWeight(c) / weightSum);
		bias(c) = -0.5*inner_prod(row(means,c), row(transformedMeans,c)) + prior;
	}

	//fill the model
	model.decisionFunction().setStructure(transformedMeans, bias);
}
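// Hedged usage sketch for LDA::train (assumes Shark's ClassificationDataset,
// that a WeightedLabeledData can be built from it with a uniform weight, and a
// setRegularization() setter for m_regularization above; these names are
// assumptions, not guaranteed API).
ClassificationDataset labeled; // ... assume this was loaded elsewhere ...
WeightedLabeledData<RealVector, unsigned int> dataset(labeled, 1.0); // uniform weights
LDA lda;
lda.setRegularization(1.e-8);  // small ridge added to the covariance diagonal
LinearClassifier<> model;
lda.train(model, dataset);
Data<unsigned int> predictions = model(labeled.inputs());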