/// \brief Forward propagation of a batch of input sequences through the recurrent network.
///
/// For every sequence in the batch the full unit-activation history is stored in
/// \p state (needed later by weightedParameterDerivative for backpropagation through
/// time), and the activations of the output neurons are copied into \p outputs.
/// An optional warm-up sequence (m_warmUpSequence) is fed in before the actual
/// pattern to initialize the hidden state; its outputs are discarded.
void RNNet::eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
	//initialize the history for the whole batch of sequences
	InternalState& s = state.toState<InternalState>();
	std::size_t warmUpLength=m_warmUpSequence.size();
	std::size_t numUnits = mpe_structure->numberOfUnits();
	s.timeActivation.resize(patterns.size());
	outputs.resize(patterns.size());

	//calculation of the sequences
	for(std::size_t b = 0; b != patterns.size();++b){
		// one extra time step (t=0) holds the all-zero initial activation
		std::size_t sequenceLength = patterns[b].size()+warmUpLength+1;
		s.timeActivation[b].resize(sequenceLength,RealVector(numUnits));
		// NOTE(review): the inner vectors are sized numUnits here, but the assignment at the
		// bottom of the time loop writes only outputSize() elements — presumably this relies on
		// ublas resize-on-assign; confirm whether RealVector(outputSize()) was intended.
		outputs[b].resize(patterns[b].size(),RealVector(numUnits));
		Sequence& sequence = s.timeActivation[b];
		sequence[0].clear(); // zero initial state (ublas clear() sets all elements to 0)
		for (std::size_t t = 1; t < sequenceLength;t++){
			//we want to treat input neurons exactly as hidden or output neurons, so we copy the current
			//pattern at the beginning of the the last activation pattern. After that, all activations
			//required for this timestep are in s.timeActivation[t-1]
			if(t<=warmUpLength)
				//we are still in warm up phase
				noalias(subrange(sequence[t-1],0,inputSize())) = m_warmUpSequence[t-1];
			else
				noalias(subrange(sequence[t-1],0,inputSize())) = patterns[b][t-1-warmUpLength];
			//and set the bias to 1
			sequence[t-1](mpe_structure->bias())=1;

			//activation of the hidden neurons is now just a matrix vector multiplication
			//(input and bias units keep their externally assigned values, hence the offset inputSize()+1)
			noalias(subrange(sequence[t],inputSize()+1,numUnits)) = prod(
				mpe_structure->weights(),
				sequence[t-1]
			);

			//now apply the sigmoid function to every non-input unit
			for (std::size_t i = inputSize()+1;i != numUnits;i++)
				sequence[t](i) = mpe_structure->neuron(sequence[t](i));

			//if the warmup is over, we can copy the results into the output
			//(the output neurons are the last outputSize() units)
			if(t>warmUpLength)
				outputs[b][t-1-warmUpLength] = subrange(sequence[t],numUnits-outputSize(),numUnits);
		}
	}
}
void RNNet::weightedParameterDerivative( BatchInputType const& patterns, BatchInputType const& coefficients, State const& state, RealVector& gradient )const{ //SIZE_CHECK(pattern.size() == coefficients.size()); InternalState const& s = state.toState<InternalState>(); gradient.resize(numberOfParameters()); gradient.clear(); std::size_t numUnits = mpe_structure->numberOfUnits(); std::size_t numNeurons = mpe_structure->numberOfNeurons(); std::size_t warmUpLength=m_warmUpSequence.size(); for(std::size_t b = 0; b != patterns.size(); ++b){ Sequence const& sequence = s.timeActivation[b]; std::size_t sequenceLength = s.timeActivation[b].size(); RealMatrix errorDerivative(sequenceLength,numNeurons); errorDerivative.clear(); //copy errors for (std::size_t t = warmUpLength+1; t != sequenceLength; ++t) for(std::size_t i = 0; i != outputSize(); ++i) errorDerivative(t,i+numNeurons-outputSize())=coefficients[b][t-warmUpLength-1](i); //backprop through time for (std::size_t t = (int)sequence.size()-1; t > 0; t--){ for (std::size_t j = 0; j != numNeurons; ++j){ double derivative = mpe_structure->neuronDerivative(sequence[t](j+mpe_structure->inputs()+1)); errorDerivative(t,j)*=derivative; } noalias(row(errorDerivative,t-1)) += prod( trans(columns(mpe_structure->weights(), inputSize()+1,numUnits)), row(errorDerivative,t) ); } //update gradient for batch element i std::size_t param = 0; for (std::size_t i = 0; i != numNeurons; ++i){ for (std::size_t j = 0; j != numUnits; ++j){ if(!mpe_structure->connection(i,j))continue; for(std::size_t t=1;t != sequence.size(); ++t) gradient(param)+=errorDerivative(t,i) * sequence[t-1](j); ++param; } } //sanity check SIZE_CHECK(param == mpe_structure->parameters()); } }