void SigmoidModel::eval(BatchInputType const& patterns, BatchOutputType& outputs)const{
	SIZE_CHECK( patterns.size2() == 1 );
	outputs.resize(patterns.size1(),1);
	//note that because of the way the intermediate result is passed to the sigmoid member function
	//(facilitating derivatives and sub-classes), we here have to subtract the bias parameter.
	noalias(column(outputs,0)) = column(patterns,0)*m_parameters(0) - blas::repeat(m_parameters(1),patterns.size1());
	for(std::size_t i = 0; i != patterns.size1(); ++i)
		outputs(i,0) = sigmoid(outputs(i,0));
}
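/* A minimal standalone sketch of the forward pass above, written against
 * plain std::vector instead of Shark's batch types. The name
 * sigmoidForwardSketch and its parameters are illustrative, not library API;
 * the usual logistic sigmoid 1/(1+exp(-a)) is assumed.
 */
#include <cmath>
#include <vector>

//computes sigmoid(slope*x - bias) for every scalar pattern, mirroring eval above
std::vector<double> sigmoidForwardSketch(
	std::vector<double> const& patterns, double slope, double bias
){
	std::vector<double> outputs(patterns.size());
	for(std::size_t i = 0; i != patterns.size(); ++i){
		double a = slope*patterns[i] - bias; //pre-activation with the bias subtracted
		outputs[i] = 1.0/(1.0 + std::exp(-a)); //logistic sigmoid
	}
	return outputs;
}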
void Softmax::weightedParameterDerivative(
	BatchInputType const& patterns, BatchOutputType const& coefficients, State const& state, RealVector& gradient
)const{
	SIZE_CHECK(patterns.size2() == inputSize());
	SIZE_CHECK(coefficients.size2() == outputSize());
	SIZE_CHECK(coefficients.size1() == patterns.size1());
	//Softmax has no parameters, so the parameter gradient is empty
	gradient.resize(0);
}
void SigmoidModel::weightedInputDerivative(
	BatchInputType const& patterns, BatchOutputType const& coefficients, State const& state, BatchInputType& derivatives
)const{
	SIZE_CHECK( patterns.size2() == 1 );
	SIZE_CHECK( coefficients.size2() == 1 );
	SIZE_CHECK( coefficients.size1() == patterns.size1() );
	InternalState const& s = state.toState<InternalState>();
	std::size_t numPatterns = patterns.size1();
	derivatives.resize(numPatterns,1);
	//calculate derivative
	for(std::size_t i = 0; i != numPatterns; ++i){
		double der = sigmoidDerivative( s.result(i) );
		derivatives(i,0) = coefficients(i,0) * der * m_parameters(0);
	}
}
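/* Sketch of the chain rule applied above, assuming (not guaranteed by this
 * excerpt) that s.result caches the post-sigmoid outputs, so that
 * sigmoidDerivative(f) reduces to f*(1-f) for the logistic function.
 * All names below are illustrative.
 */
#include <vector>

//dE/dx_i = c_i * sigma'(f_i) * slope for a model f = sigma(slope*x - bias)
std::vector<double> inputDerivativeSketch(
	std::vector<double> const& cachedOutputs, //f_i from the forward pass
	std::vector<double> const& coefficients,  //c_i, e.g. dE/df_i
	double slope
){
	std::vector<double> derivatives(cachedOutputs.size());
	for(std::size_t i = 0; i != cachedOutputs.size(); ++i){
		double f = cachedOutputs[i];
		derivatives[i] = coefficients[i]*f*(1.0 - f)*slope;
	}
	return derivatives;
}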
void RNNet::eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
	//initialize the history for the whole batch of sequences
	InternalState& s = state.toState<InternalState>();
	std::size_t warmUpLength = m_warmUpSequence.size();
	std::size_t numUnits = mpe_structure->numberOfUnits();
	s.timeActivation.resize(patterns.size());
	outputs.resize(patterns.size());

	//calculation of the sequences
	for(std::size_t b = 0; b != patterns.size(); ++b){
		std::size_t sequenceLength = patterns[b].size() + warmUpLength + 1;
		s.timeActivation[b].resize(sequenceLength,RealVector(numUnits));
		outputs[b].resize(patterns[b].size(),RealVector(numUnits));
		Sequence& sequence = s.timeActivation[b];
		sequence[0].clear();
		for (std::size_t t = 1; t < sequenceLength; t++){
			//we want to treat input neurons exactly as hidden or output neurons, so we copy the current
			//pattern into the beginning of the previous activation pattern. After that, all activations
			//required for this timestep are in s.timeActivation[t-1]
			if(t <= warmUpLength) //we are still in the warm-up phase
				noalias(subrange(sequence[t-1],0,inputSize())) = m_warmUpSequence[t-1];
			else
				noalias(subrange(sequence[t-1],0,inputSize())) = patterns[b][t-1-warmUpLength];
			//and set the bias to 1
			sequence[t-1](mpe_structure->bias()) = 1;

			//activation of the hidden neurons is now just a matrix-vector multiplication
			noalias(subrange(sequence[t],inputSize()+1,numUnits)) = prod(
				mpe_structure->weights(),
				sequence[t-1]
			);
			//now apply the sigmoid function
			for (std::size_t i = inputSize()+1; i != numUnits; i++)
				sequence[t](i) = mpe_structure->neuron(sequence[t](i));
			//if the warm-up is over, we can copy the results into the output
			if(t > warmUpLength)
				outputs[b][t-1-warmUpLength] = subrange(sequence[t],numUnits-outputSize(),numUnits);
		}
	}
}
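/* Standalone sketch of one unrolled recurrent step as computed above: the
 * previous activation vector holds [input | bias=1 | neuron outputs], and
 * the next neuron outputs are one matrix-vector product followed by the
 * pointwise nonlinearity. Plain std containers and tanh are assumptions for
 * illustration; the library dispatches through mpe_structure instead.
 */
#include <cmath>
#include <vector>

std::vector<double> rnnStepSketch(
	std::vector<std::vector<double> > const& weights, //numNeurons x numUnits
	std::vector<double> const& prevActivation         //[input | bias=1 | neurons]
){
	std::vector<double> out(weights.size());
	for(std::size_t i = 0; i != weights.size(); ++i){
		double sum = 0.0;
		for(std::size_t j = 0; j != prevActivation.size(); ++j)
			sum += weights[i][j]*prevActivation[j]; //one row of the product
		out[i] = std::tanh(sum); //sigmoid-type neuron function
	}
	return out;
}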
void Softmax::eval(BatchInputType const& patterns, BatchOutputType& outputs)const{
	SIZE_CHECK(patterns.size2() == inputSize());
	if(inputSize() == 1){
		//a single input x produces the two-class softmax of (x,-x)
		outputs.resize(patterns.size1(),2);
		for(std::size_t i = 0; i != patterns.size1(); ++i){
			outputs(i,0) = exp(patterns(i,0));
			outputs(i,1) = 1/outputs(i,0); //exp(-x)
		}
	}else{
		outputs.resize(patterns.size1(),inputSize());
		noalias(outputs) = exp(patterns);
	}
	//normalize every row to sum to one
	for(std::size_t i = 0; i != patterns.size1(); ++i){
		row(outputs,i) /= sum(row(outputs,i));
	}
}
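/* Tiny numeric sketch of the row normalization above: each row is
 * exponentiated and divided by its sum, e.g. (1,2,3) maps to roughly
 * (0.090, 0.245, 0.665). A standalone single-row version with an
 * illustrative name, not the library's batched code path.
 */
#include <cmath>
#include <vector>

std::vector<double> softmaxRowSketch(std::vector<double> x){
	double total = 0.0;
	for(std::size_t k = 0; k != x.size(); ++k){
		x[k] = std::exp(x[k]); //exponentiate
		total += x[k];
	}
	for(std::size_t k = 0; k != x.size(); ++k)
		x[k] /= total; //normalize the row to sum to one
	return x;
}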
void SigmoidModel::weightedParameterDerivative(
	BatchInputType const& patterns, BatchOutputType const& coefficients, State const& state, RealVector& gradient
)const{
	SIZE_CHECK( patterns.size2() == 1 );
	SIZE_CHECK( coefficients.size2() == 1 );
	SIZE_CHECK( coefficients.size1() == patterns.size1() );
	InternalState const& s = state.toState<InternalState>();
	gradient.resize(2);
	gradient(0) = 0;
	gradient(1) = 0;
	//calculate derivative
	for(std::size_t i = 0; i != patterns.size1(); ++i){
		double derivative = sigmoidDerivative( s.result(i) );
		double slope = coefficients(i,0)*derivative*patterns(i,0); //w.r.t. slope
		if ( m_transformForUnconstrained )
			slope *= m_parameters(0); //chain rule when the slope is reparametrized for unconstrained optimization
		gradient(0) += slope;
		if ( m_useOffset ) {
			gradient(1) -= coefficients(i,0)*derivative; //w.r.t. bias parameter
		}
	}
}
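/* A finite-difference sanity check for the slope gradient accumulated above,
 * for the plain case m_transformForUnconstrained == false and assuming the
 * logistic sigmoid. The epsilon, the single-pattern setup, and all names are
 * illustrative choices, not library API.
 */
#include <cmath>

static double sigmoidOutSketch(double x, double w, double b){
	return 1.0/(1.0 + std::exp(-(w*x - b)));
}

//returns numeric minus analytic d(c*f)/dw; should be close to zero
double slopeGradientCheckSketch(double x, double w, double b, double c){
	double const eps = 1e-6;
	double numeric = c*(sigmoidOutSketch(x,w + eps,b) - sigmoidOutSketch(x,w - eps,b))/(2*eps);
	double f = sigmoidOutSketch(x,w,b);
	double analytic = c*f*(1 - f)*x; //the per-pattern term summed into gradient(0)
	return numeric - analytic;
}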
void RNNet::weightedParameterDerivative(
	BatchInputType const& patterns, BatchInputType const& coefficients, State const& state, RealVector& gradient
)const{
	//SIZE_CHECK(pattern.size() == coefficients.size());
	InternalState const& s = state.toState<InternalState>();
	gradient.resize(numberOfParameters());
	gradient.clear();
	std::size_t numUnits = mpe_structure->numberOfUnits();
	std::size_t numNeurons = mpe_structure->numberOfNeurons();
	std::size_t warmUpLength = m_warmUpSequence.size();
	for(std::size_t b = 0; b != patterns.size(); ++b){
		Sequence const& sequence = s.timeActivation[b];
		std::size_t sequenceLength = s.timeActivation[b].size();
		RealMatrix errorDerivative(sequenceLength,numNeurons);
		errorDerivative.clear();
		//copy errors
		for (std::size_t t = warmUpLength+1; t != sequenceLength; ++t)
			for(std::size_t i = 0; i != outputSize(); ++i)
				errorDerivative(t,i+numNeurons-outputSize()) = coefficients[b][t-warmUpLength-1](i);

		//backpropagation through time
		for (std::size_t t = sequence.size()-1; t > 0; t--){
			for (std::size_t j = 0; j != numNeurons; ++j){
				double derivative = mpe_structure->neuronDerivative(sequence[t](j+mpe_structure->inputs()+1));
				errorDerivative(t,j) *= derivative;
			}
			noalias(row(errorDerivative,t-1)) += prod(
				trans(columns(mpe_structure->weights(),inputSize()+1,numUnits)),
				row(errorDerivative,t)
			);
		}

		//update gradient for batch element b
		std::size_t param = 0;
		for (std::size_t i = 0; i != numNeurons; ++i){
			for (std::size_t j = 0; j != numUnits; ++j){
				if(!mpe_structure->connection(i,j)) continue;
				for(std::size_t t = 1; t != sequence.size(); ++t)
					gradient(param) += errorDerivative(t,i) * sequence[t-1](j);
				++param;
			}
		}
		//sanity check
		SIZE_CHECK(param == mpe_structure->parameters());
	}
}
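/* Sketch of the accumulation rule used in the gradient update above: the
 * derivative of the error w.r.t. a weight from unit j into neuron i is the
 * sum over time of delta_i(t) * a_j(t-1). The names delta and activation
 * stand in for errorDerivative and the cached sequence; this ignores the
 * connection mask and parameter packing handled by mpe_structure.
 */
#include <vector>

double bpttWeightGradientSketch(
	std::vector<std::vector<double> > const& delta,      //delta[t][i]
	std::vector<std::vector<double> > const& activation, //activation[t][j]
	std::size_t i, std::size_t j                         //weight from unit j into neuron i
){
	double grad = 0.0;
	for(std::size_t t = 1; t != delta.size(); ++t)
		grad += delta[t][i]*activation[t - 1][j]; //same sum as the innermost loop above
	return grad;
}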
void Softmax::weightedInputDerivative(
	BatchInputType const& patterns, BatchOutputType const& coefficients, State const& state, BatchOutputType& gradient
)const{
	SIZE_CHECK(patterns.size2() == inputSize());
	SIZE_CHECK(coefficients.size2() == outputSize());
	SIZE_CHECK(coefficients.size1() == patterns.size1());
	InternalState const& s = state.toState<InternalState>();
	gradient.resize(patterns.size1(),inputSize());
	gradient.clear();
	if(inputSize() == 1){
		//the single input x is mapped to the two-class softmax of (x,-x),
		//so f0 = sigmoid(2x) and df0/dx = -df1/dx = 2*f0*(1-f0)
		for(std::size_t i = 0; i != patterns.size1(); ++i){
			double sdx = s.results(i,0)*(1-s.results(i,0));
			gradient(i,0) = 2*(coefficients(i,0)-coefficients(i,1))*sdx;
		}
	}
	else{
		for(std::size_t i = 0; i != patterns.size1(); ++i){
			double mass = inner_prod(row(coefficients,i),row(s.results,i));
			//(c_k-m)*f_k
			noalias(row(gradient,i)) = (row(coefficients,i) - mass)*row(s.results,i);
		}
	}
}
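/* Worked form of the multi-class rule above: with mass m = sum_k c_k*f_k
 * and d f_k / d x_j = f_k*(delta_kj - f_j), the weighted input derivative is
 * sum_k c_k*f_k*(delta_kj - f_j) = f_j*(c_j - m). A standalone sketch for
 * one row; names are illustrative.
 */
#include <vector>

std::vector<double> softmaxInputGradientSketch(
	std::vector<double> const& f, //one softmax output row
	std::vector<double> const& c  //one coefficient row
){
	double mass = 0.0;
	for(std::size_t k = 0; k != f.size(); ++k)
		mass += c[k]*f[k]; //m = sum_k c_k*f_k
	std::vector<double> g(f.size());
	for(std::size_t j = 0; j != f.size(); ++j)
		g[j] = (c[j] - mass)*f[j]; //(c_j - m)*f_j
	return g;
}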
void SigmoidModel::eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
	eval(patterns,outputs);
	InternalState& s = state.toState<InternalState>();
	s.resize(patterns.size1());
	noalias(s.result) = column(outputs,0);
}
void Softmax::eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
	eval(patterns,outputs);
	InternalState& s = state.toState<InternalState>();
	s.resize(patterns.size1(),outputSize());
	noalias(s.results) = outputs;
}