Example #1
void RNNet::eval(BatchInputType const& patterns, BatchOutputType& outputs, State& state)const{
	//initialize the history for the whole batch of sequences
	InternalState& s = state.toState<InternalState>();
	std::size_t warmUpLength=m_warmUpSequence.size();
	std::size_t numUnits = mpe_structure->numberOfUnits();
	s.timeActivation.resize(patterns.size());
	outputs.resize(patterns.size());

	//calculation of the sequences
	for(std::size_t b = 0; b != patterns.size();++b){
		std::size_t sequenceLength = patterns[b].size()+warmUpLength+1;
		s.timeActivation[b].resize(sequenceLength,RealVector(numUnits));
		outputs[b].resize(patterns[b].size(),RealVector(outputSize()));
		Sequence& sequence = s.timeActivation[b];
		sequence[0].clear();
		for (std::size_t t = 1; t < sequenceLength;t++){
			//we want to treat input neurons exactly like hidden or output neurons, so we copy the current
			//pattern into the beginning of the previous step's activation vector. After that, all activations
			//required for this time step are in sequence[t-1]
			if(t<=warmUpLength)
				//we are still in warm up phase
				noalias(subrange(sequence[t-1],0,inputSize())) = m_warmUpSequence[t-1];
			else
				noalias(subrange(sequence[t-1],0,inputSize())) = patterns[b][t-1-warmUpLength];
			//and set the bias to 1
			sequence[t-1](mpe_structure->bias())=1;

			//activation of the hidden neurons is now just a matrix vector multiplication
			noalias(subrange(sequence[t],inputSize()+1,numUnits)) = prod(
				mpe_structure->weights(),
				sequence[t-1]
			);
			//now apply the sigmoid function
			for (std::size_t i = inputSize()+1;i != numUnits;i++)
				sequence[t](i) = mpe_structure->neuron(sequence[t](i));
			
			//if the warmup is over, we can copy the results into the output
			if(t>warmUpLength)
				outputs[b][t-1-warmUpLength] = subrange(sequence[t],numUnits-outputSize(),numUnits);
			
		}
	}
}
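
The forward pass stores one activation vector per time step, laid out as [inputs | bias | hidden and output neurons], with the model outputs occupying the trailing outputSize() entries; every neuron sees the complete previous vector, which is what makes the network recurrent. As a reading aid, here is a minimal standalone sketch of one step of the same recursion in plain C++, assuming a logistic activation; the names and types are illustrative and not part of the Shark API:

#include <cmath>
#include <cstddef>
#include <vector>

// One time step of the recurrence used by RNNet::eval above. The previous
// step's full activation vector [inputs | bias | neurons] feeds every neuron.
std::vector<double> rnnStep(
	std::vector<std::vector<double>> const& weights, // numNeurons x numUnits
	std::vector<double> const& input,                // current input pattern
	std::vector<double> prev)                        // activations at t-1 (copied)
{
	std::size_t numUnits = prev.size();
	std::size_t numInputs = input.size();
	// copy the input into the front of the previous activations and set the
	// bias, mirroring what eval() does on sequence[t-1]
	for (std::size_t i = 0; i != numInputs; ++i) prev[i] = input[i];
	prev[numInputs] = 1.0; // bias unit
	// matrix-vector product followed by the sigmoid fills the neuron block
	std::vector<double> current(numUnits, 0.0);
	for (std::size_t i = 0; i != weights.size(); ++i) {
		double a = 0.0;
		for (std::size_t j = 0; j != numUnits; ++j) a += weights[i][j] * prev[j];
		current[numInputs + 1 + i] = 1.0 / (1.0 + std::exp(-a)); // logistic neuron
	}
	return current;
}

Iterating rnnStep over the warm-up sequence and then over the actual pattern reproduces the time loop of eval; only the steps after the warm-up are copied into outputs.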
Example #2
void RNNet::weightedParameterDerivative(
	BatchInputType const& patterns, BatchInputType const& coefficients, 
	State const& state, RealVector& gradient
)const{
	SIZE_CHECK(patterns.size() == coefficients.size());
	InternalState const& s = state.toState<InternalState>();
	gradient.resize(numberOfParameters());
	gradient.clear();
	
	std::size_t numUnits = mpe_structure->numberOfUnits();
	std::size_t numNeurons = mpe_structure->numberOfNeurons();
	std::size_t warmUpLength=m_warmUpSequence.size();
	for(std::size_t b = 0; b != patterns.size(); ++b){
		Sequence const& sequence = s.timeActivation[b];
		std::size_t sequenceLength = s.timeActivation[b].size();
		RealMatrix errorDerivative(sequenceLength,numNeurons);
		errorDerivative.clear();
		//copy errors
		for (std::size_t t = warmUpLength+1; t != sequenceLength; ++t)
			for(std::size_t i = 0; i != outputSize(); ++i)
				errorDerivative(t,i+numNeurons-outputSize())=coefficients[b][t-warmUpLength-1](i);
		
		//backprop through time
		for (std::size_t t = sequenceLength-1; t > 0; t--){
			for (std::size_t j = 0; j != numNeurons; ++j){
				double derivative = mpe_structure->neuronDerivative(sequence[t](j+mpe_structure->inputs()+1));
				errorDerivative(t,j)*=derivative;
			}
			noalias(row(errorDerivative,t-1)) += prod(
				trans(columns(mpe_structure->weights(), inputSize()+1,numUnits)),
				row(errorDerivative,t)
			);
		}
		
		
		//update gradient for batch element b
		std::size_t param = 0;
		for (std::size_t i = 0; i != numNeurons; ++i){
			for (std::size_t j = 0; j != numUnits; ++j){
				if(!mpe_structure->connection(i,j))continue;

				for(std::size_t t=1;t != sequence.size(); ++t)
					gradient(param)+=errorDerivative(t,i) * sequence[t-1](j);
				
				++param;
			}
		}
		//sanity check
		SIZE_CHECK(param == mpe_structure->parameters());
	}
}
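
The backward loop is standard backpropagation through time: at each step the error signal is scaled by the neuron derivative, then pushed one step back through the transposed recurrent block of the weight matrix, and the gradient of weight (i,j) accumulates errorDerivative(t,i)*sequence[t-1](j) over all time steps. Below is a standalone sketch of that recursion, matching the forward sketch under Example #1; it assumes logistic neurons (derivative y*(1-y)) and a dense weight matrix without the connection(i,j) mask, and all names are illustrative:

#include <cstddef>
#include <vector>

using Matrix = std::vector<std::vector<double>>;

// Backpropagation through time for the sketched network. delta holds the
// per-step error signals of the neurons, already seeded with the output
// coefficients as in the "copy errors" loop above; act is the stored
// activation history, one [inputs | bias | neurons] vector per time step.
Matrix bpttGradient(
	Matrix const& weights,  // numNeurons x numUnits
	Matrix& delta,          // sequenceLength x numNeurons, modified in place
	Matrix const& act,      // sequenceLength x numUnits
	std::size_t numInputs)
{
	std::size_t numNeurons = weights.size();
	std::size_t numUnits = act[0].size();
	for (std::size_t t = delta.size() - 1; t > 0; --t) {
		// scale by the activation derivative, y*(1-y) for the logistic neuron
		for (std::size_t j = 0; j != numNeurons; ++j) {
			double y = act[t][numInputs + 1 + j];
			delta[t][j] *= y * (1.0 - y);
		}
		// push the error one step back through the recurrent weights (W^T)
		for (std::size_t j = 0; j != numNeurons; ++j)
			for (std::size_t i = 0; i != numNeurons; ++i)
				delta[t - 1][j] += weights[i][numInputs + 1 + j] * delta[t][i];
	}
	// gradient of weight (i,j): sum over time of delta(t,i) * act(t-1,j)
	Matrix grad(numNeurons, std::vector<double>(numUnits, 0.0));
	for (std::size_t t = 1; t != delta.size(); ++t)
		for (std::size_t i = 0; i != numNeurons; ++i)
			for (std::size_t j = 0; j != numUnits; ++j)
				grad[i][j] += delta[t][i] * act[t - 1][j];
	return grad;
}

In the real code the weight matrix is sparse, so the accumulation simply skips pairs (i,j) without a connection and emits one gradient entry per existing connection, which is why param is checked against mpe_structure->parameters() at the end.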