Example #1
void OnlineRNNet::weightedParameterDerivative(RealMatrix const& pattern, const RealMatrix& coefficients,  RealVector& gradient) {
    SIZE_CHECK(pattern.size1()==1);//we can only process a single input at a time.
    SIZE_CHECK(coefficients.size1()==1);
    SIZE_CHECK(pattern.size2() == inputSize());
    SIZE_CHECK(coefficients.size2() == outputSize());
    gradient.resize(mpe_structure->parameters());

    std::size_t numNeurons = mpe_structure->numberOfNeurons();
    std::size_t numUnits = mpe_structure->numberOfUnits();

    //first check whether this is the first call of the derivative after a change of the internal structure;
    //in this case we have to allocate A LOT of memory for the derivative and set it to zero.
    if(m_unitGradient.size1() != mpe_structure->parameters() || m_unitGradient.size2() != numNeurons) {
        m_unitGradient.resize(mpe_structure->parameters(),numNeurons);
        m_unitGradient.clear();
    }

    //for the next steps see Kenji Doya, "Recurrent Networks: Learning Algorithms"

    //calculate the derivative for all neurons f'
    RealVector neuronDerivatives(numNeurons);
    for(std::size_t i=0; i!=numNeurons; ++i) {
        neuronDerivatives(i)=mpe_structure->neuronDerivative(m_activation(i+inputSize()+1));
    }

    //calculate the derivative for every weight using the derivative of the last time step
    auto hiddenWeights = columns(
                             mpe_structure->weights(),
                             inputSize()+1,numUnits
                         );

    //update the new gradient with the effect of last timestep
    noalias(m_unitGradient) = prod(m_unitGradient,trans(hiddenWeights));

    //add the effect of the current time step
    std::size_t param = 0;
    for(std::size_t i = 0; i != numNeurons; ++i) {
        for(std::size_t j = 0; j != numUnits; ++j) {
            if(mpe_structure->connection(i,j)) {
                m_unitGradient(param,i) += m_lastActivation(j);
                ++param;
            }
        }
    }

    //multiply with outer derivative of the neurons
    for(std::size_t i = 0; i != m_unitGradient.size1(); ++i) {
        noalias(row(m_unitGradient,i)) = element_prod(row(m_unitGradient,i),neuronDerivatives);
    }
    //and formula 4 (the gradient itself)
    noalias(gradient) = prod(
                            columns(m_unitGradient,numNeurons-outputSize(),numNeurons),
                            row(coefficients,0)
                        );
    //sanity check
    SIZE_CHECK(param == mpe_structure->parameters());
}
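For reference, a sketch (notation mine, not taken from the source) of the real-time recurrent learning recursion that m_unitGradient maintains: writing z(t-1) for the concatenated (input | 1 | activation) vector of the previous step, entry (k,i) of m_unitGradient holds the derivative of neuron i with respect to parameter w_k,

\[
\frac{\partial y_i(t)}{\partial w_k}
= f'\big(a_i(t)\big)\Big(\sum_{j} w_{ij}\,\frac{\partial y_j(t-1)}{\partial w_k}
+ \delta_{i,i_k}\, z_{j_k}(t-1)\Big),
\]

where parameter w_k is the connection from unit j_k into neuron i_k, and the returned vector is the coefficient-weighted sum over the output neurons,

\[
\text{gradient}_k = \sum_{o \in \text{outputs}} c_o\, \frac{\partial y_o(t)}{\partial w_k}.
\]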
Example #2
void CMACMap::eval(RealMatrix const& patterns,RealMatrix &output) const{
	SIZE_CHECK(patterns.size2() == m_inputSize);
	std::size_t numPatterns = patterns.size1();
	output.resize(numPatterns, m_outputShape.numElements());
	output.clear();
	
	for(std::size_t i = 0; i != numPatterns; ++i){
		std::vector<std::size_t> indizes = getIndizes(row(patterns,i));
		for (std::size_t o = 0; o != output.size2(); ++o) {
			for (std::size_t j = 0; j != m_tilings; ++j) {
				output(i,o) += m_parameters(indizes[j] + o*m_parametersPerTiling);
			}
		}
	}
}
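In my reading of the loop above, each output component is the sum of exactly one parameter per tiling,

\[
\text{output}(i,o) = \sum_{j=0}^{T-1} \theta\big(\mathrm{idx}_j(x_i) + o \cdot P\big),
\]

with T = m_tilings, P = m_parametersPerTiling, \theta = m_parameters, and \mathrm{idx}_j(x_i) the index returned by getIndizes for pattern x_i in tiling j.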
// subtract the mean row (the column-wise mean) from every row, so that each column of mat sums to zero
void ZeroSum(RealMatrix& mat)
{
	RealVector sum(mat.size2(), 0.0);
	for (size_t j=0; j<mat.size1(); j++) sum += row(mat, j);
	RealVector mean = (1.0 / mat.size1()) * sum;
	for (size_t j=0; j<mat.size1(); j++) row(mat, j) -= mean;
}
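A minimal usage sketch for ZeroSum (assuming the shark/LinAlg/Base.h header for the RealMatrix typedef used above):

#include <shark/LinAlg/Base.h>
using namespace shark;

int main(){
	RealMatrix data(3, 2);
	data(0,0) = 1.0; data(0,1) = 10.0;
	data(1,0) = 2.0; data(1,1) = 20.0;
	data(2,0) = 3.0; data(2,1) = 30.0;
	ZeroSum(data);
	// each column of data now sums to zero:
	// column 0 becomes (-1, 0, 1), column 1 becomes (-10, 0, 10)
}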
	double evalDerivative(
		RealMatrix const&, 
		RealMatrix const& prediction, 
		RealMatrix& gradient
	) const {
		gradient.resize(prediction.size1(),prediction.size2());
		gradient.clear();
		return 0;
	}
Example #5
void CMACMap::weightedParameterDerivative(
    RealMatrix const& patterns,
    RealMatrix const& coefficients,
    State const&,//not needed
    RealVector &gradient
) const {
    SIZE_CHECK(patterns.size2() == m_inputSize);
    SIZE_CHECK(coefficients.size2() == m_outputSize);
    SIZE_CHECK(coefficients.size1() == patterns.size1());
    std::size_t numPatterns = patterns.size1();
    gradient.resize(m_parameters.size());
    gradient.clear();
    for(std::size_t i = 0; i != numPatterns; ++i) {
        std::vector<std::size_t> indizes = getIndizes(row(patterns,i));
        for (std::size_t o=0; o!=m_outputSize; ++o) {
            for (std::size_t j=0; j != m_tilings; ++j) {
                gradient(indizes[j] + o*m_parametersPerTiling) += coefficients(i,o);
            }
        }
    }
}
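As I read it, this is the transpose of the forward pass: the derivative of output(i,o) with respect to parameter \theta_p is 1 exactly when p is one of the T tiles active for pattern i and output o, so the weighted derivative just scatters each coefficient back onto those parameters,

\[
\text{gradient}(p) = \sum_{i}\sum_{o} c(i,o)\,\big[\,p \in \{\mathrm{idx}_j(x_i) + o \cdot P : j = 0,\dots,T-1\}\,\big].
\]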
void DiscreteLoss::defineCostMatrix(RealMatrix const& cost){
	// check validity
	std::size_t size = cost.size1();
	SHARK_ASSERT(cost.size2() == size);
	for (std::size_t i = 0; i != size; i++){
		for (std::size_t j = 0; j != size; j++){
			SHARK_ASSERT(cost(i, j) >= 0.0);
		}
		SHARK_ASSERT(cost(i, i) == 0.0);
	}
	m_cost = cost;
}
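A minimal sketch of a cost matrix that passes these checks (the two-class setup and the concrete values are hypothetical): zero diagonal, non-negative off-diagonal entries.

	RealMatrix cost(2, 2, 0.0);
	cost(0, 1) = 1.0; // true class 0 predicted as class 1: unit cost
	cost(1, 0) = 5.0; // true class 1 predicted as class 0: five times as costly
	// then, given an existing DiscreteLoss instance `loss`:
	// loss.defineCostMatrix(cost);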
Example #7
//! Returns a model mapping encoded data from the
//! m-dimensional PCA coordinate system back to the
//! n-dimensional original coordinate system.
void PCA::decoder(LinearModel<>& model, std::size_t m) {
	if(!m) m = std::min(m_n,m_l);
	if( m == m_n && !m_whitening){
		model.setStructure(m_eigenvectors, m_mean);
		return;
	}
	RealMatrix A = columns(m_eigenvectors, 0, m);
	if(m_whitening){
		for(std::size_t i=0; i<A.size2(); i++) {
			//take care of numerical difficulties for very small eigenvalues.
			if(m_eigenvalues(i)/m_eigenvalues(0) < 1.e-15){
				column(A,i).clear();		
			}
			else{
				column(A, i) = column(A, i) * std::sqrt(m_eigenvalues(i));
			}
		}
	}

	model.setStructure(A, m_mean);
}
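In other words (notation mine): with A holding the first m eigenvector columns, the returned model maps an encoded point z back to

\[
x \approx A\,z + \bar{x},
\]

where \bar{x} is the training mean and, if the encoding was whitened, each column of A is additionally rescaled by \sqrt{\lambda_i} (and zeroed for numerically vanishing eigenvalues) to undo the unit-variance scaling of the encoder.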
RealMatrix NormalizeComponentsWhitening::createWhiteningMatrix(
	RealMatrix& covariance
){
	SIZE_CHECK(covariance.size1() == covariance.size2());
	std::size_t m = covariance.size1();
	//we use the inverse of the Cholesky factor for whitening;
	//since we cannot assume that the covariance matrix has full rank, we use
	//the generalized (pivoting) decomposition
	RealMatrix whiteningMatrix(m,m,0.0);

	//do a pivoting Cholesky decomposition;
	//this destroys the covariance matrix, which is not needed anymore afterwards.
	PermutationMatrix permutation(m);
	std::size_t rank = pivotingCholeskyDecompositionInPlace(covariance,permutation);
	//only take the nonzero columns as C
	auto C = columns(covariance,0,rank);

	//full rank means that we can use the usual Cholesky inverse with pivoting,
	//so U is P C^-1 P^T
	if(rank == m){
		noalias(whiteningMatrix) = identity_matrix<double>( m );
		solveTriangularSystemInPlace<SolveXAB,upper>(trans(C),whiteningMatrix);
		swap_full_inverted(permutation,whiteningMatrix);
		return whiteningMatrix;
	}
	//rank-deficient case:
	//A' = P C(C^TC)^-1(C^TC)^-1 C^T P^T
	//=> P^T U P = C(C^TC)^-1
	//<=> P^T U P (C^TC) = C
	RealMatrix CTC = prod(trans(C),C);

	auto submat = columns(whiteningMatrix,0,rank);
	solveSymmPosDefSystem<SolveXAB>(CTC,submat,C);
	swap_full_inverted(permutation,whiteningMatrix);

	return whiteningMatrix;
}
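A short check of the rank-deficient branch (notation mine): writing the pivoted factorization of the permuted covariance as \tilde{\Sigma} = C C^{\top} and V = C (C^{\top}C)^{-1} for the block computed by solveSymmPosDefSystem,

\[
V^{\top}\,\tilde{\Sigma}\,V = (C^{\top}C)^{-1}C^{\top}\;CC^{\top}\;C\,(C^{\top}C)^{-1} = I_{\mathrm{rank}},
\]

so the first rank columns indeed whiten the data on the range of the covariance; the final swap_full_inverted only undoes the pivoting permutation.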
Example #9
void OnlineRNNet::eval(RealMatrix const& pattern, RealMatrix& output){
	SIZE_CHECK(pattern.size1()==1);//we can only process a single input at a time.
	SIZE_CHECK(pattern.size2() == inputSize());
	
	std::size_t numUnits = mpe_structure->numberOfUnits();
	
	if(m_lastActivation.size() != numUnits){
		m_activation.resize(numUnits);
		m_lastActivation.resize(numUnits);
		zero(m_activation);
		zero(m_lastActivation);
	}
	swap(m_lastActivation,m_activation);

	//we want to treat input and bias neurons exactly like hidden or output neurons, so we copy the current
	//pattern to the beginning of the last activation pattern and set the bias neuron to 1,
	//so m_lastActivation has the format (input|1|lastNeuronActivation)
	noalias(subrange(m_lastActivation,0,mpe_structure->inputs())) = row(pattern,0);
	m_lastActivation(mpe_structure->bias())=1;
	m_activation(mpe_structure->bias())=1;

	//the activation of the hidden and output neurons is now just a matrix-vector multiplication

	fast_prod(
		mpe_structure->weights(),
		m_lastActivation,
		subrange(m_activation,inputSize()+1,numUnits)
	);

	//now apply the sigmoid function
	for (std::size_t i = inputSize()+1;i != numUnits;i++){
		m_activation(i) = mpe_structure->neuron(m_activation(i));
	}
	//copy the result to the output
	output.resize(1,outputSize());
	noalias(row(output,0)) = subrange(m_activation,numUnits-outputSize(),numUnits);
}
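In formula form (notation mine), a single call advances the network by one step:

\[
z(t) = \big(x(t),\ 1,\ y(t-1)\big), \qquad y(t) = f\big(W\,z(t)\big), \qquad \text{output}(t) = \text{last } n_{\text{out}} \text{ entries of } y(t),
\]

with W = mpe_structure->weights() and f the neuron function applied componentwise; the new activations y(t) are written into the trailing entries of m_activation.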