Example #1
// subtract the mean from each row
void ZeroSum(RealMatrix& mat)
{
	RealVector sum(mat.size2(), 0.0);
	for (size_t j=0; j<mat.size1(); j++) sum += row(mat, j);
	RealVector mean = (1.0 / mat.size1()) * sum;
	for (size_t j=0; j<mat.size1(); j++) row(mat, j) -= mean;
}
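A minimal usage sketch (assuming Shark's RealMatrix from shark/LinAlg/Base.h and the ZeroSum function above in scope; the test values are made up for illustration):

#include <shark/LinAlg/Base.h>
using namespace shark;

int main(){
	RealMatrix mat(2, 2);
	mat(0,0) = 1.0; mat(0,1) = 2.0;
	mat(1,0) = 3.0; mat(1,1) = 6.0;
	ZeroSum(mat);
	//each column now has zero mean: the rows are (-1,-2) and (1,2)
}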
Example #2
		// adds the constant m_c to each component of every input pattern
		void eval(RealMatrix const& patterns, RealMatrix& output, State& state)const
		{
			output.resize(patterns.size1(), m_dim);
			for(std::size_t p = 0; p != patterns.size1(); ++p){
				for(std::size_t i = 0; i != m_dim; ++i)
					output(p,i) = patterns(p,i) + m_c;
			}
		}
Example #3
void OnlineRNNet::weightedParameterDerivative(RealMatrix const& pattern, RealMatrix const& coefficients, RealVector& gradient) {
    SIZE_CHECK(pattern.size1()==1);//we can only process a single input at a time.
    SIZE_CHECK(coefficients.size1()==1);
    SIZE_CHECK(pattern.size2() == inputSize());
    SIZE_CHECK(pattern.size2() == coefficients.size2());
    gradient.resize(mpe_structure->parameters());

    std::size_t numNeurons = mpe_structure->numberOfNeurons();
    std::size_t numUnits = mpe_structure->numberOfUnits();

    //first check whether this is the first call of the derivative after a change of the internal
    //structure; in this case we have to allocate a lot of memory for the derivative and set it to zero.
    if(m_unitGradient.size1() != mpe_structure->parameters() || m_unitGradient.size2() != numNeurons) {
        m_unitGradient.resize(mpe_structure->parameters(),numNeurons);
        m_unitGradient.clear();
    }

    //for the next steps see Kenji Doya, "Recurrent Networks: Learning Algorithms"

    //calculate the derivative for all neurons f'
    RealVector neuronDerivatives(numNeurons);
    for(std::size_t i=0; i!=numNeurons; ++i) {
        neuronDerivatives(i)=mpe_structure->neuronDerivative(m_activation(i+inputSize()+1));
    }

    //calculate the derivative for every weight using the derivative of the last time step
    auto hiddenWeights = columns(
                             mpe_structure->weights(),
                             inputSize()+1,numUnits
                         );

    //update the gradient with the effect of the last time step
    noalias(m_unitGradient) = prod(m_unitGradient,trans(hiddenWeights));

    //add the effect of the current time step
    std::size_t param = 0;
    for(std::size_t i = 0; i != numNeurons; ++i) {
        for(std::size_t j = 0; j != numUnits; ++j) {
            if(mpe_structure->connection(i,j)) {
                m_unitGradient(param,i) += m_lastActivation(j);
                ++param;
            }
        }
    }

    //multiply with outer derivative of the neurons
    for(std::size_t i = 0; i != m_unitGradient.size1(); ++i) {
        noalias(row(m_unitGradient,i)) = element_prod(row(m_unitGradient,i),neuronDerivatives);
    }
    //and formula 4 (the gradient itself)
    noalias(gradient) = prod(
                            columns(m_unitGradient,numNeurons-outputSize(),numNeurons),
                            row(coefficients,0)
                        );
    //sanity check
    SIZE_CHECK(param == mpe_structure->parameters());
}
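Read as math (a sketch in my own notation, following the real-time recurrent learning recursion in the cited Doya chapter): m_unitGradient holds the sensitivities p_{k,(ij)}(t) = ∂y_k(t)/∂w_{ij}, and the steps above implement

p_{k,(ij)}(t) = f'\bigl(a_k(t)\bigr)\Bigl[\sum_{l} w_{kl}\, p_{l,(ij)}(t-1) + \delta_{ki}\, z_j(t-1)\Bigr],

where z(t-1) is the stacked (input|1|activation) vector, while "formula 4" contracts the output rows with the coefficients:

\nabla_{w_{ij}} E = \sum_{k \in \mathrm{outputs}} c_k\, p_{k,(ij)}(t).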
Example #4
		virtual void weightedParameterDerivative( RealMatrix const& input, RealMatrix const& coefficients, State const& state, RealVector& derivative)const
		{
			derivative.resize(1);
			derivative(0)=0;
			for (size_t p = 0; p < coefficients.size1(); p++)
			{
				derivative(0) += sum(row(coefficients,p));
			}
		}
Example #5
	double evalDerivative(
		RealMatrix const&, 
		RealMatrix const& prediction, 
		RealMatrix& gradient
	) const {
		gradient.resize(prediction.size1(),prediction.size2());
		gradient.clear();
		return 0;
	}
Example #6
File: CMAC.cpp Project: Shark-ML/Shark
void CMACMap::weightedParameterDerivative(
    RealMatrix const& patterns,
    RealMatrix const& coefficients,
    State const&,//not needed
    RealVector &gradient
) const {
    SIZE_CHECK(patterns.size2() == m_inputSize);
    SIZE_CHECK(coefficients.size2() == m_outputSize);
    SIZE_CHECK(coefficients.size1() == patterns.size1());
    std::size_t numPatterns = patterns.size1();
    gradient.resize(m_parameters.size());
    gradient.clear();
    for(std::size_t i = 0; i != numPatterns; ++i) {
        std::vector<std::size_t> indizes = getIndizes(row(patterns,i));
        for (std::size_t o=0; o!=m_outputSize; ++o) {
            for (std::size_t j=0; j != m_tilings; ++j) {
                gradient(indizes[j] + o*m_parametersPerTiling) += coefficients(i,o);
            }
        }
    }
}
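For reference, this scatter is the chain rule applied to CMAC's linear-in-parameters output (notation mine: T = m_tilings, P = m_parametersPerTiling):

y_o(x) = \sum_{j=1}^{T} w_{\mathrm{idx}_j(x) + oP}
\qquad\Rightarrow\qquad
\frac{\partial}{\partial w_k} \sum_{i,o} c_{io}\, y_o(x_i) = \sum_{(i,o,j)\,:\,\mathrm{idx}_j(x_i) + oP = k} c_{io}.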
Example #7
void DiscreteLoss::defineCostMatrix(RealMatrix const& cost){
	// check validity
	std::size_t size = cost.size1();
	SHARK_ASSERT(cost.size2() == size);
	for (std::size_t i = 0; i != size; i++){
		for (std::size_t j = 0; j != size; j++){
			SHARK_ASSERT(cost(i, j) >= 0.0);
		}
		SHARK_ASSERT(cost(i, i) == 0.0);
	}
	m_cost = cost;
}
Example #8
RealMatrix NormalizeComponentsWhitening::createWhiteningMatrix(
	RealMatrix& covariance
){
	SIZE_CHECK(covariance.size1() == covariance.size2());
	std::size_t m = covariance.size1();
	//we use the inverse of the Cholesky decomposition for whitening.
	//since we have to assume that the covariance matrix does not have full rank,
	//we use the generalized decomposition
	RealMatrix whiteningMatrix(m,m,0.0);

	//do a pivoting Cholesky decomposition;
	//this destroys the covariance matrix, as it is not needed afterwards.
	PermutationMatrix permutation(m);
	std::size_t rank = pivotingCholeskyDecompositionInPlace(covariance,permutation);
	//only take the nonzero columns as C
	auto C = columns(covariance,0,rank);

	//full rank means that we can use the usual Cholesky inverse with pivoting,
	//so U is P C^-1 P^T
	if(rank == m){
		noalias(whiteningMatrix) = identity_matrix<double>( m );
		solveTriangularSystemInPlace<SolveXAB,upper>(trans(C),whiteningMatrix);
		swap_full_inverted(permutation,whiteningMatrix);
		return whiteningMatrix;
	}
	//rank-deficient case.
	//A' = P C(C^TC)^-1(C^TC)^-1 C^T P^T
	//=> P^T U P = C(C^TC)^-1
	//<=> P^T U P (C^TC) = C
	RealMatrix CTC = prod(trans(C),C);

	auto submat = columns(whiteningMatrix,0,rank);
	solveSymmPosDefSystem<SolveXAB>(CTC,submat,C);
	swap_full_inverted(permutation,whiteningMatrix);

	return whiteningMatrix;
}
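Restating the comments as equations (a sketch; which side the matrix is applied on is my assumption): the pivoting Cholesky step factorizes the covariance as \Sigma = P\, C C^\top P^\top with C of rank r. The full-rank branch solves the triangular system X\, C^\top = I, while the rank-deficient branch solves the comment's equation literally,

P^\top U P\, (C^\top C) = C \quad\Longleftrightarrow\quad P^\top U P = C\, (C^\top C)^{-1},

so that U U^\top = P\, C\, (C^\top C)^{-2} C^\top P^\top, the pseudo-inverse of the covariance given in the comment above.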
Example #9
File: CMAC.cpp Project: Shark-ML/Shark
void CMACMap::eval(RealMatrix const& patterns,RealMatrix &output) const {
    SIZE_CHECK(patterns.size2() == m_inputSize);
    std::size_t numPatterns = patterns.size1();
    output.resize(numPatterns,m_outputSize);
    output.clear();

    for(std::size_t i = 0; i != numPatterns; ++i) {
        std::vector<std::size_t> indizes = getIndizes(row(patterns,i));
        for (std::size_t o=0; o!=m_outputSize; ++o) {
            for (std::size_t j = 0; j != m_tilings; ++j) {
                output(i,o) += m_parameters(indizes[j] + o*m_parametersPerTiling);
            }
        }
    }
}
Example #10
File: PCA.cpp Project: EQ94/Shark
//! Returns a model mapping the original data to the
//! m-dimensional PCA coordinate system.
void PCA::encoder(LinearModel<>& model, std::size_t m) {
	if(!m) m = std::min(m_n,m_l);
	
	RealMatrix A = trans(columns(m_eigenvectors, 0, m) );
	RealVector offset = -prod(A, m_mean);
	if(m_whitening){
		for(std::size_t i=0; i<A.size1(); i++) {
			//take care of numerical difficulties for very small eigenvalues.
			if(m_eigenvalues(i)/m_eigenvalues(0) < 1.e-15){
				row(A,i).clear();
				offset(i) = 0;			
			}
			else{
				row(A, i) /= std::sqrt(m_eigenvalues(i));
				offset(i) /= std::sqrt(m_eigenvalues(i));
			}
		}
	}
	model.setStructure(A, offset);
}
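In equation form (notation mine; \lambda_i and e_i are the i-th eigenvalue and eigenvector), the encoder computes y = A(x - \bar{x}) with

\mathrm{row}_i(A) =
\begin{cases}
e_i^\top / \sqrt{\lambda_i} & \text{whitening, } \lambda_i/\lambda_0 \ge 10^{-15},\\
0 & \text{whitening, } \lambda_i/\lambda_0 < 10^{-15},\\
e_i^\top & \text{no whitening,}
\end{cases}

where the zeroed rows guard against dividing by a numerically vanishing eigenvalue.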
Example #11
void OnlineRNNet::eval(RealMatrix const& pattern, RealMatrix& output){
	SIZE_CHECK(pattern.size1()==1);//we can only process a single input at a time.
	SIZE_CHECK(pattern.size2() == inputSize());
	
	std::size_t numUnits = mpe_structure->numberOfUnits();
	
	if(m_lastActivation.size() != numUnits){
		m_activation.resize(numUnits);
		m_lastActivation.resize(numUnits);
		zero(m_activation);
		zero(m_lastActivation);
	}
	swap(m_lastActivation,m_activation);

	//we want to treat input and bias neurons exactly like hidden or output neurons, so we copy the current
	//pattern to the beginning of the last activation pattern and set the bias neuron to 1,
	//so m_lastActivation has the format (input|1|lastNeuronActivation)
	noalias(subrange(m_lastActivation,0,mpe_structure->inputs())) = row(pattern,0);
	m_lastActivation(mpe_structure->bias())=1;
	m_activation(mpe_structure->bias())=1;

	//activation of the hidden neurons is now just a matrix vector multiplication

	fast_prod(
		mpe_structure->weights(),
		m_lastActivation,
		subrange(m_activation,inputSize()+1,numUnits)
	);

	//now apply the sigmoid function
	for (std::size_t i = inputSize()+1;i != numUnits;i++){
		m_activation(i) = mpe_structure->neuron(m_activation(i));
	}
	//copy the result to the output
	output.resize(1,outputSize());
	noalias(row(output,0)) = subrange(m_activation,numUnits-outputSize(),numUnits);
}
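Compactly (notation mine): with the stacked state z(t-1) = (x(t), 1, a(t-1)), the neuron block of the activation is updated as

a(t) = f\bigl(W\, z(t-1)\bigr), \qquad y(t) = \text{the last outputSize() components of } a(t),

i.e. one matrix-vector product followed by the elementwise neuron function.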
Example #12
File: LDA.cpp Project: ghisvail/Shark
void LDA::train(LinearClassifier<>& model, WeightedLabeledData<RealVector,unsigned int> const& dataset){
	if(dataset.empty()){
		throw SHARKEXCEPTION("[LDA::train] the dataset must not be empty");
	}
	std::size_t dim = inputDimension(dataset);
	std::size_t classes = numberOfClasses(dataset);

	//required statistics
	RealMatrix means(classes, dim,0.0);
	RealMatrix covariance(dim, dim,0.0);
	double weightSum = sumOfWeights(dataset);
	RealVector classWeight(classes,0.0);
	
	//we compute the statistics batch-wise
	for(auto const& batch: dataset.batches()){
		UIntVector const& labels = batch.data.label;
		RealMatrix points = batch.data.input;
		RealVector const& weights = batch.weight;
		//load batch and update mean
		std::size_t currentBatchSize = points.size1();
		for (std::size_t e=0; e != currentBatchSize; e++){
			//update mean and class count for this sample
			std::size_t c = labels(e);
			classWeight(c) += weights(e);
			noalias(row(means,c)) += weights(e)*row(points,e);
			row(points,e) *= std::sqrt(weights(e));
			
		}
		//update second moment matrix
		noalias(covariance) += prod(trans(points),points);
	}
	covariance /= weightSum;
	
	//calculate the mean and the covariance matrix from the second moment
	for (std::size_t c = 0; c != classes; c++){
		if (classWeight[c] == 0.0)
			throw SHARKEXCEPTION("[LDA::train] LDA cannot handle a class without examples");
		row(means,c) /= classWeight(c);
		double factor = classWeight(c) / weightSum;
		noalias(covariance)-= factor*outer_prod(row(means,c),row(means,c));
	}
	

	//add regularization
	diag(covariance) += m_regularization;
	
	//the formula for the linear classifier is
	// arg max_i log(P(x|i) * P(i))
	//= arg max_i log(P(x|i)) + log(P(i))
	//= arg max_i -(x-m_i)^T C^-1 (x-m_i) + log(P(i))
	//= arg max_i -m_i^T C^-1 m_i + 2*x^T C^-1 m_i + log(P(i))
	//so we first compute C^-1 m_i and then the first term
	
	// compute z = m_i^T C^-1  <=>  z C = m_i 
	// this is the expensive step of the calculation.
	RealMatrix transformedMeans = means;
	blas::solveSymmSemiDefiniteSystemInPlace<blas::SolveXAB>(covariance,transformedMeans);
	
	//compute bias terms b_i = -0.5 * m_i^T C^-1 m_i + log(P(i))
	RealVector bias(classes);
	for(std::size_t c = 0; c != classes; ++c){
		double prior = std::log(classWeight(c)/weightSum);
		bias(c) = - 0.5* inner_prod(row(means,c),row(transformedMeans,c)) + prior;
	}

	//fill the model
	model.decisionFunction().setStructure(transformedMeans,bias);
}
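Putting the comments together (a sketch in my notation), the trained classifier evaluates

\hat{c}(x) = \arg\max_c\; w_c^\top x + b_c, \qquad w_c = C^{-1} m_c, \qquad b_c = -\tfrac{1}{2}\, m_c^\top C^{-1} m_c + \log P(c),

where the rows of transformedMeans are the w_c and the entries of bias are the b_c.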