예제 #1
0
파일: Dataset.cpp 프로젝트: inglada/Shark
void testDatasetEquality(LabeledData<int, int> const& set1, LabeledData<int, int> const& set2){
	BOOST_REQUIRE_EQUAL(set1.numberOfBatches(),set2.numberOfBatches());
	BOOST_REQUIRE_EQUAL(set1.numberOfElements(),set2.numberOfElements());
	for(std::size_t i = 0; i != set1.numberOfBatches(); ++i){
		BOOST_REQUIRE_EQUAL(set1.batch(i).input.size(),set1.batch(i).label.size());
		BOOST_REQUIRE_EQUAL(set2.batch(i).input.size(),set2.batch(i).label.size());
	}
	testSetEquality(set1.inputs(),set2.inputs());
	testSetEquality(set1.labels(),set2.labels());
}
예제 #2
0
void LinearRegression::train(LinearModel<>& model, LabeledData<RealVector, RealVector> const& dataset){
	std::size_t inputDim = inputDimension(dataset);
	std::size_t outputDim = labelDimension(dataset);
	std::size_t numInputs = dataset.numberOfElements();
	std::size_t numBatches = dataset.numberOfBatches();

	//Let P be the matrix of points with n rows and X=(P|1). the 1 rpresents the bias weight
	//Let A = X^T X + lambda * I
	//than whe have (for lambda = 0)
	//A = ( P^T P  P^T 1)
	//       ( 1^T P  1^T1)
	RealMatrix matA(inputDim+1,inputDim+1,0.0);
	blas::Blocking<RealMatrix> Ablocks(matA,inputDim,inputDim);
	//compute A and the label matrix batchwise
	typedef LabeledData<RealVector, RealVector>::const_batch_reference BatchRef;
	for (std::size_t b=0; b != numBatches; b++){
		BatchRef batch = dataset.batch(b);
		symm_prod(trans(batch.input),Ablocks.upperLeft(),false);
		noalias(column(Ablocks.upperRight(),0))+=sum_rows(batch.input);
	}
	row(Ablocks.lowerLeft(),0) = column(Ablocks.upperRight(),0);
	matA(inputDim,inputDim) = numInputs;
	//X^TX+=lambda* I
	diag(Ablocks.upperLeft())+= blas::repeat(m_regularization,inputDim);
	
	
	//we also need to compute X^T L= (P^TL, 1^T L) where L is the matrix of labels 
	RealMatrix XTL(inputDim + 1,outputDim,0.0);
	for (std::size_t b=0; b != numBatches; b++){
		BatchRef batch = dataset.batch(b);
		RealSubMatrix PTL = subrange(XTL,0,inputDim,0,outputDim);
		axpy_prod(trans(batch.input),batch.label,PTL,false);
		noalias(row(XTL,inputDim))+=sum_rows(batch.label);
	}	
	
	//we solve the system A Beta = X^T L
	//usually this is solved via the moore penrose inverse:
	//Beta = A^-1 T
	//but it is faster und numerically more stable, if we solve it as a symmetric system
	//w can use in-place solve
	RealMatrix&  beta = XTL;
	blas::solveSymmSemiDefiniteSystemInPlace<blas::SolveAXB>(matA,beta);
	
	RealMatrix matrix = subrange(trans(beta), 0, outputDim, 0, inputDim);
	RealVector offset = row(beta,inputDim);
	
	// write parameters into the model
	model.setStructure(matrix, offset);
}