Example #1
File: Dataset.cpp Project: inglada/Shark
//checks that two labeled datasets agree in batch structure and in content
void testDatasetEquality(LabeledData<int, int> const& set1, LabeledData<int, int> const& set2){
	BOOST_REQUIRE_EQUAL(set1.numberOfBatches(),set2.numberOfBatches());
	BOOST_REQUIRE_EQUAL(set1.numberOfElements(),set2.numberOfElements());
	for(std::size_t i = 0; i != set1.numberOfBatches(); ++i){
		BOOST_REQUIRE_EQUAL(set1.batch(i).input.size(),set1.batch(i).label.size());
		BOOST_REQUIRE_EQUAL(set2.batch(i).input.size(),set2.batch(i).label.size());
	}
	testSetEquality(set1.inputs(),set2.inputs());
	testSetEquality(set1.labels(),set2.labels());
}
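A minimal usage sketch for this helper, assuming Shark's createLabeledDataFromRange is available and that testSetEquality is defined elsewhere in the same test file:

#include <shark/Data/Dataset.h>
#include <boost/test/unit_test.hpp>

using namespace shark;

BOOST_AUTO_TEST_CASE(LabeledData_Equality_Sketch){
	std::vector<int> inputs = {1, 2, 3, 4, 5};
	std::vector<int> labels = {0, 1, 0, 1, 0};
	//two datasets built from the same ranges must compare equal
	LabeledData<int, int> set1 = createLabeledDataFromRange(inputs, labels);
	LabeledData<int, int> set2 = createLabeledDataFromRange(inputs, labels);
	testDatasetEquality(set1, set2);
}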
Example #2
void LinearRegression::train(LinearModel<>& model, LabeledData<RealVector, RealVector> const& dataset){
	std::size_t inputDim = inputDimension(dataset);
	std::size_t outputDim = labelDimension(dataset);
	std::size_t numInputs = dataset.numberOfElements();
	std::size_t numBatches = dataset.numberOfBatches();

	//Let P be the matrix of points with n rows and let X = (P|1); the column of ones represents the bias weight.
	//Let A = X^T X + lambda * I
	//Then we have (for lambda = 0)
	//A = ( P^T P   P^T 1 )
	//    ( 1^T P   1^T 1 )
	RealMatrix matA(inputDim+1,inputDim+1,0.0);
	blas::Blocking<RealMatrix> Ablocks(matA,inputDim,inputDim);
	//compute A batchwise; the label matrix is accumulated in a second pass below
	typedef LabeledData<RealVector, RealVector>::const_batch_reference BatchRef;
	for (std::size_t b=0; b != numBatches; b++){
		BatchRef batch = dataset.batch(b);
		symm_prod(trans(batch.input),Ablocks.upperLeft(),false);
		noalias(column(Ablocks.upperRight(),0))+=sum_rows(batch.input);
	}
	row(Ablocks.lowerLeft(),0) = column(Ablocks.upperRight(),0);
	matA(inputDim,inputDim) = numInputs;
	//X^T X += lambda * I
	diag(Ablocks.upperLeft()) += blas::repeat(m_regularization,inputDim);

	//we also need to compute X^T L = (P^T L, 1^T L) where L is the matrix of labels
	RealMatrix XTL(inputDim + 1,outputDim,0.0);
	for (std::size_t b=0; b != numBatches; b++){
		BatchRef batch = dataset.batch(b);
		RealSubMatrix PTL = subrange(XTL,0,inputDim,0,outputDim);
		axpy_prod(trans(batch.input),batch.label,PTL,false);
		noalias(row(XTL,inputDim))+=sum_rows(batch.label);
	}	
	
	//we solve the system A Beta = X^T L
	//usually this is solved via the Moore-Penrose inverse:
	//Beta = A^-1 X^T L
	//but it is faster and numerically more stable to solve it as a symmetric system,
	//so we can use an in-place solve
	RealMatrix&  beta = XTL;
	blas::solveSymmSemiDefiniteSystemInPlace<blas::SolveAXB>(matA,beta);
	
	RealMatrix matrix = subrange(trans(beta), 0, outputDim, 0, inputDim);
	RealVector offset = row(beta,inputDim);
	
	// write parameters into the model
	model.setStructure(matrix, offset);
}
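A sketch of a typical call site for this trainer, assuming the standard Shark headers; the regularization strength is a made-up value:

#include <shark/Algorithms/Trainers/LinearRegression.h>
#include <shark/Models/LinearModel.h>

using namespace shark;

void fitRidge(LabeledData<RealVector, RealVector> const& data){
	LinearRegression trainer(0.1); //lambda = 0.1; 0.0 gives ordinary least squares
	LinearModel<> model;
	trainer.train(model, data); //runs the normal-equation solve shown above
	Data<RealVector> predictions = model(data.inputs()); //evaluate the fitted model
}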
Example #3
void CMySharkML::Features2SharkData(LabeledData<RealVector, unsigned int> &dataset, cv::Mat &features, std::vector<int> &v_label)
{
	//copy the rows of the feature matrix into the dataset
	std::size_t rows = features.rows;
	std::size_t dimensions = features.cols;
	std::vector<std::size_t> batchSizes = shark::detail::optimalBatchSizes(rows, 256);

	// allocate the dataset with one batch per entry of batchSizes
	dataset = LabeledData<RealVector, unsigned int>(batchSizes.size());
	std::size_t currentRow = 0;
	for(std::size_t b = 0; b != batchSizes.size(); ++b) {
		RealMatrix& inputs = dataset.batch(b).input;
		UIntVector& labels = dataset.batch(b).label;
		inputs.resize(batchSizes[b], dimensions);
		labels.resize(batchSizes[b]);
		//copy the rows into the batch
		for(std::size_t i = 0; i != batchSizes[b]; ++i,++currentRow){
			labels[i] = static_cast<unsigned int>(v_label[currentRow]); //labels are stored unsigned
			for(std::size_t j = 0; j != dimensions; ++j){
				inputs(i,j) = features.at<float>(currentRow, j);
			}
		}
	}
}
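A hypothetical call site for the converter; the feature values are random placeholders and the CMySharkML instance is assumed to be constructed elsewhere:

#include <opencv2/core/core.hpp>
#include <shark/Data/Dataset.h>

void buildDataset(CMySharkML& helper){
	cv::Mat features(100, 16, CV_32F); //100 samples, 16 float features each
	cv::randu(features, cv::Scalar(0), cv::Scalar(1)); //dummy feature values
	std::vector<int> labels(100, 0); //one raw class label per row
	shark::LabeledData<shark::RealVector, unsigned int> dataset;
	helper.Features2SharkData(dataset, features, labels);
	//dataset now holds the 100 rows split into batches of at most 256 elements
}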