void testDatasetEquality(LabeledData<int, int> const& set1, LabeledData<int, int> const& set2){
    BOOST_REQUIRE_EQUAL(set1.numberOfBatches(), set2.numberOfBatches());
    BOOST_REQUIRE_EQUAL(set1.numberOfElements(), set2.numberOfElements());
    for(std::size_t i = 0; i != set1.numberOfBatches(); ++i){
        BOOST_REQUIRE_EQUAL(set1.batch(i).input.size(), set1.batch(i).label.size());
        BOOST_REQUIRE_EQUAL(set2.batch(i).input.size(), set2.batch(i).label.size());
    }
    testSetEquality(set1.inputs(), set2.inputs());
    testSetEquality(set1.labels(), set2.labels());
}
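// A minimal usage sketch for the helper above: a hypothetical Boost.Test case.
// It assumes Shark's createLabeledDataFromRange is available for int/int data;
// the sample values are made up.
BOOST_AUTO_TEST_CASE(LabeledData_Equality){
    std::vector<int> inputs = {1, 2, 3, 4};
    std::vector<int> labels = {0, 0, 1, 1};
    LabeledData<int, int> data = createLabeledDataFromRange(inputs, labels);
    //a freshly created dataset must compare equal to itself
    testDatasetEquality(data, data);
}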
void LinearRegression::train(LinearModel<>& model, LabeledData<RealVector, RealVector> const& dataset){
    std::size_t inputDim = inputDimension(dataset);
    std::size_t outputDim = labelDimension(dataset);
    std::size_t numInputs = dataset.numberOfElements();
    std::size_t numBatches = dataset.numberOfBatches();

    //Let P be the matrix of points with n rows and X = (P|1); the 1 represents the bias weight.
    //Let A = X^T X + lambda * I
    //then we have (for lambda = 0)
    //A = ( P^T P   P^T 1 )
    //    ( 1^T P   1^T 1 )
    RealMatrix matA(inputDim + 1, inputDim + 1, 0.0);
    blas::Blocking<RealMatrix> Ablocks(matA, inputDim, inputDim);

    //compute A and the label matrix batchwise
    typedef LabeledData<RealVector, RealVector>::const_batch_reference BatchRef;
    for(std::size_t b = 0; b != numBatches; b++){
        BatchRef batch = dataset.batch(b);
        symm_prod(trans(batch.input), Ablocks.upperLeft(), false);
        noalias(column(Ablocks.upperRight(), 0)) += sum_rows(batch.input);
    }
    row(Ablocks.lowerLeft(), 0) = column(Ablocks.upperRight(), 0);
    matA(inputDim, inputDim) = numInputs;
    //X^T X += lambda * I
    diag(Ablocks.upperLeft()) += blas::repeat(m_regularization, inputDim);

    //we also need to compute X^T L = (P^T L, 1^T L) where L is the matrix of labels
    RealMatrix XTL(inputDim + 1, outputDim, 0.0);
    for(std::size_t b = 0; b != numBatches; b++){
        BatchRef batch = dataset.batch(b);
        RealSubMatrix PTL = subrange(XTL, 0, inputDim, 0, outputDim);
        axpy_prod(trans(batch.input), batch.label, PTL, false);
        noalias(row(XTL, inputDim)) += sum_rows(batch.label);
    }

    //we solve the system A Beta = X^T L
    //usually this is solved via the Moore-Penrose inverse:
    //Beta = A^-1 X^T L
    //but it is faster and numerically more stable to solve it as a symmetric system;
    //we can use an in-place solve
    RealMatrix& beta = XTL;
    blas::solveSymmSemiDefiniteSystemInPlace<blas::SolveAXB>(matA, beta);

    RealMatrix matrix = subrange(trans(beta), 0, outputDim, 0, inputDim);
    RealVector offset = row(beta, inputDim);

    //write parameters into the model
    model.setStructure(matrix, offset);
}
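// A minimal usage sketch for the trainer above, assuming Shark 3.x headers:
// train a LinearModel on a regression dataset and report the squared error.
// The dataset passed in is a placeholder; load it beforehand, e.g. via importCSV.
#include <shark/Algorithms/Trainers/LinearRegression.h>
#include <shark/ObjectiveFunctions/Loss/SquaredLoss.h>

double trainAndEvaluate(shark::LabeledData<shark::RealVector, shark::RealVector> const& data){
    shark::LinearRegression trainer(0.1);  //ridge penalty lambda = 0.1
    shark::LinearModel<> model;
    trainer.train(model, data);            //solves A Beta = X^T L as above
    shark::SquaredLoss<> loss;
    return loss(data.labels(), model(data.inputs()));
}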
void CMySharkML::Features2SharkData(LabeledData<RealVector, unsigned int> &dataset, cv::Mat &features, std::vector<int> &v_label)
{
    //copy the rows of the feature matrix into the dataset
    std::size_t rows = features.rows;
    std::size_t dimensions = features.cols;
    std::vector<std::size_t> batchSizes = shark::detail::optimalBatchSizes(rows, 256);

    //allocate the dataset with the right number of batches
    dataset = LabeledData<RealVector, unsigned int>(batchSizes.size());
    std::size_t currentRow = 0;
    for(std::size_t b = 0; b != batchSizes.size(); ++b)
    {
        RealMatrix& inputs = dataset.batch(b).input;
        UIntVector& labels = dataset.batch(b).label;
        inputs.resize(batchSizes[b], dimensions);
        labels.resize(batchSizes[b]);
        //copy the rows into the batch
        for(std::size_t i = 0; i != batchSizes[b]; ++i, ++currentRow){
            int rawLabel = v_label[currentRow];
            labels[i] = rawLabel;
            for(std::size_t j = 0; j != dimensions; ++j){
                inputs(i, j) = features.at<float>(currentRow, j);
            }
        }
    }
}
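// A minimal usage sketch for the converter above, assuming OpenCV and an
// instance of the CMySharkML class; the feature values and labels are made up.
#include <opencv2/core.hpp>

void conversionExample(CMySharkML& converter){
    cv::Mat features(4, 2, CV_32F);                          //4 samples, 2 float features each
    cv::randu(features, cv::Scalar(0.0), cv::Scalar(1.0));   //fill with random values in [0,1)
    std::vector<int> labels = {0, 1, 0, 1};                  //one label per row
    shark::LabeledData<shark::RealVector, unsigned int> dataset;
    converter.Features2SharkData(dataset, features, labels);
    //dataset now holds 4 elements, batched into chunks of at most 256 rows
}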