/**
 * Construct a decision stump and train it on the given data.
 *
 * Every row of `data` is treated as one attribute.  Each attribute whose
 * values are not all identical (as reported by IsDistinct()) is evaluated
 * with SetupSplitAttribute(); the selected attribute is stored in
 * splitAttribute and the stump is then trained on it via TrainOnAtt().
 * If no attribute is selected, attribute 0 is used.
 *
 * @param data Training data; attributes are read row by row.
 * @param labels Labels passed to the entropy and training helpers.
 * @param classes Number of classes; stored in numClass.
 * @param inpBucketSize Bucket size; stored in bucketSize.
 */
DecisionStump<MatType>::DecisionStump(const MatType& data,
                                      const arma::Row<size_t>& labels,
                                      const size_t classes,
                                      size_t inpBucketSize)
{
  numClass = classes;
  bucketSize = inpBucketSize;

  // Entropy of the full label set, used as the baseline for every candidate.
  // NOTE(review): `labels.n_elem - 1` wraps around if labels is empty;
  // this assumes a non-empty label vector -- confirm against callers.
  const double rootEntropy = CalculateEntropy<size_t>(
      labels.subvec(0, labels.n_elem - 1));

  // Use an unsigned index so that it matches the type of data.n_rows and no
  // signed/unsigned comparison takes place in the loop condition.
  size_t bestAtt = 0;
  double bestGain = 0.0;
  for (size_t i = 0; i < data.n_rows; i++)
  {
    // Attributes with identical values everywhere cannot split the data.
    if (IsDistinct<double>(data.row(i)))
    {
      // Treat this attribute as a potential splitting attribute and compute
      // the entropy obtained when splitting on it.
      const double entropy = SetupSplitAttribute(data.row(i), labels);
      const double gain = rootEntropy - entropy;

      // NOTE(review): this keeps the candidate with the *smallest* value of
      // `gain`.  That corresponds to maximizing information gain only if the
      // entropy helpers return negated entropy (sum of p * log p) -- confirm
      // against CalculateEntropy() / SetupSplitAttribute().
      if (gain < bestGain)
      {
        bestAtt = i;
        bestGain = gain;
      }
    }
  }
  splitAttribute = bestAtt;

  // Once the splitting attribute has been decided, train on it.
  TrainOnAtt<double>(data.row(splitAttribute), labels);
}
/**
 * Train the decision stump on the given data.
 *
 * Every row of `data` is treated as one dimension.  Each dimension whose
 * values are not all identical (as reported by IsDistinct()) is evaluated
 * with SetupSplitDimension(); the selected dimension is stored in
 * splitDimension and the stump is then trained on it via TrainOnDim().
 * If no dimension is selected, dimension 0 is used.
 *
 * The `classes` and `bucketSize` members are left untouched: this function
 * takes no parameters with those names, so there is nothing to assign.
 *
 * @param data Training data; dimensions are read row by row.
 * @param labels Labels passed to the entropy and training helpers.
 * @param weights Forwarded to CalculateEntropy() and SetupSplitDimension()
 *     together with the UseWeights flag.  The final TrainOnDim() call
 *     receives only the selected data row and the labels.
 */
void DecisionStump<MatType>::Train(const MatType& data,
                                   const arma::Row<size_t>& labels,
                                   const arma::rowvec& weights)
{
  // Entropy of the full label set, used as the baseline for every candidate.
  const double rootEntropy = CalculateEntropy<UseWeights>(labels, weights);

  size_t bestDim = 0;
  double bestGain = 0.0;
  for (size_t i = 0; i < data.n_rows; i++)
  {
    // Dimensions with identical values everywhere cannot split the data.
    if (IsDistinct(data.row(i)))
    {
      // Treat this dimension as a potential splitting dimension and compute
      // the entropy obtained when splitting on it.
      const double entropy =
          SetupSplitDimension<UseWeights>(data.row(i), labels, weights);
      const double gain = rootEntropy - entropy;

      // NOTE(review): this keeps the candidate with the *smallest* value of
      // `gain`.  That corresponds to maximizing information gain only if the
      // entropy helpers return negated entropy (sum of p * log p) -- confirm
      // against CalculateEntropy() / SetupSplitDimension().
      if (gain < bestGain)
      {
        bestDim = i;
        bestGain = gain;
      }
    }
  }
  splitDimension = bestDim;

  // Once the splitting dimension has been decided, train on it.
  TrainOnDim(data.row(splitDimension), labels);
}
/**
 * Construct the regularized SVD objective for the given data.
 *
 * The `data` member is initialised from math::MakeAlias() applied to the
 * argument (presumably a non-copying alias -- confirm against MakeAlias()),
 * and `rank` and `lambda` are stored as given.  The user and item counts are
 * then derived from the data and the initial point is randomly initialised.
 *
 * @param data Input data; the maximum of row 0 determines the number of
 *     users and the maximum of row 1 determines the number of items.
 * @param rank Number of rows of the initial point.
 * @param lambda Stored in the `lambda` member.
 */
RegularizedSVDFunction<MatType>::RegularizedSVDFunction(
    const MatType& data,
    const size_t rank,
    const double lambda) :
    data(math::MakeAlias(const_cast<MatType&>(data), false)),
    rank(rank),
    lambda(lambda)
{
  // The largest value in row 0 / row 1, plus one, is taken as the number of
  // users / items (this presumes zero-based ids -- confirm with the caller).
  this->numUsers = max(data.row(0)) + 1;
  this->numItems = max(data.row(1)) + 1;

  // One column per user and per item, `rank` rows, filled by randu().
  this->initialPoint.randu(rank, this->numUsers + this->numItems);
}