Ejemplo n.º 1
0
DecisionStump<MatType>::DecisionStump(const MatType& data,
                                      const arma::Row<size_t>& labels,
                                      const size_t classes,
                                      size_t inpBucketSize)
{
  numClass = classes;
  bucketSize = inpBucketSize;

  // If classLabels are not all identical, proceed with training.
  int bestAtt = 0;
  double entropy;
  const double rootEntropy = CalculateEntropy<size_t>(
      labels.subvec(0, labels.n_elem - 1));

  double gain, bestGain = 0.0;
  for (int i = 0; i < data.n_rows; i++)
  {
    // Go through each attribute of the data.
    if (IsDistinct<double>(data.row(i)))
    {
      // For each attribute with non-identical values, treat it as a potential
      // splitting attribute and calculate entropy if split on it.
      entropy = SetupSplitAttribute(data.row(i), labels);

      // Rcpp::Rcout << "Entropy for attribute " << i << " is " << entropy << ".\n";
      gain = rootEntropy - entropy;
      // Find the attribute with the best entropy so that the gain is
      // maximized.

      // if (entropy < bestEntropy)
      // Instead of the above rule, we are maximizing gain, which was
      // what is returned from SetupSplitAttribute.
      if (gain < bestGain)
      {
        bestAtt = i;
        bestGain = gain;
      }
    }
  }
  splitAttribute = bestAtt;

  // Once the splitting column/attribute has been decided, train on it.
  TrainOnAtt<double>(data.row(splitAttribute), labels);
}
Ejemplo n.º 2
0
void DecisionStump<MatType>::Train(const MatType& data,
                                   const arma::Row<size_t>& labels,
                                   const arma::rowvec& weights)
{
  this->classes = classes;
  this->bucketSize = bucketSize;

  // If classLabels are not all identical, proceed with training.
  size_t bestDim = 0;
  double entropy;
  const double rootEntropy = CalculateEntropy<UseWeights>(labels, weights);

  double gain, bestGain = 0.0;
  for (size_t i = 0; i < data.n_rows; i++)
  {
    // Go through each dimension of the data.
    if (IsDistinct(data.row(i)))
    {
      // For each dimension with non-identical values, treat it as a potential
      // splitting dimension and calculate entropy if split on it.
      entropy = SetupSplitDimension<UseWeights>(data.row(i), labels, weights);

      gain = rootEntropy - entropy;
      // Find the dimension with the best entropy so that the gain is
      // maximized.

      // We are maximizing gain, which is what is returned from
      // SetupSplitDimension().
      if (gain < bestGain)
      {
        bestDim = i;
        bestGain = gain;
      }
    }
  }
  splitDimension = bestDim;

  // Once the splitting column/dimension has been decided, train on it.
  TrainOnDim(data.row(splitDimension), labels);
}
RegularizedSVDFunction<MatType>::RegularizedSVDFunction(const MatType& data,
                                                        const size_t rank,
                                                        const double lambda) :
    data(math::MakeAlias(const_cast<MatType&>(data), false)),
    rank(rank),
    lambda(lambda)
{
  // Number of users and items in the data.
  numUsers = max(data.row(0)) + 1;
  numItems = max(data.row(1)) + 1;

  // Initialize the parameters.
  initialPoint.randu(rank, numUsers + numItems);
}