Пример #1
0
///
/// \brief Vespucci::Math::Quantification::FindBandwidthMat
/// \param X
/// \param abscissa
/// \param min
/// \param max
/// \param midlines
/// \param baselines
/// \return
/// Finds the bandwidth of every column of a arma::matrix.
arma::vec Vespucci::Math::Quantification::FindBandwidthMat(const arma::mat &X, arma::vec abscissa, double &min, double &max, arma::mat &midlines, arma::mat &baselines, arma::uvec &boundaries)
{
    double delta = std::abs(abscissa(1) - abscissa(0));
    arma::uvec left_bound = find(((min-delta) <= abscissa) && (abscissa <= (min+delta)));
    arma::uvec right_bound = find(((max-delta) <= abscissa) && (abscissa <= (max+delta)));

    arma::uword min_index = left_bound(0);
    arma::uword max_index = right_bound(0);
    boundaries << min_index << arma::endr << max_index;

    min = abscissa(min_index);
    max = abscissa(max_index);

    arma::uword size = abscissa.subvec(min_index, max_index).n_elem;
    arma::vec results(X.n_cols);
    midlines.set_size(X.n_cols, size);
    baselines.set_size(X.n_cols, size);
    arma::vec midline;
    arma::vec baseline;
    for (arma::uword i = 0; i < X.n_cols; ++i){
        results(i) = FindBandwidth(X.col(i), min_index, max_index, midline, baseline, delta);
        midlines.row(i) = midline;
        baselines.row(i) = baseline;
    }

    return results;
}
Пример #2
0
void FastMKS<KernelType, TreeType>::Search(TreeType* queryTree,
                                           const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // If either naive mode or single mode is specified, this must fail.
  if (naive || singleMode)
  {
    throw std::invalid_argument("can't call Search() with a query tree when "
        "single mode or naive search is enabled");
  }

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, queryTree->Dataset().n_cols);
  kernels.set_size(k, queryTree->Dataset().n_cols);
  kernels.fill(-DBL_MAX);

  Timer::Start("computing_products");
  typedef FastMKSRules<KernelType, TreeType> RuleType;
  RuleType rules(referenceSet, queryTree->Dataset(), indices, kernels,
      metric.Kernel());

  typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

  traverser.Traverse(*queryTree, *referenceTree);

  Log::Info << rules.BaseCases() << " base cases." << std::endl;
  Log::Info << rules.Scores() << " scores." << std::endl;

  Timer::Stop("computing_products");
}
Пример #3
0
/**
 * Create a Lovasz-Theta initial point.
 */
void CreateLovaszThetaInitialPoint(const arma::mat& edges,
                                   arma::mat& coordinates)
{
  // Get the number of vertices in the problem.
  const size_t vertices = max(max(edges)) + 1;

  const size_t m = edges.n_cols + 1;
  float r = 0.5 + sqrt(0.25 + 2 * m);
  if (ceil(r) > vertices)
    r = vertices; // An upper bound on the dimension.

  coordinates.set_size(vertices, ceil(r));

  // Now we set the entries of the initial matrix according to the formula given
  // in Section 4 of Monteiro and Burer.
  for (size_t i = 0; i < vertices; ++i)
  {
    for (size_t j = 0; j < ceil(r); ++j)
    {
      if (i == j)
        coordinates(i, j) = sqrt(1.0 / r) + sqrt(1.0 / (vertices * m));
      else
        coordinates(i, j) = sqrt(1.0 / (vertices * m));
    }
  }
}
Пример #4
0
arma::mat Vespucci::Math::Transform::cwtPeakAnalysis(const arma::mat &X,
                              std::string wavelet, arma::uword qscale,
                              double threshold, std::string threshold_method,
                              arma::mat &transform)
{
    transform.set_size(X.n_rows, X.n_cols);
    arma::uvec scales(1);
    scales(0) = qscale;
    arma::uword i;
    arma::umat peak_positions;
    arma::mat peak_extrema(X.n_rows, X.n_cols);
    arma::vec spectrum, current_transform, dtransform, peak_magnitudes;
    try{
        for (i = 0; i < X.n_cols; ++i){
            spectrum = X.col(i);
            current_transform = Vespucci::Math::Transform::cwt(spectrum, wavelet, scales);
            transform.col(i) = current_transform;
            dtransform = Vespucci::Math::diff(transform, 1);
            dtransform.insert_rows(0, 1, true); //dtransform(i) is derivative at transform(i)
            peak_positions = Vespucci::Math::PeakFinding::FindPeakPositions(transform, dtransform,
                                                         threshold, threshold_method,
                                                         peak_magnitudes);
            peak_extrema.col(i) = Vespucci::Math::PeakFinding::PeakExtrema(X.n_elem, peak_positions);
        }
    }
    catch(std::exception e){
        std::cerr << "CWTPeakAnalysis" << std::endl;
        std::cerr << "i = " << i << std::endl;
        throw(e);
    }
    return peak_extrema;
}
Пример #5
0
arma::mat Vespucci::Math::Transform::cwt_spdbc_mat(const arma::mat &X, std::string wavelet, arma::uword qscale,
                            double threshold, std::string threshold_method,
                            arma::uword window_size, arma::field<arma::umat> &peak_positions,
                            arma::mat &baselines)
{
    baselines.set_size(X.n_rows, X.n_cols);
    arma::vec baseline;
    arma::vec spectrum;
    arma::vec current_corrected;
    arma::mat corrected(X.n_rows, X.n_cols);
    arma::umat current_peakpos;
    peak_positions.set_size(X.n_cols);
    arma::uword i;
    try{
        for (i = 0; i < X.n_cols; ++i){
            spectrum = X.col(i);
            current_corrected = Vespucci::Math::Transform::cwt_spdbc(spectrum, wavelet,
                                                   qscale, threshold,
                                                   threshold_method, window_size,
                                                   current_peakpos, baseline);

            peak_positions(i) = current_peakpos;
            baselines.col(i) = baseline;
            corrected.col(i) = current_corrected;
        }
    }catch(std::exception e){
        std::cerr << std::endl << "exception! cwt_spdbc_mat" << std::endl;
        std::cerr << "i = " << i << std::endl;
        std::cerr << e.what();
        throw(e);
    }

    return corrected;
}
/**
 * Calculates and stores the gradient values given a set of parameters.
 */
void SoftmaxRegressionFunction::Gradient(const arma::mat& parameters,
                                         arma::mat& gradient) const
{
  // Calculate the class probabilities for each training example. The
  // probabilities for each of the classes are given by:
  // p_j = exp(theta_j' * x_i) / sum(exp(theta_k' * x_i))
  // The sum is calculated over all the classes.
  // x_i is the input vector for a particular training example.
  // theta_j is the parameter vector associated with a particular class.
  arma::mat probabilities;
  GetProbabilitiesMatrix(parameters, probabilities);

  // Calculate the parameter gradients.
  gradient.set_size(parameters.n_rows, parameters.n_cols);
  if (fitIntercept)
  {
    // Treating the intercept term parameters.col(0) seperately to avoid
    // the cost of building matrix [1; data].
    arma::mat inner = probabilities - groundTruth;
    gradient.col(0) =
      inner * arma::ones<arma::mat>(data.n_cols, 1) / data.n_cols +
      lambda * parameters.col(0);
    gradient.cols(1, parameters.n_cols - 1) =
      inner * data.t() / data.n_cols +
      lambda * parameters.cols(1, parameters.n_cols - 1);
  }
  else
  {
    gradient = (probabilities - groundTruth) * data.t() / data.n_cols +
               lambda * parameters;
  }
}
void AugLagrangianTestFunction::Gradient(const arma::mat& coordinates,
                                         arma::mat& gradient)
{
  // f'_x1(x) = 12 x_1 + 4 x_2
  // f'_x2(x) = 4 x_1 + 6 x_2
  gradient.set_size(2, 1);

  gradient[0] = 12 * coordinates[0] + 4 * coordinates[1];
  gradient[1] = 4 * coordinates[0] + 6 * coordinates[1];
}
Пример #8
0
void LogisticRegression<MatType>::Classify(const MatType& dataset,
                                           arma::mat& probabilities) const
{
  // Set correct size of output matrix.
  probabilities.set_size(2, dataset.n_cols);

  probabilities.row(1) = 1.0 / (1.0 + arma::exp(-parameters(0) - dataset.t() *
      parameters.subvec(1, parameters.n_elem - 1))).t();
  probabilities.row(0) = 1.0 - probabilities.row(1);
}
Пример #9
0
 inline static void Cluster(const MatType& data,
                            const size_t clusters,
                            arma::mat& centroids)
 {
   centroids.set_size(data.n_rows, clusters);
   for (size_t i = 0; i < clusters; ++i)
   {
     // Randomly sample a point.
     const size_t index = math::RandInt(0, data.n_cols);
     centroids.col(i) = data.col(index);
   }
 }
void GockenbachFunction::Gradient(const arma::mat& coordinates,
                                  arma::mat& gradient)
{
  // f'_x1(x) = 2 (x_1 - 1)
  // f'_x2(x) = 4 (x_2 + 2)
  // f'_x3(x) = 6 (x_3 + 3)
  gradient.set_size(3, 1);

  gradient[0] = 2 * (coordinates[0] - 1);
  gradient[1] = 4 * (coordinates[1] + 2);
  gradient[2] = 6 * (coordinates[2] + 3);
}
Пример #11
0
/**
 * Remove a certain set of rows in a matrix while copying to a second matrix.
 *
 * @param input Input matrix to copy.
 * @param rowsToRemove Vector containing indices of rows to be removed.
 * @param output Matrix to copy non-removed rows into.
 */
void mlpack::math::RemoveRows(const arma::mat& input,
                              const std::vector<size_t>& rowsToRemove,
                              arma::mat& output)
{
  const size_t nRemove = rowsToRemove.size();
  const size_t nKeep = input.n_rows - nRemove;

  if (nRemove == 0)
  {
    output = input; // Copy everything.
  }
  else
  {
    output.set_size(nKeep, input.n_cols);

    size_t curRow = 0;
    size_t removeInd = 0;
    // First, check 0 to first row to remove.
    if (rowsToRemove[0] > 0)
    {
      // Note that this implies that n_rows > 1.
      output.rows(0, rowsToRemove[0] - 1) = input.rows(0, rowsToRemove[0] - 1);
      curRow += rowsToRemove[0];
    }

    // Now, check i'th row to remove to (i + 1)'th row to remove, until i is the
    // penultimate row.
    while (removeInd < nRemove - 1)
    {
      const size_t height = rowsToRemove[removeInd + 1] -
          rowsToRemove[removeInd] - 1;

      if (height > 0)
      {
        output.rows(curRow, curRow + height - 1) =
            input.rows(rowsToRemove[removeInd] + 1,
                       rowsToRemove[removeInd + 1] - 1);
        curRow += height;
      }

      removeInd++;
    }

    // Now that i is the last row to remove, check last row to remove to last
    // row.
    if (rowsToRemove[removeInd] < input.n_rows - 1)
    {
      output.rows(curRow, nKeep - 1) = input.rows(rowsToRemove[removeInd] + 1,
          input.n_rows - 1);
    }
  }
}
Пример #12
0
/**
 * Calculate the gradient.
 */
void RosenbrockFunction::Gradient(const arma::mat& coordinates,
                                  arma::mat& gradient)
{
  // f'_{x1}(x) = -2 (1 - x1) + 400 (x1^3 - (x2 x1))
  // f'_{x2}(x) = 200 (x2 - x1^2)

  double x1 = coordinates[0];
  double x2 = coordinates[1];

  gradient.set_size(2, 1);
  gradient[0] = -2 * (1 - x1) + 400 * (std::pow(x1, 3) - x2 * x1);
  gradient[1] = 200 * (x2 - std::pow(x1, 2));
}
Пример #13
0
/***
 * Calculate the gradient.
 */
void RosenbrockWoodFunction::Gradient(const arma::mat& coordinates,
                                      arma::mat& gradient)
{
  gradient.set_size(4, 2);

  arma::vec grf(4);
  arma::vec gwf(4);

  rf.Gradient(coordinates.col(0), grf);
  wf.Gradient(coordinates.col(1), gwf);

  gradient.col(0) = grf;
  gradient.col(1) = gwf;
}
void LogisticRegressionFunction<MatType>::Gradient(
    const arma::mat& parameters,
    arma::mat& gradient) const
{
  // Regularization term.
  arma::mat regularization;
  regularization = lambda * parameters.tail_cols(parameters.n_elem - 1);

  const arma::rowvec sigmoids = (1 / (1 + arma::exp(-parameters(0, 0)
      - parameters.tail_cols(parameters.n_elem - 1) * predictors)));

  gradient.set_size(arma::size(parameters));
  gradient[0] = -arma::accu(responses - sigmoids);
  gradient.tail_cols(parameters.n_elem - 1) = (sigmoids - responses) *
      predictors.t() + regularization;
}
Пример #15
0
/**
 * Calculate the gradient.
 */
void GeneralizedRosenbrockFunction::Gradient(const arma::mat& coordinates,
                                             arma::mat& gradient) const
{
  gradient.set_size(n);
  for (int i = 0; i < (n - 1); i++)
  {
    gradient[i] = 400 * (std::pow(coordinates[i], 3) - coordinates[i] *
        coordinates[i + 1]) + 2 * (coordinates[i] - 1);

    if (i > 0)
      gradient[i] += 200 * (coordinates[i] - std::pow(coordinates[i - 1], 2));
  }

  gradient[n - 1] = 200 * (coordinates[n - 1] -
      std::pow(coordinates[n - 2], 2));
}
Пример #16
0
void NormalizeColByMax(const arma::mat &input,
                       arma::mat &output)
{
  output.set_size(input.n_rows, input.n_cols);
  for (arma::uword i = 0; i != input.n_cols; ++i)
  {
    const double max = arma::max(arma::abs(input.col(i)));
    if (max != 0.0)
    {
      output.col(i) = input.col(i) / max;
    }
    else
    {
      output.col(i) = input.col(i);
    }
  }
}
Пример #17
0
void DualTreeBoruvka<MetricType, TreeType>::EmitResults(arma::mat& results)
{
  // Sort the edges.
  std::sort(edges.begin(), edges.end(), SortFun);

  Log::Assert(edges.size() == data.n_cols - 1);
  results.set_size(3, edges.size());

  // Need to unpermute the point labels.
  if (!naive && ownTree && tree::TreeTraits<TreeType>::RearrangesDataset)
  {
    for (size_t i = 0; i < (data.n_cols - 1); i++)
    {
      // Make sure the edge list stores the smaller index first to
      // make checking correctness easier
      size_t ind1 = oldFromNew[edges[i].Lesser()];
      size_t ind2 = oldFromNew[edges[i].Greater()];

      if (ind1 < ind2)
      {
        edges[i].Lesser() = ind1;
        edges[i].Greater() = ind2;
      }
      else
      {
        edges[i].Lesser() = ind2;
        edges[i].Greater() = ind1;
      }

      results(0, i) = edges[i].Lesser();
      results(1, i) = edges[i].Greater();
      results(2, i) = edges[i].Distance();
    }
  }
  else
  {
    for (size_t i = 0; i < edges.size(); i++)
    {
      results(0, i) = edges[i].Lesser();
      results(1, i) = edges[i].Greater();
      results(2, i) = edges[i].Distance();
    }
  }
} // EmitResults
  /**
   * Initialize the dictionary by adding together three random observations from
   * the data, and then normalizing the atom.  This implementation is simple
   * enough to be included with the definition.
   *
   * @param data Dataset to initialize the dictionary with.
   * @param atoms Number of atoms in dictionary.
   * @param dictionary Dictionary to initialize.
   */
  static void Initialize(const arma::mat& data,
                         const size_t atoms,
                         arma::mat& dictionary)
  {
    // Set the size of the dictionary.
    dictionary.set_size(data.n_rows, atoms);

    // Create each atom.
    for (size_t i = 0; i < atoms; ++i)
    {
      // Add three atoms together.
      dictionary.col(i) = (data.col(math::RandInt(data.n_cols)) +
          data.col(math::RandInt(data.n_cols)) +
          data.col(math::RandInt(data.n_cols)));

      // Now normalize the atom.
      dictionary.col(i) /= norm(dictionary.col(i), 2);
    }
  }
Пример #19
0
void pca::read_armmat (const char * fname, arma::mat & cmtx)
{
  int n, m;
  std::ifstream myfilec(fname, std::ios::binary | std::ios::in);
  myfilec.read((char *)&n, sizeof(n));
  myfilec.read((char *)&m, sizeof(m));
  cmtx.set_size(n, m);
  for (int i=0; i<(int)cmtx.n_rows; i++)
  {
    for (int j=0; j<(int)cmtx.n_cols; j++)
    {
      double v;
      myfilec.read((char *)&v, sizeof(v));

      cmtx(i, j) = v;
    }
  }
  myfilec.close();
}
Пример #20
0
void HMM<Distribution>::Generate(const size_t length,
                                 arma::mat& dataSequence,
                                 arma::Col<size_t>& stateSequence,
                                 const size_t startState) const
{
  // Set vectors to the right size.
  stateSequence.set_size(length);
  dataSequence.set_size(dimensionality, length);

  // Set start state (default is 0).
  stateSequence[0] = startState;

  // Choose first emission state.
  double randValue = math::Random();

  // We just have to find where our random value sits in the probability
  // distribution of emissions for our starting state.
  dataSequence.col(0) = emission[startState].Random();

  // Now choose the states and emissions for the rest of the sequence.
  for (size_t t = 1; t < length; t++)
  {
    // First choose the hidden state.
    randValue = math::Random();

    // Now find where our random value sits in the probability distribution of
    // state changes.
    double probSum = 0;
    for (size_t st = 0; st < transition.n_rows; st++)
    {
      probSum += transition(st, stateSequence[t - 1]);
      if (randValue <= probSum)
      {
        stateSequence[t] = st;
        break;
      }
    }

    // Now choose the emission.
    dataSequence.col(t) = emission[stateSequence[t]].Random();
  }
}
Пример #21
0
void LSHSearch<SortPolicy>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  size_t avgIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash every query into every hash table and eventually into the
    // 'secondHashTable' to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // An informative book-keeping for the number of neighbor candidates
    // returned on average.
    avgIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'
    // candidates.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i, (size_t) refIndices[j]);
  }

  Timer::Stop("computing_neighbors");

  distanceEvaluations += avgIndicesReturned;
  avgIndicesReturned /= querySet.n_cols;
  Log::Info << avgIndicesReturned << " distinct indices returned on average." <<
      std::endl;
}
void CylindricalBirefringentMaterial::dielectricTensorInPrincCrdSyst( const meep::vec &r, arma::mat &eps, const BirefringentEps &biref ) const
{
  eps.set_size(3,3);
  arma::vec rTrans;
  transformR(r,rTrans);
  for ( unsigned int i=0;i<3;i++ )
  {
    eps(princAxis,i) = 0.0;
    eps(i,princAxis) = 0.0;
  }
  eps(princAxis,princAxis) = biref.ordinary;
  unsigned int indx1 = (princAxis+1)%3;
  unsigned int indx2 = (princAxis+2)%3;
  double R = sqrt( pow(rTrans(indx1),2) + pow(rTrans(indx2),2 ));
  double cosTheta = rTrans(indx1)/R;
  double sinTheta = rTrans(indx2)/R;
  eps(indx1,indx1) = biref.extraOrdinary*cosTheta*cosTheta + biref.ordinary*sinTheta*sinTheta;
  eps(indx2,indx2) = biref.extraOrdinary*sinTheta*sinTheta + biref.ordinary*cosTheta*cosTheta;
  eps(indx1,indx2) = (biref.extraOrdinary - biref.ordinary)*sinTheta*cosTheta;
  eps(indx2,indx1) = eps(indx1,indx2);
}
Пример #23
0
/**
 * Calculate the gradient.
 */
void WoodFunction::Gradient(const arma::mat& coordinates,
                            arma::mat& gradient)
{
  // For convenience; we assume these temporaries will be optimized out.
  double x1 = coordinates[0];
  double x2 = coordinates[1];
  double x3 = coordinates[2];
  double x4 = coordinates[3];

  // f'_{x1}(x) = 400 (x1^3 - x2 x1) - 2 (1 - x1)
  // f'_{x2}(x) = 200 (x2 - x1^2) + 20 (x2 + x4 - 2) + (1 / 5) (x2 - x4)
  // f'_{x3}(x) = 360 (x3^3 - x4 x3) - 2 (1 - x3)
  // f'_{x4}(x) = 180 (x4 - x3^2) + 20 (x2 + x4 - 2) - (1 / 5) (x2 - x4)
  gradient.set_size(4, 1);
  gradient[0] = 400 * (std::pow(x1, 3) - x2 * x1) - 2 * (1 - x1);
  gradient[1] = 200 * (x2 - std::pow(x1, 2)) + 20 * (x2 + x4 - 2) +
      (1.0 / 5.0) * (x2 - x4);
  gradient[2] = 360 * (std::pow(x3, 3) - x4 * x3) - 2 * (1 - x3);
  gradient[3] = 180 * (x4 - std::pow(x3, 2)) + 20 * (x2 + x4 - 2) -
      (1.0 / 5.0) * (x2 - x4);
}
Пример #24
0
void SparseCoding::Encode(const arma::mat& data, arma::mat& codes)
{
    // When using the Cholesky version of LARS, this is correct even if
    // lambda2 > 0.
    arma::mat matGram = trans(dictionary) * dictionary;

    codes.set_size(atoms, data.n_cols);
    for (size_t i = 0; i < data.n_cols; ++i)
    {
        // Report progress.
        if ((i % 100) == 0)
            Log::Debug << "Optimization at point " << i << "." << std::endl;

        bool useCholesky = true;
        regression::LARS lars(useCholesky, matGram, lambda1, lambda2);

        // Create an alias of the code (using the same memory), and then LARS will
        // place the result directly into that; then we will not need to have an
        // extra copy.
        arma::vec code = codes.unsafe_col(i);
        lars.Train(dictionary, data.unsafe_col(i), code, false);
    }
}
Пример #25
0
void RASearch<SortPolicy, MetricType, TreeType>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const double tau,
       const double alpha,
       const bool sampleAtLeaves,
       const bool firstLeafExact,
       const size_t singleSampleLimit)
{


    // If we have built the trees ourselves, then we will have to map all the
    // indices back to their original indices when this computation is finished.
    // To avoid an extra copy, we will store the neighbors and distances in a
    // separate matrix.
    arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
    arma::mat* distancePtr = &distances;

    // Mapping is only required if this tree type rearranges points and we are not
    // in naive mode.
    if (tree::TreeTraits<TreeType>::RearrangesDataset)
    {
        if (treeOwner && !(singleMode && hasQuerySet))
            distancePtr = new arma::mat; // Query indices need to be mapped.
        if (treeOwner)
            neighborPtr = new arma::Mat<size_t>; // All indices need mapping.
    }

    // Set the size of the neighbor and distance matrices.
    neighborPtr->set_size(k, querySet.n_cols);
    distancePtr->set_size(k, querySet.n_cols);
    distancePtr->fill(SortPolicy::WorstDistance());

    size_t numPrunes = 0;

    if (naive)
    {
        // We don't need to run the base case on every possible combination of
        // points; we can achieve the rank approximation guarantee with probability
        // alpha by sampling the reference set.
        typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
        RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                       metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                       singleSampleLimit);

        // Find how many samples from the reference set we need and sample uniformly
        // from the reference set without replacement.
        const size_t numSamples = rules.MinimumSamplesReqd(referenceSet.n_cols, k,
                                  tau, alpha);
        arma::uvec distinctSamples;
        rules.ObtainDistinctSamples(numSamples, referenceSet.n_cols,
                                    distinctSamples);

        // Run the base case on each combination of query point and sampled
        // reference point.
        for (size_t i = 0; i < querySet.n_cols; ++i)
            for (size_t j = 0; j < distinctSamples.n_elem; ++j)
                rules.BaseCase(i, (size_t) distinctSamples[j]);
    }
    else if (singleMode)
    {
        // Create the helper object for the tree traversal.  Initialization of
        // RASearchRules already implicitly performs the naive tree traversal.
        typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
        RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                       metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                       singleSampleLimit);

        // If the reference root node is a leaf, then the sampling has already been
        // done in the RASearchRules constructor.  This happens when naive = true.
        if (!referenceTree->IsLeaf())
        {
            Rcpp::Rcout << "Performing single-tree traversal..." << std::endl;

            // Create the traverser.
            typename TreeType::template SingleTreeTraverser<RuleType>
            traverser(rules);

            // Now have it traverse for each point.
            for (size_t i = 0; i < querySet.n_cols; ++i)
                traverser.Traverse(i, *referenceTree);

            numPrunes = traverser.NumPrunes();

            Rcpp::Rcout << "Single-tree traversal complete." << std::endl;
            Rcpp::Rcout << "Average number of distance calculations per query point: "
                        << (rules.NumDistComputations() / querySet.n_cols) << "."
                        << std::endl;
        }
    }
    else // Dual-tree recursion.
    {
        Rcpp::Rcout << "Performing dual-tree traversal..." << std::endl;

        typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
        RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                       metric, tau, alpha, sampleAtLeaves, firstLeafExact,
                       singleSampleLimit);

        typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

        if (queryTree)
        {
            Rcpp::Rcout << "Query statistic pre-search: "
                        << queryTree->Stat().NumSamplesMade() << std::endl;
            traverser.Traverse(*queryTree, *referenceTree);
        }
        else
        {
            Rcpp::Rcout << "Query statistic pre-search: "
                        << referenceTree->Stat().NumSamplesMade() << std::endl;
            traverser.Traverse(*referenceTree, *referenceTree);
        }

        numPrunes = traverser.NumPrunes();

        Rcpp::Rcout << "Dual-tree traversal complete." << std::endl;
        Rcpp::Rcout << "Average number of distance calculations per query point: "
                    << (rules.NumDistComputations() / querySet.n_cols) << "." << std::endl;
    }


    Rcpp::Rcout << "Pruned " << numPrunes << " nodes." << std::endl;

    // Now, do we need to do mapping of indices?
    if (!treeOwner || !tree::TreeTraits<TreeType>::RearrangesDataset)
    {
        // No mapping needed.  We are done.
        return;
    }
    else if (treeOwner && hasQuerySet && !singleMode) // Map both sets.
    {
        // Set size of output matrices correctly.
        resultingNeighbors.set_size(k, querySet.n_cols);
        distances.set_size(k, querySet.n_cols);

        for (size_t i = 0; i < distances.n_cols; i++)
        {
            // Map distances (copy a column).
            distances.col(oldFromNewQueries[i]) = distancePtr->col(i);

            // Map indices of neighbors.
            for (size_t j = 0; j < distances.n_rows; j++)
            {
                resultingNeighbors(j, oldFromNewQueries[i]) =
                    oldFromNewReferences[(*neighborPtr)(j, i)];
            }
        }

        // Finished with temporary matrices.
        delete neighborPtr;
        delete distancePtr;
    }
    else if (treeOwner && !hasQuerySet)
    {
        // No query tree -- map both references and queries.
        resultingNeighbors.set_size(k, querySet.n_cols);
        distances.set_size(k, querySet.n_cols);

        for (size_t i = 0; i < distances.n_cols; i++)
        {
            // Map distances (copy a column).
            distances.col(oldFromNewReferences[i]) = distancePtr->col(i);

            // Map indices of neighbors.
            for (size_t j = 0; j < distances.n_rows; j++)
            {
                resultingNeighbors(j, oldFromNewReferences[i]) =
                    oldFromNewReferences[(*neighborPtr)(j, i)];
            }
        }
    }
    else if (treeOwner && hasQuerySet && singleMode) // Map only references.
    {
        // Set size of neighbor indices matrix correctly.
        resultingNeighbors.set_size(k, querySet.n_cols);

        // Map indices of neighbors.
        for (size_t i = 0; i < resultingNeighbors.n_cols; i++)
        {
            for (size_t j = 0; j < resultingNeighbors.n_rows; j++)
            {
                resultingNeighbors(j, i) = oldFromNewReferences[(*neighborPtr)(j, i)];
            }
        }

        // Finished with temporary matrix.
        delete neighborPtr;
    }
} // Search
Пример #26
0
void FastMKS<KernelType, TreeType>::Search(const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // No remapping will be necessary because we are using the cover tree.
  Timer::Start("computing_products");
  indices.set_size(k, referenceSet.n_cols);
  kernels.set_size(k, referenceSet.n_cols);
  kernels.fill(-DBL_MAX);

  // Naive implementation.
  if (naive)
  {
    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < referenceSet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        if (q == r)
          continue; // Don't return the point as its own candidate.

        const double eval = metric.Kernel().Evaluate(referenceSet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");

    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, referenceSet, indices, kernels,
        metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < referenceSet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    // Save the number of pruned nodes.
    const size_t numPrunes = traverser.NumPrunes();

    Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.
  Timer::Stop("computing_products");

  Search(referenceTree, k, indices, kernels);
}
Пример #27
0
void FastMKS<KernelType, TreeType>::Search(
    const typename TreeType::Mat& querySet,
    const size_t k,
    arma::Mat<size_t>& indices,
    arma::mat& kernels)
{
  Timer::Start("computing_products");

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, querySet.n_cols);
  kernels.set_size(k, querySet.n_cols);

  // Naive implementation.
  if (naive)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < querySet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        const double eval = metric.Kernel().Evaluate(querySet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");

    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, indices, kernels, metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.  First, we need to build the query tree.  We are
  // assuming it doesn't map anything...
  Timer::Stop("computing_products");
  Timer::Start("tree_building");
  TreeType queryTree(querySet);
  Timer::Stop("tree_building");

  Search(&queryTree, k, indices, kernels);
}
Пример #28
0
///
/// \brief Vespucci::MathDimensionReduction::plsregress PLS Regression Using SIMPLS algorithm.
/// This is essentially a line-for-line rewrite of plsregress from the Octave
/// statistics package.
/// Copyright (C) 2012 Fernando Damian Nieuwveldt <*****@*****.**>
/// This is an implementation of the SIMPLS algorithm:
/// Reference: SIMPLS: An alternative approach to partial least squares regression.
/// Chemometrics and Intelligent Laboratory Systems (1993)
/// \param x
/// \param y
/// \param components Number of PLS components
/// \param x_loadings "Output" values
/// \param y_loadings
/// \param x_scores
/// \param y_scores
/// \param coefficients
/// \param fitted_data
/// \return
///
bool Vespucci::Math::DimensionReduction::plsregress(arma::mat X, arma::mat Y,
                                                    int components, arma::mat &X_loadings,
                                                    arma::mat &Y_loadings,
                                                    arma::mat &X_scores,
                                                    arma::mat &Y_scores,
                                                    arma::mat &coefficients,
                                                    arma::mat &percent_variance,
                                                    arma::mat &fitted)
{
    using namespace arma;
    uword observations = X.n_rows;
    uword predictors = X.n_cols;
    uword responses = Y.n_cols;
    //Mean centering
    mat Xmeans = arma::mean(X);
    mat Ymeans = arma::mean(Y);
    //Octave does below with bsxfun. After optimization, this should hopefully not
    // be slower.
    X.each_row() -= Xmeans; //element-wise subtraction of mean
    Y.each_row() -= Ymeans; //same

    mat S = trans(X) * Y;
    mat R = zeros(predictors, components);
    mat P = R;
    mat V = R;
    mat T = zeros(observations, components);
    mat U = T;
    mat Q = zeros(responses, components);
    mat eigvec;
    vec eigval;

    mat q;
    mat r;
    mat t;
    mat nt;
    mat p;
    mat u;
    mat v;
    double max_eigval;
    for (int i = 0; i < components; ++i){
        eig_sym(eigval, eigvec, (trans(S) * S));
        max_eigval = eigval.max();
        uvec dom_index = find(eigval >= max_eigval);
        uword dominant_index = dom_index(0);

        q = eigvec.col(dominant_index);

        r = S*q; //X block factor weights
        t = X*r; //X block factor weights
        t.each_row() -= mean(t); //center t
        nt = arma::sqrt(t.t()*t); //compute norm (is arma::norm() the same?)
        t.each_row() /= nt;
        r.each_row() /= nt; //normalize

        p = X.t()*t; //X block factor loadings
        q = Y.t()*t; //Y block factor loadings
        u = Y*q; //Y block factor scores
        v = p;

        //Ensure orthogonality
        if (i > 0){
            v = v - V*(V.t()*p);
            u = u - T*(T.t()*u);
        }
        v.each_row() /= arma::sqrt(trans(v) * v); //normalize orthogonal loadings
        S = S - v * (trans(v)*S); //deflate S wrt loadings
        R.col(i) = r;
        T.col(i) = t;
        P.col(i) = p;
        Q.col(i) = q;
        U.col(i) = u;
        V.col(i) = v;
    }

    //Regression coefficients
    mat B = R*trans(Q);
    fitted = T*trans(Q);
    fitted.each_row() += Ymeans;

    //Octave creates copies from inputs before sending to output. Doing same
    //here just to be safe.
    coefficients = B;
    X_scores = T;
    X_loadings = P;
    Y_scores = U;
    Y_loadings = Q;
    //projection = R;
    percent_variance.set_size(2, coefficients.n_cols);
    percent_variance.row(0) = sum(arma::abs(P)%arma::abs(P)) / sum(sum(arma::abs(X)%arma::abs(X)));
    percent_variance.row(1) = sum(arma::abs(Q)%arma::abs(Q)) / sum(sum(arma::abs(Y)%arma::abs(Y)));
    return true;

}
Пример #29
0
void RAModel<SortPolicy>::Search(arma::mat&& querySet,
                                 const size_t k,
                                 arma::Mat<size_t>& neighbors,
                                 arma::mat& distances)
{
  // Apply the random basis if necessary.
  if (randomBasis)
    querySet = q * querySet;

  Log::Info << "Searching for " << k << " approximate nearest neighbors with ";
  if (!Naive() && !SingleMode())
    Log::Info << "dual-tree rank-approximate " << TreeName() << " search...";
  else if (!Naive())
    Log::Info << "single-tree rank-approximate " << TreeName() << " search...";
  else
    Log::Info << "brute-force (naive) rank-approximate search...";
  Log::Info << std::endl;

  switch (treeType)
  {
    case KD_TREE:
      if (!kdTreeRA->Naive() && !kdTreeRA->SingleMode())
      {
        // Build a second tree and search.
        Timer::Start("tree_building");
        Log::Info << "Building query tree..." << std::endl;
        std::vector<size_t> oldFromNewQueries;
        typename RAType<tree::KDTree>::Tree queryTree(std::move(querySet),
            oldFromNewQueries, leafSize);
        Log::Info << "Tree built." << std::endl;
        Timer::Stop("tree_building");

        arma::Mat<size_t> neighborsOut;
        arma::mat distancesOut;
        kdTreeRA->Search(&queryTree, k, neighborsOut, distancesOut);

        // Unmap the query points.
        distances.set_size(distancesOut.n_rows, distancesOut.n_cols);
        neighbors.set_size(neighborsOut.n_rows, neighborsOut.n_cols);
        for (size_t i = 0; i < neighborsOut.n_cols; ++i)
        {
          neighbors.col(oldFromNewQueries[i]) = neighborsOut.col(i);
          distances.col(oldFromNewQueries[i]) = distancesOut.col(i);
        }
      }
      else
      {
        // Search without building a second tree.
        kdTreeRA->Search(querySet, k, neighbors, distances);
      }
      break;
    case COVER_TREE:
      // No mapping necessary.
      coverTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case R_TREE:
      // No mapping necessary.
      rTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case R_STAR_TREE:
      // No mapping necessary.
      rStarTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case X_TREE:
      // No mapping necessary.
      xTreeRA->Search(querySet, k, neighbors, distances);
      break;
  }
}
Пример #30
0
void NeighborSearch<SortPolicy, MetricType, TreeType>::Search(
    const size_t k,
    arma::Mat<size_t>& resultingNeighbors,
    arma::mat& distances)
{
  Timer::Start("computing_neighbors");

  // If we have built the trees ourselves, then we will have to map all the
  // indices back to their original indices when this computation is finished.
  // To avoid an extra copy, we will store the neighbors and distances in a
  // separate matrix.
  arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
  arma::mat* distancePtr = &distances;

  if (treeOwner && !(singleMode && hasQuerySet))
    distancePtr = new arma::mat; // Query indices need to be mapped.
  if (treeOwner)
    neighborPtr = new arma::Mat<size_t>; // All indices need mapping.

  // Set the size of the neighbor and distance matrices.
  neighborPtr->set_size(k, querySet.n_cols);
  distancePtr->set_size(k, querySet.n_cols);
  distancePtr->fill(SortPolicy::WorstDistance());

  size_t numPrunes = 0;

  // Create the helper object for the tree traversal.
  typedef NeighborSearchRules<SortPolicy, MetricType, TreeType> RuleType;
  RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr, metric);

  if (singleMode)
  {
    // Create the traverser.
    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    // Now have it traverse for each point.
    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);
  }
  else // Dual-tree recursion.
  {
    // Create the traverser.
    typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

    traverser.Traverse(*queryTree, *referenceTree);

    Log::Info << traverser.NumVisited() << " node combinations were visited.\n";
    Log::Info << traverser.NumScores() << " node combinations were scored.\n";
    Log::Info << traverser.NumBaseCases() << " base cases were calculated.\n";
  }

  Timer::Stop("computing_neighbors");

  // Now, do we need to do mapping of indices?
  if (!treeOwner)
  {
    // No mapping needed.  We are done.
    return;
  }
  else if (treeOwner && hasQuerySet && !singleMode) // Map both sets.
  {
    // Set size of output matrices correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewQueries[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewQueries[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrices.
    delete neighborPtr;
    delete distancePtr;
  }
  else if (treeOwner && !hasQuerySet)
  {
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewReferences[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewReferences[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }
  }
  else if (treeOwner && hasQuerySet && singleMode) // Map only references.
  {
    // Set size of neighbor indices matrix correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);

    // Map indices of neighbors.
    for (size_t i = 0; i < resultingNeighbors.n_cols; i++)
    {
      for (size_t j = 0; j < resultingNeighbors.n_rows; j++)
      {
        resultingNeighbors(j, i) = oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrix.
    delete neighborPtr;
  }
} // Search