///
/// \brief Vespucci::Math::Quantification::FindBandwidthMat
/// \param X Matrix whose columns are the spectra to analyze.
/// \param abscissa Spectral abscissa (assumed evenly spaced).
/// \param min Lower bound of the region of interest; replaced with the closest abscissa value.
/// \param max Upper bound of the region of interest; replaced with the closest abscissa value.
/// \param midlines Matrix in which the midline of each column is stored.
/// \param baselines Matrix in which the baseline of each column is stored.
/// \param boundaries Indices of the region boundaries (output).
/// \return Vector of bandwidths, one per column of X.
/// Finds the bandwidth of every column of an arma::mat.
arma::vec Vespucci::Math::Quantification::FindBandwidthMat(const arma::mat &X,
                                                           arma::vec abscissa,
                                                           double &min,
                                                           double &max,
                                                           arma::mat &midlines,
                                                           arma::mat &baselines,
                                                           arma::uvec &boundaries)
{
    double delta = std::abs(abscissa(1) - abscissa(0));
    arma::uvec left_bound = find(((min-delta) <= abscissa) && (abscissa <= (min+delta)));
    arma::uvec right_bound = find(((max-delta) <= abscissa) && (abscissa <= (max+delta)));
    arma::uword min_index = left_bound(0);
    arma::uword max_index = right_bound(0);
    // Record the boundary indices of the region of interest.
    boundaries = {min_index, max_index};
    min = abscissa(min_index);
    max = abscissa(max_index);
    arma::uword size = abscissa.subvec(min_index, max_index).n_elem;
    arma::vec results(X.n_cols);
    midlines.set_size(X.n_cols, size);
    baselines.set_size(X.n_cols, size);
    arma::vec midline;
    arma::vec baseline;
    for (arma::uword i = 0; i < X.n_cols; ++i){
        results(i) = FindBandwidth(X.col(i), min_index, max_index,
                                   midline, baseline, delta);
        // Transpose the column vectors to match the row orientation.
        midlines.row(i) = midline.t();
        baselines.row(i) = baseline.t();
    }
    return results;
}
void FastMKS<KernelType, TreeType>::Search(TreeType* queryTree,
                                           const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // If either naive mode or single mode is specified, this must fail.
  if (naive || singleMode)
  {
    throw std::invalid_argument("can't call Search() with a query tree when "
        "single mode or naive search is enabled");
  }

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, queryTree->Dataset().n_cols);
  kernels.set_size(k, queryTree->Dataset().n_cols);
  kernels.fill(-DBL_MAX);

  Timer::Start("computing_products");
  typedef FastMKSRules<KernelType, TreeType> RuleType;
  RuleType rules(referenceSet, queryTree->Dataset(), indices, kernels,
      metric.Kernel());

  typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

  traverser.Traverse(*queryTree, *referenceTree);

  Log::Info << rules.BaseCases() << " base cases." << std::endl;
  Log::Info << rules.Scores() << " scores." << std::endl;

  Timer::Stop("computing_products");
}
/**
 * Create a Lovasz-Theta initial point.
 */
void CreateLovaszThetaInitialPoint(const arma::mat& edges,
                                   arma::mat& coordinates)
{
  // Get the number of vertices in the problem.
  const size_t vertices = max(max(edges)) + 1;

  const size_t m = edges.n_cols + 1;
  float r = 0.5 + sqrt(0.25 + 2 * m);
  if (ceil(r) > vertices)
    r = vertices; // An upper bound on the dimension.

  coordinates.set_size(vertices, ceil(r));

  // Now we set the entries of the initial matrix according to the formula
  // given in Section 4 of Monteiro and Burer.
  for (size_t i = 0; i < vertices; ++i)
  {
    for (size_t j = 0; j < ceil(r); ++j)
    {
      if (i == j)
        coordinates(i, j) = sqrt(1.0 / r) + sqrt(1.0 / (vertices * m));
      else
        coordinates(i, j) = sqrt(1.0 / (vertices * m));
    }
  }
}
arma::mat Vespucci::Math::Transform::cwtPeakAnalysis(const arma::mat &X,
                                                     std::string wavelet,
                                                     arma::uword qscale,
                                                     double threshold,
                                                     std::string threshold_method,
                                                     arma::mat &transform)
{
    transform.set_size(X.n_rows, X.n_cols);
    arma::uvec scales(1);
    scales(0) = qscale;
    arma::uword i;
    arma::umat peak_positions;
    arma::mat peak_extrema(X.n_rows, X.n_cols);
    arma::vec spectrum, current_transform, dtransform, peak_magnitudes;
    try{
        for (i = 0; i < X.n_cols; ++i){
            spectrum = X.col(i);
            current_transform = Vespucci::Math::Transform::cwt(spectrum, wavelet, scales);
            transform.col(i) = current_transform;
            // Differentiate the current column, not the whole transform matrix.
            dtransform = Vespucci::Math::diff(current_transform, 1);
            dtransform.insert_rows(0, 1, true); //dtransform(i) is derivative at transform(i)
            peak_positions =
                Vespucci::Math::PeakFinding::FindPeakPositions(current_transform,
                                                               dtransform,
                                                               threshold,
                                                               threshold_method,
                                                               peak_magnitudes);
            // One extremum flag per row of X (per abscissa point).
            peak_extrema.col(i) =
                Vespucci::Math::PeakFinding::PeakExtrema(X.n_rows, peak_positions);
        }
    }catch(std::exception &){
        std::cerr << "CWTPeakAnalysis" << std::endl;
        std::cerr << "i = " << i << std::endl;
        throw; //rethrow the original exception rather than a sliced copy
    }
    return peak_extrema;
}
arma::mat Vespucci::Math::Transform::cwt_spdbc_mat(const arma::mat &X,
                                                   std::string wavelet,
                                                   arma::uword qscale,
                                                   double threshold,
                                                   std::string threshold_method,
                                                   arma::uword window_size,
                                                   arma::field<arma::umat> &peak_positions,
                                                   arma::mat &baselines)
{
    baselines.set_size(X.n_rows, X.n_cols);
    arma::vec baseline;
    arma::vec spectrum;
    arma::vec current_corrected;
    arma::mat corrected(X.n_rows, X.n_cols);
    arma::umat current_peakpos;
    peak_positions.set_size(X.n_cols);
    arma::uword i;
    try{
        for (i = 0; i < X.n_cols; ++i){
            spectrum = X.col(i);
            current_corrected =
                Vespucci::Math::Transform::cwt_spdbc(spectrum, wavelet,
                                                     qscale, threshold,
                                                     threshold_method,
                                                     window_size,
                                                     current_peakpos, baseline);
            peak_positions(i) = current_peakpos;
            baselines.col(i) = baseline;
            corrected.col(i) = current_corrected;
        }
    }catch(std::exception &e){
        std::cerr << std::endl << "exception! cwt_spdbc_mat" << std::endl;
        std::cerr << "i = " << i << std::endl;
        std::cerr << e.what();
        throw; //rethrow the original exception rather than a sliced copy
    }
    return corrected;
}
/**
 * Calculates and stores the gradient values given a set of parameters.
 */
void SoftmaxRegressionFunction::Gradient(const arma::mat& parameters,
                                         arma::mat& gradient) const
{
  // Calculate the class probabilities for each training example.  The
  // probabilities for each of the classes are given by:
  // p_j = exp(theta_j' * x_i) / sum(exp(theta_k' * x_i))
  // The sum is calculated over all the classes.
  // x_i is the input vector for a particular training example.
  // theta_j is the parameter vector associated with a particular class.
  arma::mat probabilities;
  GetProbabilitiesMatrix(parameters, probabilities);

  // Calculate the parameter gradients.
  gradient.set_size(parameters.n_rows, parameters.n_cols);
  if (fitIntercept)
  {
    // Treating the intercept term parameters.col(0) separately to avoid
    // the cost of building matrix [1; data].
    arma::mat inner = probabilities - groundTruth;
    gradient.col(0) =
        inner * arma::ones<arma::mat>(data.n_cols, 1) / data.n_cols +
        lambda * parameters.col(0);
    gradient.cols(1, parameters.n_cols - 1) =
        inner * data.t() / data.n_cols +
        lambda * parameters.cols(1, parameters.n_cols - 1);
  }
  else
  {
    gradient = (probabilities - groundTruth) * data.t() / data.n_cols +
        lambda * parameters;
  }
}
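// Analytic gradients like the one above are easy to get subtly wrong, so a
// central finite-difference check is a useful companion.  The sketch below is
// illustrative and not part of the original source; NumericalGradient is a
// hypothetical helper that accepts any objective with the signature
// double(const arma::mat&), e.g. a lambda wrapping an Evaluate() method.
#include <armadillo>
#include <functional>

arma::mat NumericalGradient(const std::function<double(const arma::mat&)>& f,
                            const arma::mat& parameters,
                            const double h = 1e-6)
{
  arma::mat gradient(arma::size(parameters));
  arma::mat perturbed = parameters;
  for (arma::uword i = 0; i < parameters.n_elem; ++i)
  {
    perturbed(i) = parameters(i) + h;
    const double forward = f(perturbed);
    perturbed(i) = parameters(i) - h;
    const double backward = f(perturbed);
    perturbed(i) = parameters(i); // Restore before the next coordinate.
    gradient(i) = (forward - backward) / (2.0 * h); // Central difference.
  }
  return gradient;
}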
void AugLagrangianTestFunction::Gradient(const arma::mat& coordinates,
                                         arma::mat& gradient)
{
  // f'_x1(x) = 12 x_1 + 4 x_2
  // f'_x2(x) = 4 x_1 + 6 x_2
  gradient.set_size(2, 1);
  gradient[0] = 12 * coordinates[0] + 4 * coordinates[1];
  gradient[1] = 4 * coordinates[0] + 6 * coordinates[1];
}
void LogisticRegression<MatType>::Classify(const MatType& dataset,
                                           arma::mat& probabilities) const
{
  // Set correct size of output matrix.
  probabilities.set_size(2, dataset.n_cols);

  probabilities.row(1) = 1.0 / (1.0 + arma::exp(-parameters(0) -
      dataset.t() * parameters.subvec(1, parameters.n_elem - 1))).t();
  probabilities.row(0) = 1.0 - probabilities.row(1);
}
inline static void Cluster(const MatType& data,
                           const size_t clusters,
                           arma::mat& centroids)
{
  centroids.set_size(data.n_rows, clusters);
  for (size_t i = 0; i < clusters; ++i)
  {
    // Randomly sample a point.
    const size_t index = math::RandInt(0, data.n_cols);
    centroids.col(i) = data.col(index);
  }
}
void GockenbachFunction::Gradient(const arma::mat& coordinates,
                                  arma::mat& gradient)
{
  // f'_x1(x) = 2 (x_1 - 1)
  // f'_x2(x) = 4 (x_2 + 2)
  // f'_x3(x) = 6 (x_3 + 3)
  gradient.set_size(3, 1);
  gradient[0] = 2 * (coordinates[0] - 1);
  gradient[1] = 4 * (coordinates[1] + 2);
  gradient[2] = 6 * (coordinates[2] + 3);
}
/**
 * Remove a certain set of rows in a matrix while copying to a second matrix.
 *
 * @param input Input matrix to copy.
 * @param rowsToRemove Vector containing indices of rows to be removed.
 * @param output Matrix to copy non-removed rows into.
 */
void mlpack::math::RemoveRows(const arma::mat& input,
                              const std::vector<size_t>& rowsToRemove,
                              arma::mat& output)
{
  const size_t nRemove = rowsToRemove.size();
  const size_t nKeep = input.n_rows - nRemove;

  if (nRemove == 0)
  {
    output = input; // Copy everything.
  }
  else
  {
    output.set_size(nKeep, input.n_cols);

    size_t curRow = 0;
    size_t removeInd = 0;
    // First, check 0 to first row to remove.
    if (rowsToRemove[0] > 0)
    {
      // Note that this implies that n_rows > 1.
      output.rows(0, rowsToRemove[0] - 1) = input.rows(0, rowsToRemove[0] - 1);
      curRow += rowsToRemove[0];
    }

    // Now, check i'th row to remove to (i + 1)'th row to remove, until i is
    // the penultimate row.
    while (removeInd < nRemove - 1)
    {
      const size_t height = rowsToRemove[removeInd + 1] -
          rowsToRemove[removeInd] - 1;

      if (height > 0)
      {
        output.rows(curRow, curRow + height - 1) =
            input.rows(rowsToRemove[removeInd] + 1,
                       rowsToRemove[removeInd + 1] - 1);
        curRow += height;
      }

      removeInd++;
    }

    // Now that i is the last row to remove, check last row to remove to last
    // row.
    if (rowsToRemove[removeInd] < input.n_rows - 1)
    {
      output.rows(curRow, nKeep - 1) = input.rows(rowsToRemove[removeInd] + 1,
          input.n_rows - 1);
    }
  }
}
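// A small illustrative driver (not from the original source) showing the
// copy logic above: removing rows 0, 2, and 4 from a 5-row matrix keeps
// rows 1 and 3, in their original order.
#include <armadillo>
#include <vector>

void RemoveRowsExample()
{
  arma::mat input(5, 2, arma::fill::randu);
  std::vector<size_t> rowsToRemove = { 0, 2, 4 };
  arma::mat output;
  mlpack::math::RemoveRows(input, rowsToRemove, output);
  // Now output.n_rows == 2, output.row(0) == input.row(1), and
  // output.row(1) == input.row(3).
}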
/**
 * Calculate the gradient.
 */
void RosenbrockFunction::Gradient(const arma::mat& coordinates,
                                  arma::mat& gradient)
{
  // f'_{x1}(x) = -2 (1 - x1) + 400 (x1^3 - (x2 x1))
  // f'_{x2}(x) = 200 (x2 - x1^2)
  double x1 = coordinates[0];
  double x2 = coordinates[1];

  gradient.set_size(2, 1);
  gradient[0] = -2 * (1 - x1) + 400 * (std::pow(x1, 3) - x2 * x1);
  gradient[1] = 200 * (x2 - std::pow(x1, 2));
}
/**
 * Calculate the gradient.
 */
void RosenbrockWoodFunction::Gradient(const arma::mat& coordinates,
                                      arma::mat& gradient)
{
  gradient.set_size(4, 2);

  arma::vec grf(4);
  arma::vec gwf(4);

  rf.Gradient(coordinates.col(0), grf);
  wf.Gradient(coordinates.col(1), gwf);

  gradient.col(0) = grf;
  gradient.col(1) = gwf;
}
void LogisticRegressionFunction<MatType>::Gradient(
    const arma::mat& parameters,
    arma::mat& gradient) const
{
  // Regularization term.
  arma::mat regularization;
  regularization = lambda * parameters.tail_cols(parameters.n_elem - 1);

  const arma::rowvec sigmoids = (1 / (1 + arma::exp(-parameters(0, 0)
      - parameters.tail_cols(parameters.n_elem - 1) * predictors)));

  gradient.set_size(arma::size(parameters));
  gradient[0] = -arma::accu(responses - sigmoids);
  gradient.tail_cols(parameters.n_elem - 1) = (sigmoids - responses) *
      predictors.t() + regularization;
}
/**
 * Calculate the gradient.
 */
void GeneralizedRosenbrockFunction::Gradient(const arma::mat& coordinates,
                                             arma::mat& gradient) const
{
  gradient.set_size(n);
  for (int i = 0; i < (n - 1); i++)
  {
    gradient[i] = 400 * (std::pow(coordinates[i], 3) -
        coordinates[i] * coordinates[i + 1]) + 2 * (coordinates[i] - 1);

    if (i > 0)
      gradient[i] += 200 * (coordinates[i] - std::pow(coordinates[i - 1], 2));
  }

  gradient[n - 1] = 200 * (coordinates[n - 1] -
      std::pow(coordinates[n - 2], 2));
}
/**
 * Scale each column of the input by its maximum absolute value, leaving
 * all-zero columns unchanged.
 */
void NormalizeColByMax(const arma::mat &input,
                       arma::mat &output)
{
  output.set_size(input.n_rows, input.n_cols);
  for (arma::uword i = 0; i != input.n_cols; ++i)
  {
    const double max = arma::max(arma::abs(input.col(i)));
    if (max != 0.0)
    {
      output.col(i) = input.col(i) / max;
    }
    else
    {
      output.col(i) = input.col(i); // Avoid dividing by zero.
    }
  }
}
void DualTreeBoruvka<MetricType, TreeType>::EmitResults(arma::mat& results)
{
  // Sort the edges.
  std::sort(edges.begin(), edges.end(), SortFun);

  Log::Assert(edges.size() == data.n_cols - 1);
  results.set_size(3, edges.size());

  // Need to unpermute the point labels.
  if (!naive && ownTree && tree::TreeTraits<TreeType>::RearrangesDataset)
  {
    for (size_t i = 0; i < (data.n_cols - 1); i++)
    {
      // Make sure the edge list stores the smaller index first to
      // make checking correctness easier.
      size_t ind1 = oldFromNew[edges[i].Lesser()];
      size_t ind2 = oldFromNew[edges[i].Greater()];

      if (ind1 < ind2)
      {
        edges[i].Lesser() = ind1;
        edges[i].Greater() = ind2;
      }
      else
      {
        edges[i].Lesser() = ind2;
        edges[i].Greater() = ind1;
      }

      results(0, i) = edges[i].Lesser();
      results(1, i) = edges[i].Greater();
      results(2, i) = edges[i].Distance();
    }
  }
  else
  {
    for (size_t i = 0; i < edges.size(); i++)
    {
      results(0, i) = edges[i].Lesser();
      results(1, i) = edges[i].Greater();
      results(2, i) = edges[i].Distance();
    }
  }
} // EmitResults
/**
 * Initialize the dictionary by adding together three random observations from
 * the data, and then normalizing the atom.  This implementation is simple
 * enough to be included with the definition.
 *
 * @param data Dataset to initialize the dictionary with.
 * @param atoms Number of atoms in dictionary.
 * @param dictionary Dictionary to initialize.
 */
static void Initialize(const arma::mat& data,
                       const size_t atoms,
                       arma::mat& dictionary)
{
  // Set the size of the dictionary.
  dictionary.set_size(data.n_rows, atoms);

  // Create each atom.
  for (size_t i = 0; i < atoms; ++i)
  {
    // Add three atoms together.
    dictionary.col(i) = (data.col(math::RandInt(data.n_cols)) +
        data.col(math::RandInt(data.n_cols)) +
        data.col(math::RandInt(data.n_cols)));

    // Now normalize the atom.
    dictionary.col(i) /= norm(dictionary.col(i), 2);
  }
}
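// A quick sanity sketch (illustrative, not from the source): after
// Initialize() runs, every atom should have unit L2 norm, which is easy to
// spot-check.
#include <armadillo>
#include <cassert>
#include <cmath>

void CheckDictionaryNorms(const arma::mat& dictionary)
{
  for (arma::uword i = 0; i < dictionary.n_cols; ++i)
  {
    // Each column should satisfy norm(col, 2) == 1 up to rounding error.
    assert(std::abs(arma::norm(dictionary.col(i), 2) - 1.0) < 1e-12);
  }
}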
void pca::read_armmat(const char* fname, arma::mat& cmtx)
{
  // Read the matrix dimensions (two ints), then the elements as doubles in
  // row-major order.
  int n, m;
  std::ifstream myfilec(fname, std::ios::binary | std::ios::in);
  myfilec.read((char *) &n, sizeof(n));
  myfilec.read((char *) &m, sizeof(m));
  cmtx.set_size(n, m);

  for (int i = 0; i < (int) cmtx.n_rows; i++)
  {
    for (int j = 0; j < (int) cmtx.n_cols; j++)
    {
      double v;
      myfilec.read((char *) &v, sizeof(v));
      cmtx(i, j) = v;
    }
  }

  myfilec.close();
}
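// A sketch of the matching writer, assuming the same binary layout the reader
// above expects: two ints (rows, cols) followed by the elements as doubles in
// row-major order.  write_armmat is a hypothetical helper, not from the
// source; it could live in the same pca namespace as the reader.
#include <fstream>
#include <armadillo>

void write_armmat(const char* fname, const arma::mat& cmtx)
{
  std::ofstream myfilec(fname, std::ios::binary | std::ios::out);
  int n = (int) cmtx.n_rows;
  int m = (int) cmtx.n_cols;
  myfilec.write((const char *) &n, sizeof(n));
  myfilec.write((const char *) &m, sizeof(m));

  // Write row by row to mirror the reader's traversal order.
  for (int i = 0; i < n; i++)
  {
    for (int j = 0; j < m; j++)
    {
      double v = cmtx(i, j);
      myfilec.write((const char *) &v, sizeof(v));
    }
  }

  myfilec.close();
}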
void HMM<Distribution>::Generate(const size_t length,
                                 arma::mat& dataSequence,
                                 arma::Col<size_t>& stateSequence,
                                 const size_t startState) const
{
  // Set vectors to the right size.
  stateSequence.set_size(length);
  dataSequence.set_size(dimensionality, length);

  // Set start state (default is 0).
  stateSequence[0] = startState;

  // Choose first emission; the emission distribution handles its own
  // randomness, so no random value is needed here.
  dataSequence.col(0) = emission[startState].Random();

  // Now choose the states and emissions for the rest of the sequence.
  for (size_t t = 1; t < length; t++)
  {
    // First choose the hidden state.
    const double randValue = math::Random();

    // Now find where our random value sits in the probability distribution of
    // state changes.
    double probSum = 0;
    for (size_t st = 0; st < transition.n_rows; st++)
    {
      probSum += transition(st, stateSequence[t - 1]);
      if (randValue <= probSum)
      {
        stateSequence[t] = st;
        break;
      }
    }

    // Now choose the emission.
    dataSequence.col(t) = emission[stateSequence[t]].Random();
  }
}
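// The state-selection loop above is inverse-CDF sampling over one column of
// the transition matrix.  The same idea as a standalone sketch
// (SampleCategorical is a hypothetical helper, not from the source), with an
// explicit guard for the case where floating-point rounding leaves randValue
// just above the accumulated sum:
#include <armadillo>

size_t SampleCategorical(const arma::vec& probabilities,
                         const double randValue)
{
  double probSum = 0.0;
  for (arma::uword i = 0; i < probabilities.n_elem; ++i)
  {
    probSum += probabilities[i];
    if (randValue <= probSum)
      return i;
  }
  return probabilities.n_elem - 1; // Rounding left randValue above the sum.
}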
void LSHSearch<SortPolicy>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  size_t avgIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash every query into every hash table and eventually into the
    // 'secondHashTable' to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // An informative book-keeping for the number of neighbor candidates
    // returned on average.
    avgIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'
    // candidates.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i, (size_t) refIndices[j]);
  }

  Timer::Stop("computing_neighbors");
  distanceEvaluations += avgIndicesReturned;

  avgIndicesReturned /= querySet.n_cols;
  Log::Info << avgIndicesReturned << " distinct indices returned on average."
      << std::endl;
}
void CylindricalBirefringentMaterial::dielectricTensorInPrincCrdSyst(
    const meep::vec &r, arma::mat &eps, const BirefringentEps &biref) const
{
  eps.set_size(3, 3);
  arma::vec rTrans;
  transformR(r, rTrans);

  // The principal axis carries the ordinary permittivity and does not couple
  // to the other two axes.
  for (unsigned int i = 0; i < 3; i++)
  {
    eps(princAxis, i) = 0.0;
    eps(i, princAxis) = 0.0;
  }
  eps(princAxis, princAxis) = biref.ordinary;

  // Cylindrical angle in the plane orthogonal to the principal axis.
  unsigned int indx1 = (princAxis + 1) % 3;
  unsigned int indx2 = (princAxis + 2) % 3;
  double R = sqrt(pow(rTrans(indx1), 2) + pow(rTrans(indx2), 2));
  double cosTheta = rTrans(indx1) / R;
  double sinTheta = rTrans(indx2) / R;

  eps(indx1, indx1) = biref.extraOrdinary*cosTheta*cosTheta +
                      biref.ordinary*sinTheta*sinTheta;
  eps(indx2, indx2) = biref.extraOrdinary*sinTheta*sinTheta +
                      biref.ordinary*cosTheta*cosTheta;
  eps(indx1, indx2) = (biref.extraOrdinary - biref.ordinary)*sinTheta*cosTheta;
  eps(indx2, indx1) = eps(indx1, indx2);
}
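// Added note: the 2x2 block assembled above is the uniaxial tensor
// diag(eps_e, eps_o) conjugated by an in-plane rotation,
//   eps_2x2 = R(theta) * diag(eps_e, eps_o) * R(theta)^T,
// where cos(theta) = rTrans(indx1)/R and sin(theta) = rTrans(indx2)/R as
// computed in the function; expanding the product reproduces the
// cos^2/sin^2 diagonal entries and the (eps_e - eps_o) sin(theta) cos(theta)
// off-diagonal entry exactly.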
/**
 * Calculate the gradient.
 */
void WoodFunction::Gradient(const arma::mat& coordinates,
                            arma::mat& gradient)
{
  // For convenience; we assume these temporaries will be optimized out.
  double x1 = coordinates[0];
  double x2 = coordinates[1];
  double x3 = coordinates[2];
  double x4 = coordinates[3];

  // f'_{x1}(x) = 400 (x1^3 - x2 x1) - 2 (1 - x1)
  // f'_{x2}(x) = 200 (x2 - x1^2) + 20 (x2 + x4 - 2) + (1 / 5) (x2 - x4)
  // f'_{x3}(x) = 360 (x3^3 - x4 x3) - 2 (1 - x3)
  // f'_{x4}(x) = 180 (x4 - x3^2) + 20 (x2 + x4 - 2) - (1 / 5) (x2 - x4)
  gradient.set_size(4, 1);
  gradient[0] = 400 * (std::pow(x1, 3) - x2 * x1) - 2 * (1 - x1);
  gradient[1] = 200 * (x2 - std::pow(x1, 2)) + 20 * (x2 + x4 - 2) +
      (1.0 / 5.0) * (x2 - x4);
  gradient[2] = 360 * (std::pow(x3, 3) - x4 * x3) - 2 * (1 - x3);
  gradient[3] = 180 * (x4 - std::pow(x3, 2)) + 20 * (x2 + x4 - 2) -
      (1.0 / 5.0) * (x2 - x4);
}
void SparseCoding::Encode(const arma::mat& data, arma::mat& codes)
{
  // When using the Cholesky version of LARS, this is correct even if
  // lambda2 > 0.
  arma::mat matGram = trans(dictionary) * dictionary;

  codes.set_size(atoms, data.n_cols);
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Report progress.
    if ((i % 100) == 0)
      Log::Debug << "Optimization at point " << i << "." << std::endl;

    bool useCholesky = true;
    regression::LARS lars(useCholesky, matGram, lambda1, lambda2);

    // Create an alias of the code (using the same memory), and then LARS will
    // place the result directly into that; then we will not need to have an
    // extra copy.
    arma::vec code = codes.unsafe_col(i);
    lars.Train(dictionary, data.unsafe_col(i), code, false);
  }
}
void RASearch<SortPolicy, MetricType, TreeType>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const double tau,
       const double alpha,
       const bool sampleAtLeaves,
       const bool firstLeafExact,
       const size_t singleSampleLimit)
{
  // If we have built the trees ourselves, then we will have to map all the
  // indices back to their original indices when this computation is finished.
  // To avoid an extra copy, we will store the neighbors and distances in a
  // separate matrix.
  arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
  arma::mat* distancePtr = &distances;

  // Mapping is only required if this tree type rearranges points and we are
  // not in naive mode.
  if (tree::TreeTraits<TreeType>::RearrangesDataset)
  {
    if (treeOwner && !(singleMode && hasQuerySet))
      distancePtr = new arma::mat; // Query indices need to be mapped.
    if (treeOwner)
      neighborPtr = new arma::Mat<size_t>; // All indices need mapping.
  }

  // Set the size of the neighbor and distance matrices.
  neighborPtr->set_size(k, querySet.n_cols);
  distancePtr->set_size(k, querySet.n_cols);
  distancePtr->fill(SortPolicy::WorstDistance());

  size_t numPrunes = 0;

  if (naive)
  {
    // We don't need to run the base case on every possible combination of
    // points; we can achieve the rank approximation guarantee with
    // probability alpha by sampling the reference set.
    typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                   metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                   singleSampleLimit);

    // Find how many samples from the reference set we need and sample
    // uniformly from the reference set without replacement.
    const size_t numSamples = rules.MinimumSamplesReqd(referenceSet.n_cols, k,
        tau, alpha);
    arma::uvec distinctSamples;
    rules.ObtainDistinctSamples(numSamples, referenceSet.n_cols,
        distinctSamples);

    // Run the base case on each combination of query point and sampled
    // reference point.
    for (size_t i = 0; i < querySet.n_cols; ++i)
      for (size_t j = 0; j < distinctSamples.n_elem; ++j)
        rules.BaseCase(i, (size_t) distinctSamples[j]);
  }
  else if (singleMode)
  {
    // Create the helper object for the tree traversal.  Initialization of
    // RASearchRules already implicitly performs the naive tree traversal.
    typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                   metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                   singleSampleLimit);

    // If the reference root node is a leaf, then the sampling has already been
    // done in the RASearchRules constructor.  This happens when naive = true.
    if (!referenceTree->IsLeaf())
    {
      Rcpp::Rcout << "Performing single-tree traversal..." << std::endl;

      // Create the traverser.
      typename TreeType::template SingleTreeTraverser<RuleType>
          traverser(rules);

      // Now have it traverse for each point.
      for (size_t i = 0; i < querySet.n_cols; ++i)
        traverser.Traverse(i, *referenceTree);

      numPrunes = traverser.NumPrunes();

      Rcpp::Rcout << "Single-tree traversal complete." << std::endl;
      Rcpp::Rcout << "Average number of distance calculations per query point: "
          << (rules.NumDistComputations() / querySet.n_cols) << "."
          << std::endl;
    }
  }
  else // Dual-tree recursion.
  {
    Rcpp::Rcout << "Performing dual-tree traversal..." << std::endl;

    typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                   metric, tau, alpha, sampleAtLeaves, firstLeafExact,
                   singleSampleLimit);

    typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

    if (queryTree)
    {
      Rcpp::Rcout << "Query statistic pre-search: "
          << queryTree->Stat().NumSamplesMade() << std::endl;
      traverser.Traverse(*queryTree, *referenceTree);
    }
    else
    {
      Rcpp::Rcout << "Query statistic pre-search: "
          << referenceTree->Stat().NumSamplesMade() << std::endl;
      traverser.Traverse(*referenceTree, *referenceTree);
    }

    numPrunes = traverser.NumPrunes();

    Rcpp::Rcout << "Dual-tree traversal complete." << std::endl;
    Rcpp::Rcout << "Average number of distance calculations per query point: "
        << (rules.NumDistComputations() / querySet.n_cols) << "." << std::endl;
  }

  Rcpp::Rcout << "Pruned " << numPrunes << " nodes." << std::endl;

  // Now, do we need to do mapping of indices?
  if (!treeOwner || !tree::TreeTraits<TreeType>::RearrangesDataset)
  {
    // No mapping needed.  We are done.
    return;
  }
  else if (treeOwner && hasQuerySet && !singleMode) // Map both sets.
  {
    // Set size of output matrices correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewQueries[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewQueries[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrices.
    delete neighborPtr;
    delete distancePtr;
  }
  else if (treeOwner && !hasQuerySet)
  {
    // No query tree -- map both references and queries.
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewReferences[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewReferences[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }
  }
  else if (treeOwner && hasQuerySet && singleMode) // Map only references.
  {
    // Set size of neighbor indices matrix correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);

    // Map indices of neighbors.
    for (size_t i = 0; i < resultingNeighbors.n_cols; i++)
    {
      for (size_t j = 0; j < resultingNeighbors.n_rows; j++)
      {
        resultingNeighbors(j, i) = oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrix.
    delete neighborPtr;
  }
} // Search
void FastMKS<KernelType, TreeType>::Search(const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // No remapping will be necessary because we are using the cover tree.
  Timer::Start("computing_products");
  indices.set_size(k, referenceSet.n_cols);
  kernels.set_size(k, referenceSet.n_cols);
  kernels.fill(-DBL_MAX);

  // Naive implementation.
  if (naive)
  {
    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < referenceSet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        if (q == r)
          continue; // Don't return the point as its own candidate.

        const double eval = metric.Kernel().Evaluate(referenceSet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");
    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, referenceSet, indices, kernels,
        metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < referenceSet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    // Save the number of pruned nodes.
    const size_t numPrunes = traverser.NumPrunes();
    Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.
  Timer::Stop("computing_products");

  Search(referenceTree, k, indices, kernels);
}
void FastMKS<KernelType, TreeType>::Search(
    const typename TreeType::Mat& querySet,
    const size_t k,
    arma::Mat<size_t>& indices,
    arma::mat& kernels)
{
  Timer::Start("computing_products");

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, querySet.n_cols);
  kernels.set_size(k, querySet.n_cols);

  // Naive implementation.
  if (naive)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < querySet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        const double eval = metric.Kernel().Evaluate(querySet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");
    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, indices, kernels, metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.  First, we need to build the query tree.  We
  // are assuming it doesn't map anything...
  Timer::Stop("computing_products");
  Timer::Start("tree_building");
  TreeType queryTree(querySet);
  Timer::Stop("tree_building");

  Search(&queryTree, k, indices, kernels);
}
///
/// \brief Vespucci::Math::DimensionReduction::plsregress PLS regression using
/// the SIMPLS algorithm.
/// This is essentially a line-for-line rewrite of plsregress from the Octave
/// statistics package.
/// Copyright (C) 2012 Fernando Damian Nieuwveldt <*****@*****.**>
/// This is an implementation of the SIMPLS algorithm:
/// Reference: SIMPLS: An alternative approach to partial least squares regression.
/// Chemometrics and Intelligent Laboratory Systems (1993)
/// \param X Observation matrix (observations in rows, predictors in columns).
/// \param Y Response matrix.
/// \param components Number of PLS components.
/// \param X_loadings "Output" values.
/// \param Y_loadings
/// \param X_scores
/// \param Y_scores
/// \param coefficients
/// \param percent_variance
/// \param fitted
/// \return
///
bool Vespucci::Math::DimensionReduction::plsregress(arma::mat X, arma::mat Y,
                                                    int components,
                                                    arma::mat &X_loadings,
                                                    arma::mat &Y_loadings,
                                                    arma::mat &X_scores,
                                                    arma::mat &Y_scores,
                                                    arma::mat &coefficients,
                                                    arma::mat &percent_variance,
                                                    arma::mat &fitted)
{
    using namespace arma;
    uword observations = X.n_rows;
    uword predictors = X.n_cols;
    uword responses = Y.n_cols;
    //Mean centering
    mat Xmeans = arma::mean(X);
    mat Ymeans = arma::mean(Y);
    //Octave does below with bsxfun. After optimization, this should hopefully
    //not be slower.
    X.each_row() -= Xmeans; //element-wise subtraction of mean
    Y.each_row() -= Ymeans; //same

    mat S = trans(X) * Y;
    mat R = zeros(predictors, components);
    mat P = R;
    mat V = R;
    mat T = zeros(observations, components);
    mat U = T;
    mat Q = zeros(responses, components);
    mat eigvec;
    vec eigval;

    mat q;
    mat r;
    mat t;
    mat nt;
    mat p;
    mat u;
    mat v;
    double max_eigval;
    for (int i = 0; i < components; ++i){
        eig_sym(eigval, eigvec, (trans(S) * S));
        max_eigval = eigval.max();
        uvec dom_index = find(eigval >= max_eigval);
        uword dominant_index = dom_index(0);

        q = eigvec.col(dominant_index);

        r = S*q; //X block factor weights
        t = X*r; //X block factor scores
        t.each_row() -= mean(t); //center t
        nt = arma::sqrt(t.t()*t); //compute norm (is arma::norm() the same?)
        t.each_row() /= nt;
        r.each_row() /= nt; //normalize

        p = X.t()*t; //X block factor loadings
        q = Y.t()*t; //Y block factor loadings
        u = Y*q; //Y block factor scores
        v = p;

        //Ensure orthogonality
        if (i > 0){
            v = v - V*(V.t()*p);
            u = u - T*(T.t()*u);
        }
        v.each_row() /= arma::sqrt(trans(v) * v); //normalize orthogonal loadings
        S = S - v * (trans(v)*S); //deflate S wrt loadings

        R.col(i) = r;
        T.col(i) = t;
        P.col(i) = p;
        Q.col(i) = q;
        U.col(i) = u;
        V.col(i) = v;
    }

    //Regression coefficients
    mat B = R*trans(Q);
    fitted = T*trans(Q);
    fitted.each_row() += Ymeans;

    //Octave creates copies from inputs before sending to output. Doing same
    //here just to be safe.
    coefficients = B;
    X_scores = T;
    X_loadings = P;
    Y_scores = U;
    Y_loadings = Q;
    //projection = R;
    percent_variance.set_size(2, coefficients.n_cols);
    percent_variance.row(0) = sum(arma::abs(P)%arma::abs(P)) / sum(sum(arma::abs(X)%arma::abs(X)));
    percent_variance.row(1) = sum(arma::abs(Q)%arma::abs(Q)) / sum(sum(arma::abs(Y)%arma::abs(Y)));
    return true;
}
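// A minimal usage sketch (illustrative; the data here are random
// placeholders): relating a 50x10 predictor matrix to a 50x2 response matrix
// with two PLS components. All output matrices are sized by plsregress
// itself.
#include <armadillo>

void PlsregressExample()
{
    arma::mat X(50, 10, arma::fill::randu);
    arma::mat Y(50, 2, arma::fill::randu);
    arma::mat X_loadings, Y_loadings, X_scores, Y_scores;
    arma::mat coefficients, percent_variance, fitted;
    Vespucci::Math::DimensionReduction::plsregress(X, Y, 2, X_loadings,
                                                   Y_loadings, X_scores,
                                                   Y_scores, coefficients,
                                                   percent_variance, fitted);
    // X_scores is 50x2; coefficients maps centered X to centered Y; rows 0
    // and 1 of percent_variance hold the variance captured per component in
    // X and Y, respectively.
}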
void RAModel<SortPolicy>::Search(arma::mat&& querySet,
                                 const size_t k,
                                 arma::Mat<size_t>& neighbors,
                                 arma::mat& distances)
{
  // Apply the random basis if necessary.
  if (randomBasis)
    querySet = q * querySet;

  Log::Info << "Searching for " << k << " approximate nearest neighbors with ";
  if (!Naive() && !SingleMode())
    Log::Info << "dual-tree rank-approximate " << TreeName() << " search...";
  else if (!Naive())
    Log::Info << "single-tree rank-approximate " << TreeName() << " search...";
  else
    Log::Info << "brute-force (naive) rank-approximate search...";
  Log::Info << std::endl;

  switch (treeType)
  {
    case KD_TREE:
      if (!kdTreeRA->Naive() && !kdTreeRA->SingleMode())
      {
        // Build a second tree and search.
        Timer::Start("tree_building");
        Log::Info << "Building query tree..." << std::endl;
        std::vector<size_t> oldFromNewQueries;
        typename RAType<tree::KDTree>::Tree queryTree(std::move(querySet),
            oldFromNewQueries, leafSize);
        Log::Info << "Tree built." << std::endl;
        Timer::Stop("tree_building");

        arma::Mat<size_t> neighborsOut;
        arma::mat distancesOut;
        kdTreeRA->Search(&queryTree, k, neighborsOut, distancesOut);

        // Unmap the query points.
        distances.set_size(distancesOut.n_rows, distancesOut.n_cols);
        neighbors.set_size(neighborsOut.n_rows, neighborsOut.n_cols);
        for (size_t i = 0; i < neighborsOut.n_cols; ++i)
        {
          neighbors.col(oldFromNewQueries[i]) = neighborsOut.col(i);
          distances.col(oldFromNewQueries[i]) = distancesOut.col(i);
        }
      }
      else
      {
        // Search without building a second tree.
        kdTreeRA->Search(querySet, k, neighbors, distances);
      }
      break;
    case COVER_TREE:
      // No mapping necessary.
      coverTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case R_TREE:
      // No mapping necessary.
      rTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case R_STAR_TREE:
      // No mapping necessary.
      rStarTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case X_TREE:
      // No mapping necessary.
      xTreeRA->Search(querySet, k, neighbors, distances);
      break;
  }
}
void NeighborSearch<SortPolicy, MetricType, TreeType>::Search(
    const size_t k,
    arma::Mat<size_t>& resultingNeighbors,
    arma::mat& distances)
{
  Timer::Start("computing_neighbors");

  // If we have built the trees ourselves, then we will have to map all the
  // indices back to their original indices when this computation is finished.
  // To avoid an extra copy, we will store the neighbors and distances in a
  // separate matrix.
  arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
  arma::mat* distancePtr = &distances;

  if (treeOwner && !(singleMode && hasQuerySet))
    distancePtr = new arma::mat; // Query indices need to be mapped.
  if (treeOwner)
    neighborPtr = new arma::Mat<size_t>; // All indices need mapping.

  // Set the size of the neighbor and distance matrices.
  neighborPtr->set_size(k, querySet.n_cols);
  distancePtr->set_size(k, querySet.n_cols);
  distancePtr->fill(SortPolicy::WorstDistance());

  // Create the helper object for the tree traversal.
  typedef NeighborSearchRules<SortPolicy, MetricType, TreeType> RuleType;
  RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr, metric);

  if (singleMode)
  {
    // Create the traverser.
    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    // Now have it traverse for each point.
    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);
  }
  else // Dual-tree recursion.
  {
    // Create the traverser.
    typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

    traverser.Traverse(*queryTree, *referenceTree);

    Log::Info << traverser.NumVisited()
        << " node combinations were visited.\n";
    Log::Info << traverser.NumScores() << " node combinations were scored.\n";
    Log::Info << traverser.NumBaseCases() << " base cases were calculated.\n";
  }

  Timer::Stop("computing_neighbors");

  // Now, do we need to do mapping of indices?
  if (!treeOwner)
  {
    // No mapping needed.  We are done.
    return;
  }
  else if (treeOwner && hasQuerySet && !singleMode) // Map both sets.
  {
    // Set size of output matrices correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewQueries[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewQueries[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrices.
    delete neighborPtr;
    delete distancePtr;
  }
  else if (treeOwner && !hasQuerySet)
  {
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewReferences[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewReferences[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }
  }
  else if (treeOwner && hasQuerySet && singleMode) // Map only references.
  {
    // Set size of neighbor indices matrix correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);

    // Map indices of neighbors.
    for (size_t i = 0; i < resultingNeighbors.n_cols; i++)
    {
      for (size_t j = 0; j < resultingNeighbors.n_rows; j++)
      {
        resultingNeighbors(j, i) = oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrix.
    delete neighborPtr;
  }
} // Search