/**
 * Expand the ball so that it contains every point (column) of the given
 * matrix.
 *
 * If the radius is negative (presumably marking a not-yet-initialized bound),
 * the ball is first reset to a zero-radius ball at the first point.
 *
 * @param data Matrix whose columns are the points to include.
 * @return Reference to this bound after expansion.
 */
const BallBound<VecType>&
BallBound<VecType>::operator|=(const MatType& data)
{
  // Nothing to add; also avoids touching data.col(0) on an empty matrix.
  if (data.n_cols == 0)
    return *this;

  if (radius < 0)
  {
    center = data.col(0);
    radius = 0;
  }

  // Now iteratively add points.  Every column is examined (including the
  // first one) because the bound may already have been initialized elsewhere.
  for (size_t i = 0; i < data.n_cols; ++i)
  {
    const double dist = metric::EuclideanDistance::Evaluate(center,
        (VecType) data.col(i));
    const double excess = dist - radius;

    if (excess > 0)
    {
      // Move the center (excess / 2) towards the new point and grow the
      // radius by (excess / 2).  The shift is scaled by the full distance so
      // that it has length (excess / 2); this keeps the previous ball inside
      // the new one while the new point lands exactly on the boundary.
      // excess > 0 with radius >= 0 implies dist > 0, so the division is safe.
      const arma::vec diff = data.col(i) - center;
      center += (excess / (2 * dist)) * diff;
      radius += 0.5 * excess;
    }
  }

  return *this;
}
inline static void Initialize(const MatType& V, const size_t r, arma::mat& W, arma::mat& H) { size_t n = V.n_rows; size_t m = V.n_cols; double V_avg = 0; size_t count = 0; double min = DBL_MAX; for(typename MatType::const_row_col_iterator it = V.begin();it != V.end();it++) { if(*it != 0) { count++; V_avg += *it; if(*it < min) min = *it; } } V_avg = sqrt(((V_avg / (n * m)) - min) / r); // Intialize to random values. W.randu(n, r); H.randu(r, m); W = W + V_avg; H = H + V_avg; }
/**
 * Apply the given split to the training set and print, for each side, the
 * per-label target mean divided by the number of samples on that side.
 *
 * @param split     Split functor; invoked as split(train_set, left, right).
 * @param train_set Training samples to partition.
 */
void ClassificationTree::print_train_log(const TreeNode::PtrSplitNodeBase split,
                                         const TrainingSet &train_set) const
{
    MatType ltype = train_set.get_label_type();
    MatType ftype = train_set.get_feature_type();
    int rows = (int)ltype.total();

    cv::Mat_<double> left_tmp;
    cv::Mat_<double> right_tmp;
    TrainingSet left_set(ftype, ltype);
    TrainingSet right_set(ftype, ltype);

    // Partition the samples, then compute the target mean of each side.
    split->operator()(train_set, left_set, right_set);
    left_set.compute_target_mean(left_tmp);
    right_set.compute_target_mean(right_tmp);

    // Column-vector headers over the buffers of the computed means (no copy).
    cv::Mat_<double> left_dist(rows, 1, (double*)left_tmp.data);
    cv::Mat_<double> right_dist(rows, 1, (double*)right_tmp.data);

    // NOTE(review): if a side is empty the division below yields inf/NaN in
    // the output (assuming size() returns 0) - confirm whether that is wanted.
    printf("left dist\n");
    for (unsigned ii = 0; ii < left_dist.total(); ++ii) {
        // %u matches the unsigned loop index.
        printf("\tlabel%u:%f\n", ii, left_dist.at<double>(ii) / left_set.size());
    }

    printf("right dist\n");
    for (unsigned ii = 0; ii < right_dist.total(); ++ii) {
        printf("\tlabel%u:%f\n", ii, right_dist.at<double>(ii) / right_set.size());
    }
}
/**
 * Build a short description of a matrix's dimensions in the form
 * "<label> ~ <height> x <width>".
 *
 * @param A     Matrix providing Height() and Width().
 * @param label Text placed in front of the dimensions.
 * @return The formatted description.
 */
inline std::string DimsString( const MatType& A, std::string label="Matrix" )
{
    std::ostringstream description;
    description << label;
    description << " ~ ";
    description << A.Height();
    description << " x ";
    description << A.Width();
    return description.str();
}
inline static void Initialize(const MatType& V, const size_t r, arma::mat& W, arma::mat& H) { const size_t n = V.n_rows; const size_t m = V.n_cols; double avgV = 0; size_t count = 0; double min = DBL_MAX; // Iterate over all elements in the matrix (for sparse matrices, this only // iterates over nonzeros). for (typename MatType::const_row_col_iterator it = V.begin(); it != V.end(); ++it) { ++count; avgV += *it; // Track the minimum value. if (*it < min) min = *it; } avgV = sqrt(((avgV / (n * m)) - min) / r); // Initialize to random values. W.randu(n, r); H.randu(r, m); W = W + avgV; H = H + avgV; }
/**
 * Grow the ball until it encloses every column of the given matrix.
 *
 * A negative radius causes the ball to be reset to a zero-radius ball at the
 * first column before the points are processed.
 *
 * @param data Matrix whose columns are the points to enclose.
 * @return Reference to this bound.
 */
const BallBound<MetricType, VecType>&
BallBound<MetricType, VecType>::operator|=(const MatType& data)
{
  if (radius < 0)
  {
    center = data.col(0);
    radius = 0;
  }

  for (size_t col = 0; col < data.n_cols; ++col)
  {
    const ElemType dist = metric->Evaluate(center, (VecType) data.col(col));

    // Points already inside (or on) the ball need no work.
    if (!(dist > radius))
      continue;

    // Shift the center towards the outlying point by half of the overshoot
    // and enlarge the radius by the same amount, so the old ball stays
    // covered and the new point ends up on the boundary.
    const VecType towardsPoint = data.col(col) - center;
    center += ((dist - radius) / (2 * dist)) * towardsPoint;
    radius = 0.5 * (dist + radius);
  }

  return *this;
}
/**
 * Construct a sub-matrix that refers to the whole of the given matrix: the
 * row range is initialized to 0 .. m.numRows() and the column range to
 * 0 .. m.numColumns().
 *
 * NOTE(review): the end indices look like exclusive bounds - confirm against
 * the accessors that use them.
 *
 * @param m Matrix this sub-matrix is attached to.
 */
SubMatrix(MatType& m) :
    matrix(m),
    begin_row(0),
    end_row(m.numRows()),
    begin_column(0),
    end_column(m.numColumns())
{ }
size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data, const size_t emptyCluster, const arma::mat& oldCentroids, arma::mat& newCentroids, arma::Col<size_t>& clusterCounts, MetricType& metric, const size_t iteration) { // If necessary, calculate the variances and assignments. if (iteration != this->iteration || assignments.n_elem != data.n_cols) Precalculate(data, oldCentroids, clusterCounts, metric); this->iteration = iteration; // Now find the cluster with maximum variance. arma::uword maxVarCluster; variances.max(maxVarCluster); // Now, inside this cluster, find the point which is furthest away. size_t furthestPoint = data.n_cols; double maxDistance = -DBL_MAX; for (size_t i = 0; i < data.n_cols; ++i) { if (assignments[i] == maxVarCluster) { const double distance = std::pow(metric.Evaluate(data.col(i), newCentroids.col(maxVarCluster)), 2.0); if (distance > maxDistance) { maxDistance = distance; furthestPoint = i; } } } // Take that point and add it to the empty cluster. newCentroids.col(maxVarCluster) *= (double(clusterCounts[maxVarCluster]) / double(clusterCounts[maxVarCluster] - 1)); newCentroids.col(maxVarCluster) -= (1.0 / (clusterCounts[maxVarCluster] - 1.0)) * arma::vec(data.col(furthestPoint)); clusterCounts[maxVarCluster]--; clusterCounts[emptyCluster]++; newCentroids.col(emptyCluster) = arma::vec(data.col(furthestPoint)); assignments[furthestPoint] = emptyCluster; // Modify the variances, as necessary. variances[emptyCluster] = 0; // One has already been subtracted from clusterCounts[maxVarCluster]. variances[maxVarCluster] = (1.0 / (clusterCounts[maxVarCluster])) * ((clusterCounts[maxVarCluster] + 1) * variances[maxVarCluster] - maxDistance); // Output some debugging information. Log::Debug << "Point " << furthestPoint << " assigned to empty cluster " << emptyCluster << ".\n"; return 1; // We only changed one point. }
/**
 * Copy a block of coefficients into the `ac` map.
 *
 * Entry (in_chan, out_chan) of `coef` is written to
 * ac(in_chan * height + i, out_chan * width + j).
 *
 * @param i    Row offset inside each input-channel band.
 * @param j    Column offset inside each output-channel band.
 * @param coef Coefficients indexed as (input channel, output channel).
 */
void Recipe::set_coefficients(int i, int j, const MatType &coef)
{
    for (int in_chan = 0; in_chan < coef.rows(); ++in_chan) {
        // The destination row only depends on the input channel.
        const int dst_row = in_chan*height + i;

        for (int out_chan = 0; out_chan < coef.cols(); ++out_chan) {
            const int dst_col = out_chan*width + j;
            ac(dst_row, dst_col) = coef(in_chan, out_chan);
        }
    }
}
size_t PerformSplit(MatType& data, const size_t begin, const size_t count, const typename SplitType::SplitInfo& splitInfo, std::vector<size_t>& oldFromNew) { // This method modifies the input dataset. We loop both from the left and // right sides of the points contained in this node. size_t left = begin; size_t right = begin + count - 1; // First half-iteration of the loop is out here because the termination // condition is in the middle. while ((left <= right) && (SplitType::AssignToLeftNode(data.col(left), splitInfo))) left++; while ((!SplitType::AssignToLeftNode(data.col(right), splitInfo)) && (left <= right) && (right > 0)) right--; // Shortcut for when all points are on the right. if (left == right && right == 0) return left; while (left <= right) { // Swap columns. data.swap_cols(left, right); // Update the indices for what we changed. size_t t = oldFromNew[left]; oldFromNew[left] = oldFromNew[right]; oldFromNew[right] = t; // See how many points on the left are correct. When they are correct, // increase the left counter accordingly. When we encounter one that isn't // correct, stop. We will switch it later. while (SplitType::AssignToLeftNode(data.col(left), splitInfo) && (left <= right)) left++; // Now see how many points on the right are correct. When they are correct, // decrease the right counter accordingly. When we encounter one that isn't // correct, stop. We will switch it with the wrong point we found in the // previous loop. while ((!SplitType::AssignToLeftNode(data.col(right), splitInfo)) && (left <= right)) right--; } Log::Assert(left == right + 1); return left; }
size_t MaxVarianceNewCluster::EmptyCluster(const MatType& data, const size_t emptyCluster, const MatType& centroids, arma::Col<size_t>& clusterCounts, arma::Col<size_t>& assignments) { // First, we need to find the cluster with maximum variance (by which I mean // the sum of the covariance matrices). arma::vec variances; variances.zeros(clusterCounts.n_elem); // Start with 0. // Add the variance of each point's distance away from the cluster. I think // this is the sensible thing to do. for (size_t i = 0; i < data.n_cols; i++) { variances[assignments[i]] += arma::as_scalar( arma::var(data.col(i) - centroids.col(assignments[i]))); } // Now find the cluster with maximum variance. arma::uword maxVarCluster; variances.max(maxVarCluster); // Now, inside this cluster, find the point which is furthest away. size_t furthestPoint = data.n_cols; double maxDistance = -DBL_MAX; for (size_t i = 0; i < data.n_cols; i++) { if (assignments[i] == maxVarCluster) { double distance = arma::as_scalar( arma::var(data.col(i) - centroids.col(maxVarCluster))); if (distance > maxDistance) { maxDistance = distance; furthestPoint = i; } } } // Take that point and add it to the empty cluster. clusterCounts[maxVarCluster]--; clusterCounts[emptyCluster]++; assignments[furthestPoint] = emptyCluster; // Output some debugging information. Log::Debug << "Point " << furthestPoint << " assigned to empty cluster " << emptyCluster << ".\n"; return 1; // We only changed one point. }
/**
 * Dimensionality check during initialization.
 *
 * Verifies that Sx_ is square and that its size matches x_.  On success
 * nDim_ is set to x_.size(); on failure an error is written to std::cerr and
 * nDim_ is left untouched.
 *
 * @return true when the dimensions are consistent, false otherwise.
 */
bool dimCheck()
{
  if( Sx_.rows() == Sx_.cols() ){
    if( Sx_.rows() == x_.size() ){
      nDim_ = x_.size();
      return true;
    }

    std::cerr << "Error: VecType and MatType dimension mismatch \n";
    return false;
  }

  std::cerr << "Error: MatType must be a square matrix \n";
  return false;
}
/**
 * Train the perceptron: sweep over the dataset until a full pass produces no
 * misclassification or maxIterations passes have been made.
 *
 * @param data            Training points; one per column.
 * @param labels          Class label of every point.
 * @param instanceWeights Optional per-point weights; ignored when empty.
 */
void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Train(
    const MatType& data,
    const arma::Row<size_t>& labels,
    const arma::rowvec& instanceWeights)
{
  arma::mat scores;
  arma::uword predictedClass, maxCol;
  size_t trueClass;

  LearnPolicy LP;
  const bool hasWeights = (instanceWeights.n_elem > 0);

  bool converged = false;
  for (size_t pass = 0; (pass < maxIterations) && (!converged); ++pass)
  {
    // Assume convergence until a misclassified point is seen in this pass.
    converged = true;

    for (size_t point = 0; point < data.n_cols; ++point)
    {
      // Score every class for this point; the predicted class is the row
      // holding the maximum.
      scores = weights.t() * data.col(point) + biases;
      scores.max(predictedClass, maxCol);

      // Correctly classified: nothing to update.
      if (predictedClass == labels(0, point))
        continue;

      converged = false;
      trueClass = labels(0, point);

      // Hand the learning policy the point, the wrongly predicted class and
      // the correct class (plus the instance weight when available).
      if (hasWeights)
        LP.UpdateWeights(data.col(point), weights, biases, predictedClass,
            trueClass, instanceWeights(point));
      else
        LP.UpdateWeights(data.col(point), weights, biases, predictedClass,
            trueClass);
    }
  }
}
/**
 * Kernel-weighted centroid update.
 *
 * Each neighbor with a strictly positive distance contributes its point
 * scaled by kernel.Gradient(d / radius) / (d / radius); the accumulated sum
 * is added onto `centroid` and then divided by the total weight.
 *
 * @param data      Dataset; one point per column.
 * @param neighbors Column indices of the neighboring points.
 * @param distances Distance of each neighbor (same order as `neighbors`).
 * @param centroid  Accumulator; receives the weighted mean.
 * @return false when the total weight is zero (centroid is not normalized),
 *     true otherwise.
 */
typename std::enable_if<ApplyKernel, bool>::type
MeanShift<UseKernel, KernelType, MatType>::
CalculateCentroid(const MatType& data,
                  const std::vector<size_t>& neighbors,
                  const std::vector<double>& distances,
                  arma::colvec& centroid)
{
  double totalWeight = 0;

  for (size_t n = 0; n < neighbors.size(); ++n)
  {
    // Zero-distance neighbors are skipped (the weight divides by distance).
    if (!(distances[n] > 0))
      continue;

    const double scaled = distances[n] / radius;
    const double w = kernel.Gradient(scaled) / scaled;
    totalWeight += w;
    centroid += w * data.unsafe_col(neighbors[n]);
  }

  if (totalWeight == 0)
    return false;

  centroid /= totalWeight;
  return true;
}
inline static void Initialize(const MatType& V, const size_t r, arma::mat& W, arma::mat& H) { const size_t n = V.n_rows; const size_t m = V.n_cols; if (columnsToAverage > m) { Log::Warn << "Number of random columns (columnsToAverage) is more than " << "the number of columns available in the V matrix; weird results " << "may ensue!" << std::endl; } W.zeros(n, r); // Initialize W matrix with random columns. for (size_t col = 0; col < r; col++) { for (size_t randCol = 0; randCol < columnsToAverage; randCol++) { // .col() does not work in this case, as of Armadillo 3.920. W.unsafe_col(col) += V.col(math::RandInt(0, m)); } } // Now divide by p. W /= columnsToAverage; // Initialize H to random values. H.randu(r, m); }
/**
 * Compute, from scratch, the assignment of every point to its closest old
 * centroid and the variance of every cluster (mean squared distance of its
 * points to the centroid).
 *
 * @param data          Dataset; one point per column.
 * @param oldCentroids  Centroids to assign points to; one per column.
 * @param clusterCounts Number of points per cluster, used as divisor.
 * @param metric        Distance metric.
 */
void MaxVarianceNewCluster::Precalculate(const MatType& data,
                                         const arma::mat& oldCentroids,
                                         arma::Col<size_t>& clusterCounts,
                                         MetricType& metric)
{
  // We have to calculate the variances of each cluster and the assignments of
  // each point.  This is most easily done by iterating through the entire
  // dataset.
  variances.zeros(oldCentroids.n_cols);
  assignments.set_size(data.n_cols);

  for (size_t i = 0; i < data.n_cols; ++i)
  {
    // Find the closest centroid to this point.
    double minDistance = std::numeric_limits<double>::infinity();
    size_t closestCluster = oldCentroids.n_cols; // Invalid value.

    for (size_t j = 0; j < oldCentroids.n_cols; ++j)
    {
      const double distance = metric.Evaluate(data.col(i),
          oldCentroids.col(j));

      if (distance < minDistance)
      {
        minDistance = distance;
        closestCluster = j;
      }
    }

    assignments[i] = closestCluster;

    // minDistance already is the distance to the closest centroid, so it is
    // reused instead of evaluating the metric again.  If no centroid was
    // selected (e.g. only non-comparable distances), the invalid index must
    // not be used to address `variances`.
    if (closestCluster < oldCentroids.n_cols)
      variances[closestCluster] += std::pow(minDistance, 2.0);
  }

  // Divide by the number of points in the cluster to produce the variance,
  // unless the cluster is empty or contains only one point, in which case we
  // set the variance to 0.
  for (size_t i = 0; i < clusterCounts.n_elem; ++i)
  {
    if (clusterCounts[i] <= 1)
      variances[i] = 0;
    else
      variances[i] /= clusterCounts[i];
  }
}
/**
 * Build a decision stump: pick a splitting attribute (row of the data) and
 * train on it.
 *
 * Only attributes with non-identical values (per IsDistinct) are considered;
 * attribute 0 is used when none qualifies or improves on the initial gain.
 *
 * NOTE(review): the attribute with the smallest (rootEntropy - entropy) below
 * 0.0 is kept.  Whether this maximises the information gain depends on the
 * sign convention of CalculateEntropy()/SetupSplitAttribute(), which is not
 * visible here - confirm.
 *
 * @param data          Training data; one attribute per row.
 * @param labels        Label of every point.
 * @param classes       Number of classes.
 * @param inpBucketSize Bucket size stored for later use.
 */
DecisionStump<MatType>::DecisionStump(const MatType& data,
                                      const arma::Row<size_t>& labels,
                                      const size_t classes,
                                      size_t inpBucketSize)
{
  numClass = classes;
  bucketSize = inpBucketSize;

  const double rootEntropy = CalculateEntropy<size_t>(
      labels.subvec(0, labels.n_elem - 1));

  int bestAtt = 0;
  double bestGain = 0.0;

  for (int att = 0; att < data.n_rows; ++att)
  {
    // Attributes whose values are all identical cannot split anything.
    if (!IsDistinct<double>(data.row(att)))
      continue;

    // Evaluate the split on this attribute relative to the root.
    const double entropy = SetupSplitAttribute(data.row(att), labels);
    const double gain = rootEntropy - entropy;

    if (gain < bestGain)
    {
      bestAtt = att;
      bestGain = gain;
    }
  }

  splitAttribute = bestAtt;

  // Once the splitting attribute has been decided, train on it.
  TrainOnAtt<double>(data.row(splitAttribute), labels);
}
/**
 * Compute class probabilities for every point of the dataset.
 *
 * Row 1 of the output holds the sigmoid of
 * parameters(0) + point^T * parameters[1..end]; row 0 holds its complement.
 *
 * @param dataset       Points to classify; one per column.
 * @param probabilities Output, resized to 2 x (number of points).
 */
void LogisticRegression<MatType>::Classify(const MatType& dataset,
                                           arma::mat& probabilities) const
{
  // Index of the last weight; parameters(0) is used as the intercept term.
  const arma::uword lastWeight = parameters.n_elem - 1;

  // Set correct size of output matrix.
  probabilities.set_size(2, dataset.n_cols);

  // Sigmoid response, transposed into a row.
  probabilities.row(1) = 1.0 / (1.0 +
      arma::exp(-parameters(0) - dataset.t() *
      parameters.subvec(1, lastWeight))).t();

  // Complementary probability.
  probabilities.row(0) = 1.0 - probabilities.row(1);
}
/**
 * Predict a 0/1 response for every point.
 *
 * The sigmoid of parameters(0) + point^T * parameters[1..end] is shifted by
 * (1.0 - decisionBoundary) and converted to size_t.
 *
 * @param predictors       Points to classify; one per column.
 * @param responses        Output responses.
 * @param decisionBoundary Threshold on the sigmoid value.
 */
void LogisticRegression<MatType>::Predict(const MatType& predictors,
                                          arma::Row<size_t>& responses,
                                          const double decisionBoundary) const
{
  // Index of the last weight; parameters(0) is used as the intercept term.
  const arma::uword lastWeight = parameters.n_elem - 1;

  // Adding (1.0 - decisionBoundary) pushes sigmoid values at or above the
  // boundary to 1.0 or more, so the integer conversion (presumably
  // truncating) produces 1 for them and 0 for the rest.
  responses = arma::conv_to<arma::Row<size_t>>::from(
      (1.0 / (1.0 + arma::exp(-parameters(0) - predictors.t() *
      parameters.subvec(1, lastWeight)))) + (1.0 - decisionBoundary));
}
/**
 * Compute the address of every point of the dataset and store it, together
 * with the point's column index, in `addresses`.
 *
 * @param data Dataset; one point per column.
 */
void UBTreeSplit<BoundType, MatType>::InitializeAddresses(const MatType& data)
{
  const size_t numPoints = data.n_cols;
  addresses.resize(numPoints);

  for (size_t point = 0; point < numPoints; ++point)
  {
    // Remember which column this address belongs to.
    addresses[point].second = point;

    // The address has one entry per dimension and starts zeroed.
    addresses[point].first.zeros(data.n_rows);
    bound::addr::PointToAddress(addresses[point].first, data.col(point));
  }
}
inline static void Cluster(const MatType& data, const size_t clusters, arma::mat& centroids) { centroids.set_size(data.n_rows, clusters); for (size_t i = 0; i < clusters; ++i) { // Randomly sample a point. const size_t index = math::RandInt(0, data.n_cols); centroids.col(i) = data.col(index); } }
/**
 * Expand this node's bound over its points and, if the node holds more than
 * maxLeafSize points and the splitter succeeds, create the two children.
 *
 * @param data        Dataset; reordered by the splitter.
 * @param oldFromNew  Index mapping kept in step with the reordering.
 * @param maxLeafSize Maximum number of points a leaf may hold.
 * @param splitter    Object performing the actual split.
 */
void BinarySpaceTree<BoundType, StatisticType, MatType, SplitType>::SplitNode(
    MatType& data,
    std::vector<size_t>& oldFromNew,
    const size_t maxLeafSize,
    SplitType& splitter)
{
  // Grow the bound so that it covers every point held by this node.
  bound |= data.cols(begin, begin + count - 1);

  // Half of the bound's diameter.
  furthestDescendantDistance = 0.5 * bound.Diameter();

  // Small enough to stay a leaf.
  if (count <= maxLeafSize)
    return;

  // The splitter reorders `data`, updates `oldFromNew`, and reports in
  // `splitCol` the first column of the right partition.  It may refuse to
  // split (for instance when all points are identical).
  size_t splitCol;
  if (!splitter.SplitNode(bound, data, begin, count, splitCol, oldFromNew))
    return;

  // The children's constructors recursively continue the splitting.
  left = new BinarySpaceTree<BoundType, StatisticType, MatType>(data, begin,
      splitCol - begin, oldFromNew, splitter, this, maxLeafSize);
  right = new BinarySpaceTree<BoundType, StatisticType, MatType>(data,
      splitCol, begin + count - splitCol, oldFromNew, splitter, this,
      maxLeafSize);

  // Distance between this node's centroid and each child's centroid.
  arma::vec ownCentroid, leftCentroid, rightCentroid;
  Centroid(ownCentroid);
  left->Centroid(leftCentroid);
  right->Centroid(rightCentroid);

  const double distToLeft = bound.Metric().Evaluate(ownCentroid,
      leftCentroid);
  const double distToRight = bound.Metric().Evaluate(ownCentroid,
      rightCentroid);

  left->ParentDistance() = distToLeft;
  right->ParentDistance() = distToRight;
}
/**
 * Train the stump: pick a splitting dimension (row of the data) and train on
 * it.  The already stored `classes` and `bucketSize` members are used as-is.
 *
 * Only dimensions with non-identical values (per IsDistinct) are considered;
 * dimension 0 is used when none qualifies or improves on the initial gain.
 *
 * NOTE(review): the dimension with the smallest (rootEntropy - entropy) below
 * 0.0 is kept.  Whether this maximises the information gain depends on the
 * sign convention of CalculateEntropy()/SetupSplitDimension(), which is not
 * visible here - confirm.
 *
 * @param data    Training data; one dimension per row.
 * @param labels  Label of every point.
 * @param weights Per-point weights, forwarded to the entropy computations.
 */
void DecisionStump<MatType>::Train(const MatType& data,
                                   const arma::Row<size_t>& labels,
                                   const arma::rowvec& weights)
{
  // The former `this->classes = classes;` / `this->bucketSize = bucketSize;`
  // statements were self-assignments (this function has no such parameters)
  // and have been removed.

  size_t bestDim = 0;
  double entropy;
  const double rootEntropy = CalculateEntropy<UseWeights>(labels, weights);

  double gain, bestGain = 0.0;
  for (size_t i = 0; i < data.n_rows; ++i)
  {
    // Go through each dimension of the data.
    if (IsDistinct(data.row(i)))
    {
      // For each dimension with non-identical values, treat it as a potential
      // splitting dimension and calculate entropy if split on it.
      entropy = SetupSplitDimension<UseWeights>(data.row(i), labels, weights);

      gain = rootEntropy - entropy;

      if (gain < bestGain)
      {
        bestDim = i;
        bestGain = gain;
      }
    }
  }
  splitDimension = bestDim;

  // Once the splitting column/dimension has been decided, train on it.
  TrainOnDim(data.row(splitDimension), labels);
}
/**
 * Unweighted centroid update: add every neighboring point onto `centroid`
 * and divide by the number of neighbors.
 *
 * @param data      Dataset; one point per column.
 * @param neighbors Column indices of the neighboring points.
 * @param (unnamed) Neighbor distances; unused in this overload.
 * @param centroid  Accumulator; receives the mean of the neighbors.
 * @return false when there are no neighbors (centroid is left untouched),
 *     true otherwise.
 */
typename std::enable_if<!ApplyKernel, bool>::type
MeanShift<UseKernel, KernelType, MatType>::
CalculateCentroid(const MatType& data,
                  const std::vector<size_t>& neighbors,
                  const std::vector<double>&, /*unused*/
                  arma::colvec& centroid)
{
  // Without neighbors the division below would be by zero and would turn the
  // centroid into NaN while still reporting success.  Report failure instead,
  // as the kernel overload does when its total weight is zero.
  if (neighbors.empty())
    return false;

  for (size_t i = 0; i < neighbors.size(); ++i)
    centroid += data.unsafe_col(neighbors[i]);

  centroid /= neighbors.size();

  return true;
}
/**
 * Generate seed points by binning the data on a grid of width binSize and
 * keeping every bin that holds at least minFreq points.
 *
 * @param data    Dataset; one point per column.
 * @param binSize Width of a bin in every dimension.
 * @param minFreq Minimum number of points a bin needs to become a seed.
 * @param seeds   Output; one seed per column (bin coordinates * binSize).
 */
void MeanShift<UseKernel, KernelType, MatType>::GenSeeds(
    const MatType& data,
    const double binSize,
    const int minFreq,
    MatType& seeds)
{
  typedef arma::colvec VecType;
  typedef std::map<VecType, int, less<VecType> > BinCountMap;

  // Count how many points fall into each bin.  operator[] creates a missing
  // entry with a count of zero before it is incremented.
  BinCountMap binCounts;
  for (size_t col = 0; col < data.n_cols; ++col)
  {
    const VecType bin = arma::floor(data.unsafe_col(col) / binSize);
    ++binCounts[bin];
  }

  // First pass: count the bins that are populated well enough.
  size_t numSeeds = 0;
  for (BinCountMap::const_iterator it = binCounts.begin();
       it != binCounts.end(); ++it)
  {
    if (it->second >= minFreq)
      ++numSeeds;
  }

  seeds.set_size(data.n_rows, numSeeds);

  // Second pass: copy the coordinates of those bins.
  size_t next = 0;
  for (BinCountMap::const_iterator it = binCounts.begin();
       it != binCounts.end(); ++it)
  {
    if (it->second >= minFreq)
    {
      seeds.col(next) = it->first;
      ++next;
    }
  }

  // Scale the bin coordinates back by the bin width.
  seeds *= binSize;
}
/**
 * Construct the objective function for the given data.
 *
 * The data member is initialized from math::MakeAlias() applied to the input
 * matrix (presumably a non-copying alias - confirm against MakeAlias), the
 * user and item counts are derived from the data, and the initial point is
 * filled with uniform random values.
 *
 * @param data   Input matrix; row 0 and row 1 are read to determine the
 *     number of users and items respectively.
 * @param rank   Number of rows of the parameter matrix.
 * @param lambda Value stored in the lambda member (presumably the
 *     regularization strength - confirm).
 */
RegularizedSVDFunction<MatType>::RegularizedSVDFunction(const MatType& data,
                                                        const size_t rank,
                                                        const double lambda) :
    data(math::MakeAlias(const_cast<MatType&>(data), false)),
    rank(rank),
    lambda(lambda)
{
  // Number of users and items in the data: one more than the largest value
  // found in row 0 and row 1 respectively.
  numUsers = max(data.row(0)) + 1;
  numItems = max(data.row(1)) + 1;

  // Initialize the parameters: one column per user and per item.
  initialPoint.randu(rank, numUsers + numItems);
}
/**
 * Partition the columns [begin, begin + count) of the dataset in place so
 * that points whose value in splitDimension is less than splitVal precede
 * the others, keeping oldFromNew in step with every column swap.
 *
 * @param data           Dataset; columns are reordered.
 * @param begin          Index of the first column of the node.
 * @param count          Number of columns in the node.
 * @param splitDimension Dimension (row) to compare on.
 * @param splitVal       Threshold; values < splitVal go to the left.
 * @param oldFromNew     Index mapping, permuted together with the columns.
 * @return Index of the first column that belongs to the right side.
 */
size_t MeanSplit<BoundType, MatType>::
PerformSplit(MatType& data,
             const size_t begin,
             const size_t count,
             const size_t splitDimension,
             const double splitVal,
             std::vector<size_t>& oldFromNew)
{
  // An empty node has nothing to partition (and `begin + count - 1` would
  // wrap around when begin is 0).
  if (count == 0)
    return begin;

  // This method modifies the input dataset.  We loop both from the left and
  // right sides of the points contained in this node.
  size_t left = begin;
  size_t right = begin + count - 1;

  // First half-iteration of the loop is out here because the termination
  // condition is in the middle.  The index is always validated before the
  // matrix is read: when every point belongs to the left, `left` ends up one
  // past the node, which may be one past the last column of the matrix.
  while ((left <= right) && (data(splitDimension, left) < splitVal))
    left++;
  while ((left <= right) && (right > 0) &&
         (data(splitDimension, right) >= splitVal))
    right--;

  // Shortcut for when all points are on the right and the node starts at
  // column 0; without it the loop below would decrement `right` below zero.
  if (left == right && right == 0)
    return left;

  while (left <= right)
  {
    // Swap columns.
    data.swap_cols(left, right);

    // Update the indices for what we changed.
    size_t t = oldFromNew[left];
    oldFromNew[left] = oldFromNew[right];
    oldFromNew[right] = t;

    // See how many points on the left are correct.  When we encounter one
    // that isn't correct, stop.  We will switch it later.
    while ((left <= right) && (data(splitDimension, left) < splitVal))
      left++;

    // Now see how many points on the right are correct.  When we encounter
    // one that isn't correct, stop.  We will switch it with the wrong point
    // we found in the previous loop.
    while ((left <= right) && (data(splitDimension, right) >= splitVal))
      right--;
  }

  Log::Assert(left == right + 1);

  return left;
}
/**
 * Classify every point of the test set: the predicted label is the index of
 * the largest entry of weights^T * point + biases.
 *
 * @param test            Points to classify; one per column.
 * @param predictedLabels Output; resized to hold one label per point.
 */
void Perceptron<LearnPolicy, WeightInitializationPolicy, MatType>::Classify(
    const MatType& test,
    arma::Row<size_t>& predictedLabels)
{
  arma::vec tempLabelMat;
  arma::uword maxIndex = 0;

  // Make sure the output can hold one label per point; writing through
  // predictedLabels(0, i) does not grow the row by itself.
  predictedLabels.set_size(test.n_cols);

  // Could probably be faster if done in batch.
  for (size_t i = 0; i < test.n_cols; ++i)
  {
    tempLabelMat = weights.t() * test.col(i) + biases;
    tempLabelMat.max(maxIndex);
    predictedLabels(0, i) = maxIndex;
  }
}
void RefinedStart::Cluster(const MatType& data, const size_t clusters, arma::mat& centroids) const { // This will hold the sampled datasets. const size_t numPoints = size_t(percentage * data.n_cols); MatType sampledData(data.n_rows, numPoints); // vector<bool> is packed so each bool is 1 bit. std::vector<bool> pointsUsed(data.n_cols, false); arma::mat sampledCentroids(data.n_rows, samplings * clusters); for (size_t i = 0; i < samplings; ++i) { // First, assemble the sampled dataset. size_t curSample = 0; while (curSample < numPoints) { // Pick a random point in [0, numPoints). size_t sample = (size_t) math::RandInt(data.n_cols); if (!pointsUsed[sample]) { // This point isn't used yet. So we'll put it in our sample. pointsUsed[sample] = true; sampledData.col(curSample) = data.col(sample); ++curSample; } } // Now, using the sampled dataset, run k-means. In the case of an empty // cluster, we re-initialize that cluster as the point furthest away from // the cluster with maximum variance. This is not *exactly* what the paper // implements, but it is quite similar, and we'll call it "good enough". KMeans<> kmeans; kmeans.Cluster(sampledData, clusters, centroids); // Store the sampled centroids. sampledCentroids.cols(i * clusters, (i + 1) * clusters - 1) = centroids; pointsUsed.assign(data.n_cols, false); } // Now, we run k-means on the sampled centroids to get our final clusters. KMeans<> kmeans; kmeans.Cluster(sampledCentroids, clusters, centroids); }
/**
 * Choose a split value along the given direction from a sample of at most
 * 100 points of the node.
 *
 * The value is the median of the sampled projections plus a random deviation
 * drawn between 75% of the way to the minimum and 75% of the way to the
 * maximum projection.
 *
 * @param data      Dataset; one point per column.
 * @param begin     Index of the first column of the node.
 * @param count     Number of columns in the node.
 * @param direction Vector the points are projected onto.
 * @param splitVal  Output split value (only set when true is returned).
 * @return false when all sampled projections are equal, true otherwise.
 */
bool RPTreeMaxSplit<BoundType, MatType>::GetSplitVal(
    const MatType& data,
    const size_t begin,
    const size_t count,
    const arma::Col<ElemType>& direction,
    ElemType& splitVal)
{
  const size_t maxNumSamples = 100;
  const size_t numSamples = std::min(maxNumSamples, count);

  // Get no more than numSamples distinct samples.
  arma::uvec samples;
  math::ObtainDistinctSamples(begin, begin + count, numSamples, samples);

  // Project every sampled point onto the direction.
  arma::Col<ElemType> projections(samples.n_elem);
  for (size_t s = 0; s < samples.n_elem; ++s)
    projections[s] = arma::dot(data.col(samples[s]), direction);

  const ElemType maximum = arma::max(projections);
  const ElemType minimum = arma::min(projections);

  // All projections coincide: no valid split along this direction.
  if (minimum == maximum)
    return false;

  splitVal = arma::median(projections);

  // Add a random deviation to the median.
  // This algorithm differs from the method suggested in the random projection
  // tree paper, for two reasons:
  //   1. Evaluating the method proposed in the paper is time-consuming, since
  //      we must solve the furthest-pair problem.
  //   2. The proposed method does not appear to guarantee that a valid split
  //      value will be generated (i.e. it can produce a split value where
  //      there may be no points on the left or the right).
  splitVal += math::Random((minimum - splitVal) * 0.75,
      (maximum - splitVal) * 0.75);

  // Fall back to the minimum if the value landed exactly on the maximum.
  if (splitVal == maximum)
    splitVal = minimum;

  return true;
}