Example no. 1
0
void Split(const arma::Mat<T>& input,
           const arma::Row<U>& inputLabel,
           arma::Mat<T>& trainData,
           arma::Mat<T>& testData,
           arma::Row<U>& trainLabel,
           arma::Row<U>& testLabel,
           const double testRatio)
{
  const size_t testSize = static_cast<size_t>(input.n_cols * testRatio);
  const size_t trainSize = input.n_cols - testSize;
  trainData.set_size(input.n_rows, trainSize);
  testData.set_size(input.n_rows, testSize);
  trainLabel.set_size(trainSize);
  testLabel.set_size(testSize);

  const arma::Col<size_t> order =
      arma::shuffle(arma::linspace<arma::Col<size_t>>(0, input.n_cols - 1,
                                                      input.n_cols));

  for (size_t i = 0; i != trainSize; ++i)
  {
    trainData.col(i) = input.col(order[i]);
    trainLabel(i) = inputLabel(order[i]);
  }

  for (size_t i = 0; i != testSize; ++i)
  {
    testData.col(i) = input.col(order[i + trainSize]);
    testLabel(i) = inputLabel(order[i + trainSize]);
  }
}
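
A minimal standalone sketch of the same shuffled train/test split on a toy dataset (the 3x10 matrix, the alternating labels, and the 0.2 test ratio are illustrative assumptions, not part of the listing above):

#include <armadillo>

int main()
{
  // Toy dataset: 10 observations (columns), 3 features each.
  arma::mat data(3, 10, arma::fill::randu);
  arma::Row<size_t> labels(10);
  for (size_t i = 0; i < labels.n_elem; ++i)
    labels[i] = i % 2;

  const double testRatio = 0.2;
  const size_t testSize = static_cast<size_t>(data.n_cols * testRatio);
  const size_t trainSize = data.n_cols - testSize;

  // Shuffle the column indices; the first trainSize go to the training set.
  const arma::Col<size_t> order =
      arma::shuffle(arma::linspace<arma::Col<size_t>>(0, data.n_cols - 1,
                                                      data.n_cols));

  arma::mat trainData(data.n_rows, trainSize), testData(data.n_rows, testSize);
  arma::Row<size_t> trainLabels(trainSize), testLabels(testSize);
  for (size_t i = 0; i < trainSize; ++i)
  {
    trainData.col(i) = data.col(order[i]);
    trainLabels[i] = labels[order[i]];
  }
  for (size_t i = 0; i < testSize; ++i)
  {
    testData.col(i) = data.col(order[i + trainSize]);
    testLabels[i] = labels[order[i + trainSize]];
  }

  trainData.print("train data (8 columns):");
  testData.print("test data (2 columns):");
  return 0;
}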
Example no. 2
0
void ConcreteGridView<DuneGridView>::getRawElementDataImpl(
        arma::Mat<CoordinateType>& vertices,
        arma::Mat<int>& elementCorners,
        arma::Mat<char>& auxData) const
{
    typedef typename DuneGridView::Grid DuneGrid;
    typedef typename DuneGridView::IndexSet DuneIndexSet;
    const int dimGrid = DuneGrid::dimension;
    const int dimWorld = DuneGrid::dimensionworld;
    const int codimVertex = dimGrid;
    const int codimElement = 0;
    typedef Dune::LeafMultipleCodimMultipleGeomTypeMapper<DuneGrid,
            Dune::MCMGElementLayout> DuneElementMapper;
    typedef typename DuneGridView::template Codim<codimVertex>::Iterator
            DuneVertexIterator;
    typedef typename DuneGridView::template Codim<codimElement>::Iterator
            DuneElementIterator;
    typedef typename DuneGridView::template Codim<codimVertex>::Geometry
            DuneVertexGeometry;
    typedef typename DuneGridView::template Codim<codimElement>::Geometry
            DuneElementGeometry;
    typedef typename DuneGrid::ctype ctype;

    const DuneIndexSet& indexSet = m_dune_gv.indexSet();

    vertices.set_size(dimWorld, indexSet.size(codimVertex));
    for (DuneVertexIterator it = m_dune_gv.template begin<codimVertex>();
         it != m_dune_gv.template end<codimVertex>(); ++it)
    {
        size_t index = indexSet.index(*it);
        const DuneVertexGeometry& geom = it->geometry();
        Dune::FieldVector<ctype, dimWorld> vertex = geom.corner(0);
        for (int i = 0; i < dimWorld; ++i)
            vertices(i, index) = vertex[i];
    }

    const int MAX_CORNER_COUNT = dimWorld == 2 ? 2 : 4;
    DuneElementMapper elementMapper(m_dune_gv.grid());
    elementCorners.set_size(MAX_CORNER_COUNT, elementMapper.size());
    for (DuneElementIterator it = m_dune_gv.template begin<codimElement>();
         it != m_dune_gv.template end<codimElement>(); ++it)
    {
        size_t index = elementMapper.map(*it);
        const Dune::GenericReferenceElement<ctype, dimGrid>& refElement =
                Dune::GenericReferenceElements<ctype, dimGrid>::general(it->type());
        const int cornerCount = refElement.size(codimVertex);
        assert(cornerCount <= MAX_CORNER_COUNT);
        for (int i = 0; i < cornerCount; ++i)
            elementCorners(i, index) = indexSet.subIndex(*it, i, codimVertex);
        for (int i = cornerCount; i < MAX_CORNER_COUNT; ++i)
            elementCorners(i, index) = -1;
    }

    auxData.set_size(0, elementCorners.n_cols);
}
Example no. 3
0
    inline ParallelKinematicMachine3PUPS::ParallelKinematicMachine3PUPS() noexcept {
      setMinimalActiveJointActuations({0.39, 0.39, 0.39});
      setMaximalActiveJointActuations({0.95, 0.95, 0.95});

      setEndEffectorJointPositions({
        -0.025561381023353, 0.086293776138137, 0.12,
        0.087513292835791, -0.021010082747031, 0.12,
        -0.061951911812438, -0.065283693391106, 0.12});

      setRedundantJointStartPositions({
        -0.463708870031622, 0.417029254828353, -0.346410161513775,
        0.593012363818459, 0.193069033993384, -0.346410161513775,
        -0.129303493786837, -0.610098288821738, -0.346410161513775});

      setRedundantJointEndPositions({
        -0.247202519085512, 0.292029254828353, 0.086602540378444,
        0.376506012872349, 0.068069033993384, 0.086602540378444,
        -0.129303493786837, -0.360098288821738, 0.086602540378444});

      redundantJointStartToEndPositions_ = redundantJointEndPositions_ - redundantJointStartPositions_;
      redundantJointIndicies_ = arma::find(arma::any(redundantJointStartToEndPositions_));

      redundantJointAngles_.set_size(3, redundantJointIndicies_.n_elem);

      for (std::size_t n = 0; n < redundantJointIndicies_.n_elem; ++n) {
        const double& redundantJointXAngle = std::atan2(redundantJointStartToEndPositions_(1, n), redundantJointStartToEndPositions_(0, n));
        const double& redundantJointYAngle = std::atan2(redundantJointStartToEndPositions_(2, n), redundantJointStartToEndPositions_(1, n));
        redundantJointAngles_.col(n) = arma::Col<double>::fixed<3>({std::cos(redundantJointXAngle) * std::cos(redundantJointYAngle), std::sin(redundantJointXAngle) * std::cos(redundantJointYAngle), std::sin(redundantJointYAngle)});
      }
    }
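
The column written in the loop above is a unit direction vector built from the two joint angles; a quick standalone check of that construction (the angle values are arbitrary, chosen only for the test):

#include <armadillo>
#include <cmath>
#include <iostream>

int main()
{
  const double xAngle = 0.7, yAngle = -0.3;
  const arma::vec direction = {std::cos(xAngle) * std::cos(yAngle),
                               std::sin(xAngle) * std::cos(yAngle),
                               std::sin(yAngle)};

  // cos^2(x)cos^2(y) + sin^2(x)cos^2(y) + sin^2(y) = cos^2(y) + sin^2(y) = 1.
  std::cout << "norm = " << arma::norm(direction, 2) << '\n'; // prints 1
  return 0;
}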
Example no. 4
0
void MainWindow::loadSeries() {
    QString seriesFile = __fileManager.openFile(0, this, "Open Series File...", "*.mat");

    if (seriesFile != "") {
        cout << "Loading start ..." << flush;
        _data.load(seriesFile.toStdString().c_str());
        cout << " done. " << endl;

        _dataT = arma::trans(_data);
        cout << _data.n_cols << "," << _data.n_rows << endl;

        _imageBuffer.set_size(_mapid.n_rows, _mapid.n_cols);
        for (int i = 0; i < _mapid.n_rows; i++) {
            for (int j = 0; j < _mapid.n_cols; j++) {
                // NaN is the only value that compares unequal to itself.
                if (_mapid(i,j) != _mapid(i,j)) {
                    _imageBuffer(i,j) = NAN;
                } else {
                    _imageBuffer(i,j) = 0;
                }
            }
        }

        ui.slider->setMaximum(_data.n_cols - 1);

        ui.minValue->setValue(_data.min());
        ui.maxValue->setValue(_data.max());

        _mapItem->setImage(_imageBuffer.memptr(), _imageBuffer.n_rows, _imageBuffer.n_cols);
        _mapItem->setRange(ui.minValue->value(), ui.maxValue->value());
        _mapItem->setInteraction(this);

        selectValue(0);
    }
}
Example no. 5
0
void FastMKS<KernelType, TreeType>::Search(TreeType* queryTree,
                                           const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // If either naive mode or single mode is specified, this must fail.
  if (naive || singleMode)
  {
    throw std::invalid_argument("can't call Search() with a query tree when "
        "single mode or naive search is enabled");
  }

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, queryTree->Dataset().n_cols);
  kernels.set_size(k, queryTree->Dataset().n_cols);
  kernels.fill(-DBL_MAX);

  Timer::Start("computing_products");
  typedef FastMKSRules<KernelType, TreeType> RuleType;
  RuleType rules(referenceSet, queryTree->Dataset(), indices, kernels,
      metric.Kernel());

  typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

  traverser.Traverse(*queryTree, *referenceTree);

  Log::Info << rules.BaseCases() << " base cases." << std::endl;
  Log::Info << rules.Scores() << " scores." << std::endl;

  Timer::Stop("computing_products");
}
Example no. 6
0
void SpaceHelper<BasisFunctionType>::
    getNormalsAtGlobalDofInterpolationPoints_defaultImplementation(
        const Space<BasisFunctionType> &space,
        arma::Mat<CoordinateType> &normals) {
  std::vector<Point3D<CoordinateType>> vNormals;
  space.getGlobalDofNormals(vNormals);

  const size_t pointCount = vNormals.size();
  const size_t worldDim = space.worldDimension();
  normals.set_size(worldDim, pointCount);
  for (size_t p = 0; p < pointCount; ++p) {
    normals(0, p) = acc(vNormals, p).x;
    if (worldDim > 1)
      normals(1, p) = acc(vNormals, p).y;
    if (worldDim > 2)
      normals(2, p) = acc(vNormals, p).z;
  }
}
Example no. 7
0
void LSHSearch<SortPolicy>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const size_t numTablesToSearch)
{
  // Set the size of the neighbor and distance matrices.
  resultingNeighbors.set_size(k, querySet.n_cols);
  distances.set_size(k, querySet.n_cols);
  distances.fill(SortPolicy::WorstDistance());
  resultingNeighbors.fill(referenceSet.n_cols);

  size_t avgIndicesReturned = 0;

  Timer::Start("computing_neighbors");

  // Go through every query point sequentially.
  for (size_t i = 0; i < querySet.n_cols; i++)
  {
    // Hash every query into every hash table and eventually into the
    // 'secondHashTable' to obtain the neighbor candidates.
    arma::uvec refIndices;
    ReturnIndicesFromTable(i, refIndices, numTablesToSearch);

    // Bookkeeping: accumulate the number of neighbor candidates returned, so
    // that the average per query point can be reported later.
    avgIndicesReturned += refIndices.n_elem;

    // Sequentially go through all the candidates and save the best 'k'
    // candidates.
    for (size_t j = 0; j < refIndices.n_elem; j++)
      BaseCase(distances, resultingNeighbors, i, (size_t) refIndices[j]);
  }

  Timer::Stop("computing_neighbors");

  distanceEvaluations += avgIndicesReturned;
  avgIndicesReturned /= querySet.n_cols;
  Log::Info << avgIndicesReturned << " distinct indices returned on average." <<
      std::endl;
}
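
BaseCase() itself is not shown in this listing; below is a minimal sketch of the kind of update it performs on one (query, candidate) pair, keeping column i of the result matrices sorted by distance. The plain Euclidean distance and the function name are assumptions for illustration, not mlpack's actual implementation:

#include <armadillo>

void BaseCaseSketch(arma::mat& distances,
                    arma::Mat<size_t>& neighbors,
                    const arma::mat& querySet,
                    const arma::mat& referenceSet,
                    const size_t i,   // query point index
                    const size_t j)   // candidate reference point index
{
  const double dist = arma::norm(querySet.col(i) - referenceSet.col(j), 2);

  // The worst of the current k candidates sits in the last row.
  if (dist >= distances(distances.n_rows - 1, i))
    return;

  // Shift worse candidates down and insert the new one in sorted position.
  size_t pos = distances.n_rows - 1;
  while (pos > 0 && dist < distances(pos - 1, i))
  {
    distances(pos, i) = distances(pos - 1, i);
    neighbors(pos, i) = neighbors(pos - 1, i);
    --pos;
  }
  distances(pos, i) = dist;
  neighbors(pos, i) = j;
}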
Example no. 8
0
    virtual void evaluate(const GeometricalData<CoordinateType>& geomData,
                          arma::Mat<ValueType>& result) const {
        const arma::Mat<CoordinateType>& points  = geomData.globals;
        const arma::Mat<CoordinateType>& normals = geomData.normals;

#ifndef NDEBUG
        if ((int)points.n_rows != worldDimension() ||
                (int)normals.n_rows != worldDimension())
            throw std::invalid_argument(
                    "SurfaceNormalAndDomainIndexDependentFunction::evaluate(): "
                    "incompatible world dimension");
#endif

        const size_t pointCount = points.n_cols;
        result.set_size(codomainDimension(), pointCount);
        for (size_t i = 0; i < pointCount; ++i) {
            arma::Col<ValueType> activeResultColumn = result.unsafe_col(i);
            m_functor.evaluate(points.unsafe_col(i), normals.unsafe_col(i),
                               geomData.domainIndex, activeResultColumn);
        }
    }
Example no. 9
0
void DefaultLocalAssemblerForOperatorsOnSurfacesUtilities<BasisFunctionType>::
    precalculateElementSizesAndCentersForSingleGrid(
        const RawGridGeometry<CoordinateType> &rawGeometry,
        std::vector<CoordinateType> &elementSizesSquared,
        arma::Mat<CoordinateType> &elementCenters,
        CoordinateType &averageElementSize) {
  const size_t elementCount = rawGeometry.elementCount();
  const int worldDim = rawGeometry.worldDimension();

  averageElementSize = 0.; // We will store here temporarily
                           // the sum of element sizes
  elementSizesSquared.resize(elementCount);
  for (int e = 0; e < elementCount; ++e) {
    elementSizesSquared[e] = elementSizeSquared(e, rawGeometry);
    averageElementSize += sqrt(elementSizesSquared[e]);
  }
  averageElementSize /= elementCount;

  elementCenters.set_size(worldDim, elementCount);
  for (int e = 0; e < elementCount; ++e)
    elementCenters.col(e) = elementCenter(e, rawGeometry);
}
Example no. 10
0
void DefaultEvaluatorForIntegralOperators<BasisFunctionType, KernelType,
ResultType, GeometryFactory>::evaluate(
        Region region,
        const arma::Mat<CoordinateType>& points, arma::Mat<ResultType>& result) const
{
    const size_t pointCount = points.n_cols;
    const int outputComponentCount = m_integral->resultDimension();

    result.set_size(outputComponentCount, pointCount);
    result.fill(0.);

    const GeometricalData<CoordinateType>& trialGeomData =
            (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD) ?
                m_nearFieldTrialGeomData :
                m_farFieldTrialGeomData;
    const CollectionOf2dArrays<ResultType>& trialTransfValues =
            (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD) ?
                m_nearFieldTrialTransfValues :
                m_farFieldTrialTransfValues;
    const std::vector<CoordinateType>& weights =
            (region == EvaluatorForIntegralOperators<ResultType>::NEAR_FIELD) ?
                m_nearFieldWeights :
                m_farFieldWeights;

    // Process the points in chunks of 96 to avoid creating overly large
    // arrays of kernel values.
    const size_t chunkSize = 96;
    const size_t chunkCount = (pointCount + chunkSize - 1) / chunkSize;

    int maxThreadCount = 1;
    if (!m_parallelizationOptions.isOpenClEnabled()) {
        if (m_parallelizationOptions.maxThreadCount() ==
                ParallelizationOptions::AUTO)
            maxThreadCount = tbb::task_scheduler_init::automatic;
        else
            maxThreadCount = m_parallelizationOptions.maxThreadCount();
    }
    tbb::task_scheduler_init scheduler(maxThreadCount);
    typedef EvaluationLoopBody<
            BasisFunctionType, KernelType, ResultType> Body;
    {
        Fiber::SerialBlasRegion region;
        tbb::parallel_for(tbb::blocked_range<size_t>(0, chunkCount),
                          Body(chunkSize,
                               points, trialGeomData, trialTransfValues, weights,
                               *m_kernels, *m_integral, result));
    }

//    // Old serial version
//    CollectionOf4dArrays<KernelType> kernelValues;
//    GeometricalData<CoordinateType> evalPointGeomData;
//    for (size_t start = 0; start < pointCount; start += chunkSize)
//    {
//        size_t end = std::min(start + chunkSize, pointCount);
//        evalPointGeomData.globals = points.cols(start, end - 1 /* inclusive */);
//        m_kernels->evaluateOnGrid(evalPointGeomData, trialGeomData, kernelValues);
//        // View into the current chunk of the "result" array
//        _2dArray<ResultType> resultChunk(outputComponentCount, end - start,
//                                         result.colptr(start));
//        m_integral->evaluate(trialGeomData,
//                             kernelValues,
//                             weightedTrialTransfValues,
//                             resultChunk);
//    }
}
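
The chunking used above is plain ceiling division into fixed-size chunks, with a shorter final chunk; the same pattern in isolation (the serial loop and the point count are illustrative only):

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
  const std::size_t pointCount = 250;
  const std::size_t chunkSize = 96;
  // Ceiling division: 250 points -> 3 chunks of sizes 96, 96 and 58.
  const std::size_t chunkCount = (pointCount + chunkSize - 1) / chunkSize;

  for (std::size_t c = 0; c < chunkCount; ++c)
  {
    const std::size_t start = c * chunkSize;
    const std::size_t end = std::min(start + chunkSize, pointCount);
    std::cout << "chunk " << c << ": points [" << start << ", " << end << ")\n";
  }
  return 0;
}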
Example no. 11
0
void RAModel<SortPolicy>::Search(arma::mat&& querySet,
                                 const size_t k,
                                 arma::Mat<size_t>& neighbors,
                                 arma::mat& distances)
{
  // Apply the random basis if necessary.
  if (randomBasis)
    querySet = q * querySet;

  Log::Info << "Searching for " << k << " approximate nearest neighbors with ";
  if (!Naive() && !SingleMode())
    Log::Info << "dual-tree rank-approximate " << TreeName() << " search...";
  else if (!Naive())
    Log::Info << "single-tree rank-approximate " << TreeName() << " search...";
  else
    Log::Info << "brute-force (naive) rank-approximate search...";
  Log::Info << std::endl;

  switch (treeType)
  {
    case KD_TREE:
      if (!kdTreeRA->Naive() && !kdTreeRA->SingleMode())
      {
        // Build a second tree and search.
        Timer::Start("tree_building");
        Log::Info << "Building query tree..." << std::endl;
        std::vector<size_t> oldFromNewQueries;
        typename RAType<tree::KDTree>::Tree queryTree(std::move(querySet),
            oldFromNewQueries, leafSize);
        Log::Info << "Tree built." << std::endl;
        Timer::Stop("tree_building");

        arma::Mat<size_t> neighborsOut;
        arma::mat distancesOut;
        kdTreeRA->Search(&queryTree, k, neighborsOut, distancesOut);

        // Unmap the query points.
        distances.set_size(distancesOut.n_rows, distancesOut.n_cols);
        neighbors.set_size(neighborsOut.n_rows, neighborsOut.n_cols);
        for (size_t i = 0; i < neighborsOut.n_cols; ++i)
        {
          neighbors.col(oldFromNewQueries[i]) = neighborsOut.col(i);
          distances.col(oldFromNewQueries[i]) = distancesOut.col(i);
        }
      }
      else
      {
        // Search without building a second tree.
        kdTreeRA->Search(querySet, k, neighbors, distances);
      }
      break;
    case COVER_TREE:
      // No mapping necessary.
      coverTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case R_TREE:
      // No mapping necessary.
      rTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case R_STAR_TREE:
      // No mapping necessary.
      rStarTreeRA->Search(querySet, k, neighbors, distances);
      break;
    case X_TREE:
      // No mapping necessary.
      xTreeRA->Search(querySet, k, neighbors, distances);
      break;
  }
}
Example no. 12
0
void CF<FactorizerType>::GetRecommendations(const size_t numRecs,
                                            arma::Mat<size_t>& recommendations,
                                            arma::Col<size_t>& users)
{
  // Generate new table by multiplying approximate values.
  rating = w * h;

  // Now, we will use the decomposed w and h matrices to estimate what the user
  // would have rated items as, and then pick the best items.

  // Temporarily store feature vector of queried users.
  arma::mat query(rating.n_rows, users.n_elem);

  // Select feature vectors of queried users.
  for (size_t i = 0; i < users.n_elem; i++)
    query.col(i) = rating.col(users(i));

  // Temporary storage for neighborhood of the queried users.
  arma::Mat<size_t> neighborhood;

  // Calculate the neighborhood of the queried users.
  // This should be a templatized option.
  neighbor::AllkNN a(rating);
  arma::mat resultingDistances; // Temporary storage.
  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);

  // Temporary storage for the average rating of each item, taken over each
  // query user's neighborhood.
  arma::mat averages = arma::zeros<arma::mat>(rating.n_rows, query.n_cols);

  // Iterate over each query user.
  for (size_t i = 0; i < neighborhood.n_cols; ++i)
  {
    // Iterate over each neighbor of the query user.
    for (size_t j = 0; j < neighborhood.n_rows; ++j)
      averages.col(i) += rating.col(neighborhood(j, i));
    // Normalize average.
    averages.col(i) /= neighborhood.n_rows;
  }

  // Generate recommendations for each query user by finding the maximum numRecs
  // elements in the averages matrix.
  recommendations.set_size(numRecs, users.n_elem);
  recommendations.fill(cleanedData.n_rows); // Invalid item number.
  arma::mat values(numRecs, users.n_elem);
  values.fill(-DBL_MAX); // The smallest possible value.
  for (size_t i = 0; i < users.n_elem; i++)
  {
    // Look through the averages column corresponding to the current user.
    for (size_t j = 0; j < averages.n_rows; ++j)
    {
      // Ensure that the user hasn't already rated the item.
      if (cleanedData(j, users(i)) != 0.0)
        continue; // The user already rated the item.

      // Is the estimated value better than the worst candidate?
      const double value = averages(j, i);
      if (value > values(values.n_rows - 1, i))
      {
        // It should be inserted.  Which position?
        size_t insertPosition = values.n_rows - 1;
        while (insertPosition > 0)
        {
          if (value <= values(insertPosition - 1, i))
            break; // The current value is the right one.
          insertPosition--;
        }

        // Now insert it into the list.
        InsertNeighbor(i, insertPosition, j, value, recommendations,
            values);
      }
    }

    // If we were not able to come up with enough recommendations, issue a
    // warning.
    if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows)
      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
          << "for user " << users(i) << " (not enough un-rated items)!"
          << std::endl;
  }
}
Example no. 13
0
void LoadARFF(const std::string& filename,
              arma::Mat<eT>& matrix,
              DatasetMapper<PolicyType>& info)
{
  // First, open the file.
  std::ifstream ifs;
  ifs.open(filename, std::ios::in | std::ios::binary);

  // if file is not open throw an error (file not found).
  if (!ifs.is_open())
  {
    Log::Fatal << "Cannot open file '" << filename << "'. " << std::endl;
  }

  std::string line;
  size_t dimensionality = 0;
  std::vector<bool> types;
  size_t headerLines = 0;
  while (ifs.good())
  {
    // Read the next line, then strip whitespace from either side.
    std::getline(ifs, line, '\n');
    boost::trim(line);
    ++headerLines;

    // Is the first character a comment, or is the line empty?
    if (line[0] == '%' || line.empty())
      continue; // Ignore this line.

    // If the first character is @, we are looking at @relation, @attribute, or
    // @data.
    if (line[0] == '@')
    {
      typedef boost::tokenizer<boost::escaped_list_separator<char>> Tokenizer;
      std::string separators = " \t%"; // Split on comments too.
      boost::escaped_list_separator<char> sep("\\", separators, "{\"");
      Tokenizer tok(line, sep);
      Tokenizer::iterator it = tok.begin();

      // Get the annotation we are looking at.
      std::string annotation(*it);
      std::transform(annotation.begin(), annotation.end(), annotation.begin(),
            ::tolower);

      if (annotation == "@relation")
      {
        // We don't actually have anything to do with the name of the dataset.
        continue;
      }
      else if (annotation == "@attribute")
      {
        ++dimensionality;
        // We need to mark this dimension with its according type.
        ++it; // Ignore the dimension name.
        std::string dimType = *(++it);
        std::transform(dimType.begin(), dimType.end(), dimType.begin(),
            ::tolower);

        if (dimType == "numeric" || dimType == "integer" || dimType == "real")
        {
          types.push_back(false); // The feature is numeric.
        }
        else if (dimType == "string")
        {
          types.push_back(true); // The feature is categorical.
        }
        else if (dimType[0] == '{')
        {
          throw std::logic_error("list of ARFF values not yet supported");
        }
      }
      else if (annotation == "@data")
      {
        // We are in the data section.  So we can move out of this loop.
        break;
      }
      else
      {
        throw std::runtime_error("unknown ARFF annotation '" + (*tok.begin()) +
            "'");
      }
    }
  }

  if (ifs.eof())
    throw std::runtime_error("no @data section found");

  // Reset the DatasetInfo object, if needed.
  if (info.Dimensionality() == 0)
  {
    info = DatasetMapper<PolicyType>(dimensionality);
  }
  else if (info.Dimensionality() != dimensionality)
  {
    std::ostringstream oss;
    oss << "data::LoadARFF(): given DatasetInfo has dimensionality "
        << info.Dimensionality() << ", but data has dimensionality "
        << dimensionality;
    throw std::invalid_argument(oss.str());
  }

  for (size_t i = 0; i < types.size(); ++i)
  {
    if (types[i])
      info.Type(i) = Datatype::categorical;
    else
      info.Type(i) = Datatype::numeric;
  }

  // We need to find out how many lines of data are in the file.
  std::streampos pos = ifs.tellg();
  size_t row = 0;
  while (ifs.good())
  {
    std::getline(ifs, line, '\n');
    ++row;
  }
  // Uncount the EOF row.
  --row;

  // Since we've hit the EOF, we have to call clear() so we can seek again.
  ifs.clear();
  ifs.seekg(pos);

  // Now, set the size of the matrix.
  matrix.set_size(dimensionality, row);

  // Now we are looking at the @data section.
  row = 0;
  while (ifs.good())
  {
    std::getline(ifs, line, '\n');
    boost::trim(line);
    // Each line of the @data section must be a CSV (except sparse data, which
    // we will handle later).  So now we can tokenize the
    // CSV and parse it.  The '?' representing a missing value is not allowed,
    // so if that occurs we throw an exception.  We also throw an exception if
    // any piece of data does not match its type (categorical or numeric).

    // If the first character is {, it is sparse data, and we can just say this
    // is not handled for now...
    if (line[0] == '{')
      throw std::runtime_error("cannot yet parse sparse ARFF data");

    // Tokenize the line.
    typedef boost::tokenizer<boost::escaped_list_separator<char>> Tokenizer;
    boost::escaped_list_separator<char> sep("\\", ",", "\"");
    Tokenizer tok(line, sep);

    size_t col = 0;
    std::stringstream token;
    for (Tokenizer::iterator it = tok.begin(); it != tok.end(); ++it)
    {
      // Check that we are not too many columns in.
      if (col >= matrix.n_rows)
      {
        std::stringstream error;
        error << "Too many columns in line " << (headerLines + row) << ".";
        throw std::runtime_error(error.str());
      }

      // What should this token be?
      if (info.Type(col) == Datatype::categorical)
      {
        // Strip spaces before mapping.
        std::string token = *it;
        boost::trim(token);
        // We load transposed.
        matrix(col, row) = info.template MapString<eT>(token, col);
      }
      else if (info.Type(col) == Datatype::numeric)
      {
        // Attempt to read as numeric.
        token.clear();
        token.str(*it);

        eT val = eT(0);
        token >> val;

        if (token.fail())
        {
          // Check for NaN or inf.
          if (!IsNaNInf(val, token.str()))
          {
            // Okay, it's not NaN or inf.  If it's '?', we issue a specific
            // error, otherwise we issue a general error.
            std::stringstream error;
            std::string tokenStr = token.str();
            boost::trim(tokenStr);
            if (tokenStr == "?")
              error << "Missing values ('?') not supported, ";
            else
              error << "Parse error ";
            error << "at line " << (headerLines + row) << " token " << col
                << ": \"" << tokenStr << "\".";
            throw std::runtime_error(error.str());
          }
        }

        // If we made it to here, we have a value.
        matrix(col, row) = val; // We load transposed.
      }

      ++col;
    }
    ++row;
  }
}
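
For reference, a tiny hand-written ARFF file of the shape this loader accepts (two numeric attributes and one string attribute; a '?' value or a '{...}' nominal list would make it throw). The file content is purely illustrative:

% toy.arff -- illustrative only
@relation toy
@attribute height numeric
@attribute weight numeric
@attribute label string
@data
1.7,65.2,cat
1.9,80.0,dog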
Example no. 14
0
void CF::GetRecommendations(const size_t numRecs,
                            arma::Mat<size_t>& recommendations,
                            arma::Col<size_t>& users)
{
  // We want to avoid calculating the full rating matrix, so we will do nearest
  // neighbor search only on the H matrix, using the observation that if the
  // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
  // H.col(j)).  This can be seen as nearest neighbor search on the H matrix
  // with the Mahalanobis distance where M^{-1} = W^T W.  So, we'll decompose
  // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
  // Then we can perform nearest neighbor search.
  arma::mat l = arma::chol(w.t() * w);
  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.

  // Now, we will use the decomposed w and h matrices to estimate what the user
  // would have rated items as, and then pick the best items.

  // Temporarily store feature vector of queried users.
  arma::mat query(stretchedH.n_rows, users.n_elem);

  // Select feature vectors of queried users.
  for (size_t i = 0; i < users.n_elem; i++)
    query.col(i) = stretchedH.col(users(i));

  // Temporary storage for neighborhood of the queried users.
  arma::Mat<size_t> neighborhood;

  // Calculate the neighborhood of the queried users.
  // This should be a templatized option.
  neighbor::KNN a(stretchedH);
  arma::mat resultingDistances; // Temporary storage.
  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);

  // Generate recommendations for each query user by finding the maximum numRecs
  // elements in the averages matrix.
  recommendations.set_size(numRecs, users.n_elem);
  recommendations.fill(cleanedData.n_rows); // Invalid item number.
  arma::mat values(numRecs, users.n_elem);
  values.fill(-DBL_MAX); // The smallest possible value.
  for (size_t i = 0; i < users.n_elem; i++)
  {
    // First, calculate average of neighborhood values.
    arma::vec averages;
    averages.zeros(cleanedData.n_rows);

    for (size_t j = 0; j < neighborhood.n_rows; ++j)
      averages += w * h.col(neighborhood(j, i));
    averages /= neighborhood.n_rows;

    // Look through the averages column corresponding to the current user.
    for (size_t j = 0; j < averages.n_rows; ++j)
    {
      // Ensure that the user hasn't already rated the item.
      if (cleanedData(j, users(i)) != 0.0)
        continue; // The user already rated the item.

      // Is the estimated value better than the worst candidate?
      const double value = averages[j];
      if (value > values(values.n_rows - 1, i))
      {
        // It should be inserted.  Which position?
        size_t insertPosition = values.n_rows - 1;
        while (insertPosition > 0)
        {
          if (value <= values(insertPosition - 1, i))
            break; // The current value is the right one.
          insertPosition--;
        }

        // Now insert it into the list.
        InsertNeighbor(i, insertPosition, j, value, recommendations,
            values);
      }
    }

    // If we were not able to come up with enough recommendations, issue a
    // warning.
    if (recommendations(values.n_rows - 1, i) == cleanedData.n_rows)
      Log::Warn << "Could not provide " << values.n_rows << " recommendations "
          << "for user " << users(i) << " (not enough un-rated items)!"
          << std::endl;
  }
}
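
The comment at the top of this function relies on the identity ||L^T (h_i - h_j)|| = ||W (h_i - h_j)|| whenever L L^T = W^T W. A short numerical check of that identity (the factor sizes and the seed are arbitrary; arma::chol() returns the upper-triangular factor R with R^T R = W^T W, so R plays the role of L^T, exactly as in the code above):

#include <armadillo>
#include <iostream>

int main()
{
  arma::arma_rng::set_seed(42);

  // Random low-rank factors: items x rank and rank x users.
  const arma::mat w(50, 5, arma::fill::randu);
  const arma::mat h(5, 20, arma::fill::randu);

  // Upper-triangular R with R.t() * R == w.t() * w.
  const arma::mat l = arma::chol(w.t() * w);

  const arma::vec d = h.col(3) - h.col(7);
  std::cout << "||W d||   = " << arma::norm(w * d, 2) << '\n'
            << "||L^T d|| = " << arma::norm(l * d, 2) << '\n';
  // The two norms agree up to rounding, so nearest neighbor search on the
  // columns of (l * h) is equivalent to search on the columns of (w * h).
  return 0;
}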
Example no. 15
0
bool Load(const std::string& filename,
          arma::Mat<eT>& matrix,
          DatasetInfo& info,
          const bool fatal,
          const bool transpose)
{
  // Get the extension and load as necessary.
  Timer::Start("loading_data");

  // Get the extension.
  std::string extension = Extension(filename);

  // Catch nonexistent files by opening the stream ourselves.
  std::fstream stream;
  stream.open(filename.c_str(), std::fstream::in);

  if (!stream.is_open())
  {
    Timer::Stop("loading_data");
    if (fatal)
      Log::Fatal << "Cannot open file '" << filename << "'. " << std::endl;
    else
      Log::Warn << "Cannot open file '" << filename << "'; load failed."
          << std::endl;

    return false;
  }

  if (extension == "csv" || extension == "tsv" || extension == "txt")
  {
    // True if we're looking for commas; if false, we're looking for spaces.
    bool commas = (extension == "csv");

    std::string type;
    if (extension == "csv")
      type = "CSV data";
    else
      type = "raw ASCII-formatted data";

    Log::Info << "Loading '" << filename << "' as " << type << ".  "
        << std::flush;
    std::string separators;
    if (commas)
      separators = ",";
    else
      separators = " \t";

    // We'll load this as CSV (or CSV with spaces or tabs) according to
    // RFC4180.  So the first thing to do is determine the size of the matrix.
    std::string buffer;
    size_t cols = 0;

    std::getline(stream, buffer, '\n');
    // Count commas and whitespace in the line, ignoring anything inside
    // quotes.
    typedef boost::tokenizer<boost::escaped_list_separator<char>> Tokenizer;
    boost::escaped_list_separator<char> sep("\\", separators, "\"");
    Tokenizer tok(buffer, sep);
    for (Tokenizer::iterator i = tok.begin(); i != tok.end(); ++i)
      ++cols;

    // Now count the number of lines in the file.  We've already counted the
    // first one.
    size_t rows = 1;
    while (!stream.eof() && !stream.bad() && !stream.fail())
    {
      std::getline(stream, buffer, '\n');
      if (!stream.fail())
        ++rows;
    }

    // Now we have the size.  So resize our matrix.
    if (transpose)
    {
      matrix.set_size(cols, rows);
      info = DatasetInfo(cols);
    }
    else
    {
      matrix.set_size(rows, cols);
      info = DatasetInfo(rows);
    }

    stream.close();
    stream.open(filename, std::fstream::in);

    // Extract line by line.
    std::stringstream token;
    size_t row = 0;
    while (!stream.bad() && !stream.fail() && !stream.eof())
    {
      std::getline(stream, buffer, '\n');

      // Look at each token.  Unfortunately we have to do this character by
      // character, because things may be escaped in quotes.
      Tokenizer lineTok(buffer, sep);
      size_t col = 0;
      for (Tokenizer::iterator it = lineTok.begin(); it != lineTok.end(); ++it)
      {
        // Attempt to extract as type eT.  If that fails, we'll assume it's a
        // string and map it (which may involve retroactively mapping everything
        // we've seen so far).
        token.clear();
        token.str(*it);

        eT val = eT(0);
        token >> val;

        if (token.fail())
        {
          // Conversion failed; but it may be a NaN or inf.  Armadillo has
          // convenient functions to check.
          if (!arma::diskio::convert_naninf(val, token.str()))
          {
            // We need to perform a mapping.
            const size_t dim = (transpose) ? col : row;
            if (info.Type(dim) == Datatype::numeric)
            {
              // We must map everything we have seen up to this point and change
              // the values in the matrix.
              if (transpose)
              {
                // Whatever we've seen so far has successfully mapped to an eT.
                // So we need to print it back to a string.  We'll use
                // Armadillo's functionality for that.
                for (size_t i = 0; i < row; ++i)
                {
                  std::stringstream sstr;
                  arma::arma_ostream::print_elem(sstr, matrix.at(i, col),
                      false);
                  eT newVal = info.MapString(sstr.str(), col);
                  matrix.at(i, col) = newVal;
                }
              }
              else
              {
                for (size_t i = 0; i < col; ++i)
                {
                  std::stringstream sstr;
                  arma::arma_ostream::print_elem(sstr, matrix.at(row, i),
                      false);
                  eT newVal = info.MapString(sstr.str(), row);
                  matrix.at(row, i) = newVal;
                }
              }
            }

            // Strip whitespace from either side of the string.
            std::string trimmedToken(token.str());
            boost::trim(trimmedToken);
            val = info.MapString(trimmedToken, dim);
          }
        }

        if (transpose)
          matrix(col, row) = val;
        else
          matrix(row, col) = val;

        ++col;
      }

      ++row;
    }
  }
Example no. 16
0
void NeighborSearch<SortPolicy, MetricType, TreeType>::Search(
    const size_t k,
    arma::Mat<size_t>& resultingNeighbors,
    arma::mat& distances)
{
  Timer::Start("computing_neighbors");

  // If we have built the trees ourselves, then we will have to map all the
  // indices back to their original indices when this computation is finished.
  // To avoid an extra copy, we will store the neighbors and distances in a
  // separate matrix.
  arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
  arma::mat* distancePtr = &distances;

  if (treeOwner && !(singleMode && hasQuerySet))
    distancePtr = new arma::mat; // Query indices need to be mapped.
  if (treeOwner)
    neighborPtr = new arma::Mat<size_t>; // All indices need mapping.

  // Set the size of the neighbor and distance matrices.
  neighborPtr->set_size(k, querySet.n_cols);
  distancePtr->set_size(k, querySet.n_cols);
  distancePtr->fill(SortPolicy::WorstDistance());

  size_t numPrunes = 0;

  // Create the helper object for the tree traversal.
  typedef NeighborSearchRules<SortPolicy, MetricType, TreeType> RuleType;
  RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr, metric);

  if (singleMode)
  {
    // Create the traverser.
    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    // Now have it traverse for each point.
    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);
  }
  else // Dual-tree recursion.
  {
    // Create the traverser.
    typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

    traverser.Traverse(*queryTree, *referenceTree);

    Log::Info << traverser.NumVisited() << " node combinations were visited.\n";
    Log::Info << traverser.NumScores() << " node combinations were scored.\n";
    Log::Info << traverser.NumBaseCases() << " base cases were calculated.\n";
  }

  Timer::Stop("computing_neighbors");

  // Now, do we need to do mapping of indices?
  if (!treeOwner)
  {
    // No mapping needed.  We are done.
    return;
  }
  else if (treeOwner && hasQuerySet && !singleMode) // Map both sets.
  {
    // Set size of output matrices correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewQueries[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewQueries[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrices.
    delete neighborPtr;
    delete distancePtr;
  }
  else if (treeOwner && !hasQuerySet)
  {
    resultingNeighbors.set_size(k, querySet.n_cols);
    distances.set_size(k, querySet.n_cols);

    for (size_t i = 0; i < distances.n_cols; i++)
    {
      // Map distances (copy a column).
      distances.col(oldFromNewReferences[i]) = distancePtr->col(i);

      // Map indices of neighbors.
      for (size_t j = 0; j < distances.n_rows; j++)
      {
        resultingNeighbors(j, oldFromNewReferences[i]) =
            oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrices.
    delete neighborPtr;
    delete distancePtr;
  }
  else if (treeOwner && hasQuerySet && singleMode) // Map only references.
  {
    // Set size of neighbor indices matrix correctly.
    resultingNeighbors.set_size(k, querySet.n_cols);

    // Map indices of neighbors.
    for (size_t i = 0; i < resultingNeighbors.n_cols; i++)
    {
      for (size_t j = 0; j < resultingNeighbors.n_rows; j++)
      {
        resultingNeighbors(j, i) = oldFromNewReferences[(*neighborPtr)(j, i)];
      }
    }

    // Finished with temporary matrix.
    delete neighborPtr;
  }
} // Search
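
A toy illustration of the oldFromNew unmapping performed above (the permutation and the neighbor values are made up, just to show the direction of the mapping; reference indices are translated through oldFromNewReferences in the same way):

#include <armadillo>
#include <vector>

int main()
{
  // Tree building reordered 4 query points; oldFromNew[i] is the original
  // index of the point that now sits at position i in the tree's dataset.
  const std::vector<size_t> oldFromNewQueries = {2, 0, 3, 1};

  // k = 1 neighbor per query, computed in the tree ordering.
  arma::Mat<size_t> neighborsOut(1, 4);
  neighborsOut(0, 0) = 7; neighborsOut(0, 1) = 5;
  neighborsOut(0, 2) = 6; neighborsOut(0, 3) = 4;

  // Copy each column back to the position of the original query point.
  arma::Mat<size_t> neighbors(1, 4);
  for (size_t i = 0; i < neighborsOut.n_cols; ++i)
    neighbors.col(oldFromNewQueries[i]) = neighborsOut.col(i);

  neighbors.print("neighbors, indexed by original query order:");
  return 0;
}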
Example no. 17
0
void FastMKS<KernelType, TreeType>::Search(
    const typename TreeType::Mat& querySet,
    const size_t k,
    arma::Mat<size_t>& indices,
    arma::mat& kernels)
{
  Timer::Start("computing_products");

  // No remapping will be necessary because we are using the cover tree.
  indices.set_size(k, querySet.n_cols);
  kernels.set_size(k, querySet.n_cols);

  // Naive implementation.
  if (naive)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < querySet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        const double eval = metric.Kernel().Evaluate(querySet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");

    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Fill kernels.
    kernels.fill(-DBL_MAX);

    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, querySet, indices, kernels, metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < querySet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.  First, we need to build the query tree.  We are
  // assuming it doesn't map anything...
  Timer::Stop("computing_products");
  Timer::Start("tree_building");
  TreeType queryTree(querySet);
  Timer::Stop("tree_building");

  Search(&queryTree, k, indices, kernels);
}
Example no. 18
0
void CF::GetRecommendations(const size_t numRecs,
                            arma::Mat<size_t>& recommendations,
                            arma::Col<size_t>& users)
{
  // We want to avoid calculating the full rating matrix, so we will do nearest
  // neighbor search only on the H matrix, using the observation that if the
  // rating matrix X = W*H, then d(X.col(i), X.col(j)) = d(W H.col(i), W
  // H.col(j)).  This can be seen as nearest neighbor search on the H matrix
  // with the Mahalanobis distance where M^{-1} = W^T W.  So, we'll decompose
  // M^{-1} = L L^T (the Cholesky decomposition), and then multiply H by L^T.
  // Then we can perform nearest neighbor search.
  arma::mat l = arma::chol(w.t() * w);
  arma::mat stretchedH = l * h; // Due to the Armadillo API, l is L^T.

  // Now, we will use the decomposed w and h matrices to estimate what the user
  // would have rated items as, and then pick the best items.

  // Temporarily store feature vector of queried users.
  arma::mat query(stretchedH.n_rows, users.n_elem);

  // Select feature vectors of queried users.
  for (size_t i = 0; i < users.n_elem; i++)
    query.col(i) = stretchedH.col(users(i));

  // Temporary storage for neighborhood of the queried users.
  arma::Mat<size_t> neighborhood;

  // Calculate the neighborhood of the queried users.
  // This should be a templatized option.
  neighbor::KNN a(stretchedH);
  arma::mat resultingDistances; // Temporary storage.
  a.Search(query, numUsersForSimilarity, neighborhood, resultingDistances);

  // Generate recommendations for each query user by finding the maximum numRecs
  // elements in the averages matrix.
  recommendations.set_size(numRecs, users.n_elem);
  arma::mat values(numRecs, users.n_elem);

  for (size_t i = 0; i < users.n_elem; i++)
  {
    // First, calculate average of neighborhood values.
    arma::vec averages;
    averages.zeros(cleanedData.n_rows);

    for (size_t j = 0; j < neighborhood.n_rows; ++j)
      averages += w * h.col(neighborhood(j, i));
    averages /= neighborhood.n_rows;

    // Let's build the list of candidate recommendations for the given user.
    // Default candidate: the smallest possible value and invalid item number.
    const Candidate def = std::make_pair(-DBL_MAX, cleanedData.n_rows);
    std::vector<Candidate> vect(numRecs, def);
    typedef std::priority_queue<Candidate, std::vector<Candidate>, CandidateCmp>
        CandidateList;
    CandidateList pqueue(CandidateCmp(), std::move(vect));

    // Look through the averages column corresponding to the current user.
    for (size_t j = 0; j < averages.n_rows; ++j)
    {
      // Ensure that the user hasn't already rated the item.
      if (cleanedData(j, users(i)) != 0.0)
        continue; // The user already rated the item.


      // Is the estimated value better than the worst candidate?
      if (averages[j] > pqueue.top().first)
      {
        Candidate c = std::make_pair(averages[j], j);
        pqueue.pop();
        pqueue.push(c);
      }
    }

    for (size_t p = 1; p <= numRecs; p++)
    {
      recommendations(numRecs - p, i) = pqueue.top().second;
      values(numRecs - p, i) = pqueue.top().first;
      pqueue.pop();
    }

    // If we were not able to come up with enough recommendations, issue a
    // warning.
    if (recommendations(numRecs - 1, i) == def.second)
      Log::Warn << "Could not provide " << numRecs << " recommendations "
          << "for user " << users(i) << " (not enough un-rated items)!"
          << std::endl;
  }
}
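
Candidate and CandidateCmp are not defined in this listing; definitions consistent with how the queue is used here (a (value, item) pair, ordered so that the worst candidate sits at the top of the priority queue) would look roughly like this:

#include <cstddef>
#include <utility>

// A candidate recommendation: (estimated rating, item index).
typedef std::pair<double, size_t> Candidate;

// Compare by rating so that std::priority_queue keeps the lowest-rated
// candidate at top(), where it can be popped when a better item is found.
struct CandidateCmp
{
  bool operator()(const Candidate& c1, const Candidate& c2) const
  {
    return c1.first > c2.first;
  }
};

With that ordering, popping numRecs times yields items from worst to best, which is why the loop above fills recommendations from row numRecs - 1 upward.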
Example no. 19
0
void FastMKS<KernelType, TreeType>::Search(const size_t k,
                                           arma::Mat<size_t>& indices,
                                           arma::mat& kernels)
{
  // No remapping will be necessary because we are using the cover tree.
  Timer::Start("computing_products");
  indices.set_size(k, referenceSet.n_cols);
  kernels.set_size(k, referenceSet.n_cols);
  kernels.fill(-DBL_MAX);

  // Naive implementation.
  if (naive)
  {
    // Simple double loop.  Stupid, slow, but a good benchmark.
    for (size_t q = 0; q < referenceSet.n_cols; ++q)
    {
      for (size_t r = 0; r < referenceSet.n_cols; ++r)
      {
        if (q == r)
          continue; // Don't return the point as its own candidate.

        const double eval = metric.Kernel().Evaluate(referenceSet.col(q),
                                                     referenceSet.col(r));

        size_t insertPosition;
        for (insertPosition = 0; insertPosition < indices.n_rows;
            ++insertPosition)
          if (eval > kernels(insertPosition, q))
            break;

        if (insertPosition < indices.n_rows)
          InsertNeighbor(indices, kernels, q, insertPosition, r, eval);
      }
    }

    Timer::Stop("computing_products");

    return;
  }

  // Single-tree implementation.
  if (singleMode)
  {
    // Create rules object (this will store the results).  This constructor
    // precalculates each self-kernel value.
    typedef FastMKSRules<KernelType, TreeType> RuleType;
    RuleType rules(referenceSet, referenceSet, indices, kernels,
        metric.Kernel());

    typename TreeType::template SingleTreeTraverser<RuleType> traverser(rules);

    for (size_t i = 0; i < referenceSet.n_cols; ++i)
      traverser.Traverse(i, *referenceTree);

    // Save the number of pruned nodes.
    const size_t numPrunes = traverser.NumPrunes();

    Log::Info << "Pruned " << numPrunes << " nodes." << std::endl;

    Log::Info << rules.BaseCases() << " base cases." << std::endl;
    Log::Info << rules.Scores() << " scores." << std::endl;

    Timer::Stop("computing_products");
    return;
  }

  // Dual-tree implementation.
  Timer::Stop("computing_products");

  Search(referenceTree, k, indices, kernels);
}
Example no. 20
0
void RASearch<SortPolicy, MetricType, TreeType>::
Search(const size_t k,
       arma::Mat<size_t>& resultingNeighbors,
       arma::mat& distances,
       const double tau,
       const double alpha,
       const bool sampleAtLeaves,
       const bool firstLeafExact,
       const size_t singleSampleLimit)
{


    // If we have built the trees ourselves, then we will have to map all the
    // indices back to their original indices when this computation is finished.
    // To avoid an extra copy, we will store the neighbors and distances in a
    // separate matrix.
    arma::Mat<size_t>* neighborPtr = &resultingNeighbors;
    arma::mat* distancePtr = &distances;

    // Mapping is only required if this tree type rearranges points and we are not
    // in naive mode.
    if (tree::TreeTraits<TreeType>::RearrangesDataset)
    {
        if (treeOwner && !(singleMode && hasQuerySet))
            distancePtr = new arma::mat; // Query indices need to be mapped.
        if (treeOwner)
            neighborPtr = new arma::Mat<size_t>; // All indices need mapping.
    }

    // Set the size of the neighbor and distance matrices.
    neighborPtr->set_size(k, querySet.n_cols);
    distancePtr->set_size(k, querySet.n_cols);
    distancePtr->fill(SortPolicy::WorstDistance());

    size_t numPrunes = 0;

    if (naive)
    {
        // We don't need to run the base case on every possible combination of
        // points; we can achieve the rank approximation guarantee with probability
        // alpha by sampling the reference set.
        typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
        RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                       metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                       singleSampleLimit);

        // Find how many samples from the reference set we need and sample uniformly
        // from the reference set without replacement.
        const size_t numSamples = rules.MinimumSamplesReqd(referenceSet.n_cols, k,
                                  tau, alpha);
        arma::uvec distinctSamples;
        rules.ObtainDistinctSamples(numSamples, referenceSet.n_cols,
                                    distinctSamples);

        // Run the base case on each combination of query point and sampled
        // reference point.
        for (size_t i = 0; i < querySet.n_cols; ++i)
            for (size_t j = 0; j < distinctSamples.n_elem; ++j)
                rules.BaseCase(i, (size_t) distinctSamples[j]);
    }
    else if (singleMode)
    {
        // Create the helper object for the tree traversal.  Initialization of
        // RASearchRules already implicitly performs the naive tree traversal.
        typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
        RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                       metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                       singleSampleLimit);

        // If the reference root node is a leaf, then the sampling has already been
        // done in the RASearchRules constructor.  This happens when naive = true.
        if (!referenceTree->IsLeaf())
        {
            Rcpp::Rcout << "Performing single-tree traversal..." << std::endl;

            // Create the traverser.
            typename TreeType::template SingleTreeTraverser<RuleType>
            traverser(rules);

            // Now have it traverse for each point.
            for (size_t i = 0; i < querySet.n_cols; ++i)
                traverser.Traverse(i, *referenceTree);

            numPrunes = traverser.NumPrunes();

            Rcpp::Rcout << "Single-tree traversal complete." << std::endl;
            Rcpp::Rcout << "Average number of distance calculations per query point: "
                        << (rules.NumDistComputations() / querySet.n_cols) << "."
                        << std::endl;
        }
    }
    else // Dual-tree recursion.
    {
        Rcpp::Rcout << "Performing dual-tree traversal..." << std::endl;

        typedef RASearchRules<SortPolicy, MetricType, TreeType> RuleType;
        RuleType rules(referenceSet, querySet, *neighborPtr, *distancePtr,
                       metric, tau, alpha, naive, sampleAtLeaves, firstLeafExact,
                       singleSampleLimit);

        typename TreeType::template DualTreeTraverser<RuleType> traverser(rules);

        if (queryTree)
        {
            Rcpp::Rcout << "Query statistic pre-search: "
                        << queryTree->Stat().NumSamplesMade() << std::endl;
            traverser.Traverse(*queryTree, *referenceTree);
        }
        else
        {
            Rcpp::Rcout << "Query statistic pre-search: "
                        << referenceTree->Stat().NumSamplesMade() << std::endl;
            traverser.Traverse(*referenceTree, *referenceTree);
        }

        numPrunes = traverser.NumPrunes();

        Rcpp::Rcout << "Dual-tree traversal complete." << std::endl;
        Rcpp::Rcout << "Average number of distance calculations per query point: "
                    << (rules.NumDistComputations() / querySet.n_cols) << "." << std::endl;
    }


    Rcpp::Rcout << "Pruned " << numPrunes << " nodes." << std::endl;

    // Now, do we need to do mapping of indices?
    if (!treeOwner || !tree::TreeTraits<TreeType>::RearrangesDataset)
    {
        // No mapping needed.  We are done.
        return;
    }
    else if (treeOwner && hasQuerySet && !singleMode) // Map both sets.
    {
        // Set size of output matrices correctly.
        resultingNeighbors.set_size(k, querySet.n_cols);
        distances.set_size(k, querySet.n_cols);

        for (size_t i = 0; i < distances.n_cols; i++)
        {
            // Map distances (copy a column).
            distances.col(oldFromNewQueries[i]) = distancePtr->col(i);

            // Map indices of neighbors.
            for (size_t j = 0; j < distances.n_rows; j++)
            {
                resultingNeighbors(j, oldFromNewQueries[i]) =
                    oldFromNewReferences[(*neighborPtr)(j, i)];
            }
        }

        // Finished with temporary matrices.
        delete neighborPtr;
        delete distancePtr;
    }
    else if (treeOwner && !hasQuerySet)
    {
        // No query tree -- map both references and queries.
        resultingNeighbors.set_size(k, querySet.n_cols);
        distances.set_size(k, querySet.n_cols);

        for (size_t i = 0; i < distances.n_cols; i++)
        {
            // Map distances (copy a column).
            distances.col(oldFromNewReferences[i]) = distancePtr->col(i);

            // Map indices of neighbors.
            for (size_t j = 0; j < distances.n_rows; j++)
            {
                resultingNeighbors(j, oldFromNewReferences[i]) =
                    oldFromNewReferences[(*neighborPtr)(j, i)];
            }
        }

        // Finished with temporary matrices.
        delete neighborPtr;
        delete distancePtr;
    }
    else if (treeOwner && hasQuerySet && singleMode) // Map only references.
    {
        // Set size of neighbor indices matrix correctly.
        resultingNeighbors.set_size(k, querySet.n_cols);

        // Map indices of neighbors.
        for (size_t i = 0; i < resultingNeighbors.n_cols; i++)
        {
            for (size_t j = 0; j < resultingNeighbors.n_rows; j++)
            {
                resultingNeighbors(j, i) = oldFromNewReferences[(*neighborPtr)(j, i)];
            }
        }

        // Finished with temporary matrix.
        delete neighborPtr;
    }
} // Search
Example no. 21
0
void ConcreteGridView<DuneGridView>::getRawElementDataImpl(
    arma::Mat<CoordinateType> &vertices, arma::Mat<int> &elementCorners,
    arma::Mat<char> &auxData, std::vector<int> *domainIndices) const {
  typedef typename DuneGridView::Grid DuneGrid;
  typedef typename DuneGridView::IndexSet DuneIndexSet;
  const int dimGrid = DuneGrid::dimension;
  const int dimWorld = DuneGrid::dimensionworld;
  const int codimVertex = dimGrid;
  const int codimElement = 0;
  typedef Dune::LeafMultipleCodimMultipleGeomTypeMapper<
      DuneGrid, Dune::MCMGElementLayout> DuneElementMapper;
  typedef typename DuneGridView::template Codim<codimVertex>::Iterator
  DuneVertexIterator;
  typedef typename DuneGridView::template Codim<codimElement>::Iterator
  DuneElementIterator;
  typedef typename DuneGridView::template Codim<codimVertex>::Geometry
  DuneVertexGeometry;
  typedef typename DuneGridView::template Codim<codimElement>::Geometry
  DuneElementGeometry;
  typedef typename DuneGrid::ctype ctype;

  const DuneIndexSet &indexSet = m_dune_gv.indexSet();

  vertices.set_size(dimWorld, indexSet.size(codimVertex));
  for (DuneVertexIterator it = m_dune_gv.template begin<codimVertex>();
       it != m_dune_gv.template end<codimVertex>(); ++it) {
    size_t index = indexSet.index(*it);
    const DuneVertexGeometry &geom = it->geometry();
    Dune::FieldVector<ctype, dimWorld> vertex = geom.corner(0);
    for (int i = 0; i < dimWorld; ++i)
      vertices(i, index) = vertex[i];
  }

  const int MAX_CORNER_COUNT = dimWorld == 2 ? 2 : 4;
  DuneElementMapper elementMapper(m_dune_gv.grid());
  const int elementCount = elementMapper.size();
  elementCorners.set_size(MAX_CORNER_COUNT, elementCount);
  for (DuneElementIterator it = m_dune_gv.template begin<codimElement>();
       it != m_dune_gv.template end<codimElement>(); ++it) {
    size_t index = indexSet.index(*it);
    const Dune::GenericReferenceElement<ctype, dimGrid> &refElement =
        Dune::GenericReferenceElements<ctype, dimGrid>::general(it->type());
    const int cornerCount = refElement.size(codimVertex);
    assert(cornerCount <= MAX_CORNER_COUNT);
    for (int i = 0; i < cornerCount; ++i)
      elementCorners(i, index) = indexSet.subIndex(*it, i, codimVertex);
    for (int i = cornerCount; i < MAX_CORNER_COUNT; ++i)
      elementCorners(i, index) = -1;
  }

  auxData.set_size(0, elementCorners.n_cols);

  if (domainIndices) {
    // Somewhat inelegant: we perform a second iteration over elements,
    // this time using the BEM++ interface to Dune.
    domainIndices->resize(elementCount);
    std::unique_ptr<EntityIterator<0>> it = this->entityIterator<0>();
    const IndexSet &indexSet = this->indexSet();
    while (!it->finished()) {
      const Entity<0> &entity = it->entity();
      const int index = indexSet.entityIndex(entity);
      const int domain = entity.domain();
      acc(*domainIndices, index) = domain;
      it->next();
    }
  }
}