Exemplo n.º 1
0
void PreRanker::Filter(bool viewportSearch)
{
  using TSet = set<impl::PreResult1, LessFeatureID>;
  TSet theSet;

  sort(m_results.begin(), m_results.end(), ComparePreResult1());
  m_results.erase(unique(m_results.begin(), m_results.end(), EqualFeatureID()), m_results.end());

  sort(m_results.begin(), m_results.end(), &impl::PreResult1::LessDistance);

  if (m_limit != 0 && m_results.size() > m_limit)
  {
    // Priority is some kind of distance from the viewport or
    // position, therefore if we have a bunch of results with the same
    // priority, we have no idea here which results are relevant.  To
    // prevent bias from previous search routines (like sorting by
    // feature id) this code randomly selects tail of the
    // sorted-by-priority list of pre-results.

    double const last = m_results[m_limit - 1].GetDistance();

    auto b = m_results.begin() + m_limit - 1;
    for (; b != m_results.begin() && b->GetDistance() == last; --b)
      ;
    if (b->GetDistance() != last)
      ++b;

    auto e = m_results.begin() + m_limit;
    for (; e != m_results.end() && e->GetDistance() == last; ++e)
      ;

    // The main reason of shuffling here is to select a random subset
    // from the low-priority results. We're using a linear
    // congruential method with default seed because it is fast,
    // simple and doesn't need an external entropy source.
    //
    // TODO (@y, @m, @vng): consider to take some kind of hash from
    // features and then select a subset with smallest values of this
    // hash.  In this case this subset of results will be persistent
    // to small changes in the original set.
    minstd_rand engine;
    shuffle(b, e, engine);
  }
  theSet.insert(m_results.begin(), m_results.begin() + min(m_results.size(), m_limit));

  if (!viewportSearch)
  {
    size_t n = min(m_results.size(), m_limit);
    nth_element(m_results.begin(), m_results.begin() + n, m_results.end(),
                &impl::PreResult1::LessRank);
    theSet.insert(m_results.begin(), m_results.begin() + n);
  }

  m_results.reserve(theSet.size());
  m_results.clear();
  copy(theSet.begin(), theSet.end(), back_inserter(m_results));
}
Exemplo n.º 2
0
 /// Inserts p into the set and returns the pointer the set holds for it.
 /// If an equivalent element was already present, p is deleted and the
 /// previously stored pointer is returned; otherwise p itself is returned.
 /// Passing the exact pointer that is already stored is safe: it is
 /// returned unchanged and not deleted.
 T* insert(T* p) {
   std::pair<typename TSet::iterator, bool> iNew = set_.insert(p);
   T* stored = *iNew.first;
   // Delete the rejected duplicate only when it is a different object from
   // the stored one; deleting the stored object itself would leave the set
   // holding (and this function returning) a dangling pointer.
   if (!iNew.second && stored != p)
     delete p;
   return stored;
 }
Exemplo n.º 3
0
// Returns true when no character occurs more than once in Guess, comparing
// characters case-insensitively via tolower(); returns false as soon as a
// repeated character is found.
bool FBullCowGame::IsIsogram(FString Guess) const {
  // Strings of zero or one character cannot contain a repeat.
  if (Guess.length() < 2) return true;

  TSet<char> SeenLetters;
  for (char Letter : Guess) {
    // tolower() is only defined for values representable as unsigned char
    // (or EOF), so convert first to avoid undefined behaviour on negative chars.
    Letter = static_cast<char>(tolower(static_cast<unsigned char>(Letter)));
    if (SeenLetters.find(Letter) != SeenLetters.end()) return false;
    SeenLetters.insert(Letter);
  }

  // Every character was unique. Without this return the function would flow
  // off its end, which is undefined behaviour for a non-void function.
  return true;
}
Exemplo n.º 4
0
// Appends each distinct strFile value found in m_tracks to mediaFiles.
// Names are gathered in a set first, so every file is added once and in the
// set's iteration order; existing contents of mediaFiles are kept.
void CCueDocument::GetMediaFiles(vector<std::string>& mediaFiles)
{
  typedef set<std::string> FileNameSet;

  FileNameSet distinctNames;
  for (Tracks::const_iterator track = m_tracks.begin(); track != m_tracks.end(); ++track)
    distinctNames.insert(track->strFile);

  mediaFiles.insert(mediaFiles.end(), distinctNames.begin(), distinctNames.end());
}
Exemplo n.º 5
0
// Reads a column description file and returns one TColumn per column index.
//
// Each non-empty line holds two or three tab-separated fields: the column
// index, the column type and an optional id. The type names "QueryId" and
// "Target" are accepted as aliases of "GroupId" and "Label".
//
// When defaults.UseDefaultType is set, the result starts with
// defaults.ColumnCount columns of defaults.DefaultColumnType and every index
// must be below that count; otherwise the result grows as needed and
// CheckAllFeaturesPresent is called on the parsed columns.
// Violations are reported through CB_ENSURE.
TVector<TColumn> ReadCD(const TString& fileName, const TCdParserDefaults& defaults) {
    CB_ENSURE(NFs::Exists(TString(fileName)), "column description file is not found");

    int columnsCount = 0;
    if (defaults.UseDefaultType) {
        columnsCount = defaults.ColumnCount;
    }

    TVector<TColumn> columns(columnsCount, TColumn{defaults.DefaultColumnType, TString()});
    TSet<int> parsedColumns;

    TIFStream reader(fileName.c_str());
    for (TString line; reader.ReadLine(line);) {
        TVector<TString> tokens;
        try {
            Split(line, "\t", tokens);
        } catch (const yexception& e) {
            // A line that cannot be split ends parsing; what was read so far is kept.
            MATRIXNET_DEBUG_LOG << "Got exception " << e.what() << " while parsing feature descriptions line " << line << Endl;
            break;
        }
        if (tokens.empty()) {
            continue;
        }
        CB_ENSURE(tokens.ysize() == 2 || tokens.ysize() == 3, "Each line should have two or three columns. " << line);

        int index = FromString<int>(tokens[0]);
        CB_ENSURE(index >= 0, "Invalid column index " << index);
        if (defaults.UseDefaultType) {
            CB_ENSURE(index < columnsCount, "Invalid column index " << index);
        }
        CB_ENSURE(!parsedColumns.has(index), "column specified twice in cd file: " << index);
        parsedColumns.insert(index);

        // Grow the result so that columns[index] exists.
        if (index >= columns.ysize()) {
            columns.resize(index + 1);
        }

        // Map the alias type names onto the names they stand for.
        TStringBuf type = tokens[1];
        if (type == "QueryId") {
            type = "GroupId";
        } else if (type == "Target") {
            type = "Label";
        }
        CB_ENSURE(TryFromString<EColumn>(type, columns[index].Type), "unsupported column type " << type);

        if (tokens.ysize() == 3) {
            columns[index].Id = tokens[2];
        }
    }

    if (!defaults.UseDefaultType) {
        CheckAllFeaturesPresent(columns, parsedColumns);
    }

    return columns;
}