void PreRanker::Filter(bool viewportSearch) { using TSet = set<impl::PreResult1, LessFeatureID>; TSet theSet; sort(m_results.begin(), m_results.end(), ComparePreResult1()); m_results.erase(unique(m_results.begin(), m_results.end(), EqualFeatureID()), m_results.end()); sort(m_results.begin(), m_results.end(), &impl::PreResult1::LessDistance); if (m_limit != 0 && m_results.size() > m_limit) { // Priority is some kind of distance from the viewport or // position, therefore if we have a bunch of results with the same // priority, we have no idea here which results are relevant. To // prevent bias from previous search routines (like sorting by // feature id) this code randomly selects tail of the // sorted-by-priority list of pre-results. double const last = m_results[m_limit - 1].GetDistance(); auto b = m_results.begin() + m_limit - 1; for (; b != m_results.begin() && b->GetDistance() == last; --b) ; if (b->GetDistance() != last) ++b; auto e = m_results.begin() + m_limit; for (; e != m_results.end() && e->GetDistance() == last; ++e) ; // The main reason of shuffling here is to select a random subset // from the low-priority results. We're using a linear // congruential method with default seed because it is fast, // simple and doesn't need an external entropy source. // // TODO (@y, @m, @vng): consider to take some kind of hash from // features and then select a subset with smallest values of this // hash. In this case this subset of results will be persistent // to small changes in the original set. minstd_rand engine; shuffle(b, e, engine); } theSet.insert(m_results.begin(), m_results.begin() + min(m_results.size(), m_limit)); if (!viewportSearch) { size_t n = min(m_results.size(), m_limit); nth_element(m_results.begin(), m_results.begin() + n, m_results.end(), &impl::PreResult1::LessRank); theSet.insert(m_results.begin(), m_results.begin() + n); } m_results.reserve(theSet.size()); m_results.clear(); copy(theSet.begin(), theSet.end(), back_inserter(m_results)); }
/// delete p if it was already in set; return version in set. T* insert(T* p) { std::pair<typename TSet::iterator, bool> iNew = set_.insert(p); if (iNew.second) return p; else { delete p; return *iNew.first; } }
// Returns true when no letter occurs more than once in Guess, comparing
// letters case-insensitively; returns false as soon as a repeat is found.
bool FBullCowGame::IsIsogram(FString Guess) const
{
    // Words of zero or one letter cannot contain a repeated letter.
    if (Guess.length() < 2)
        return true;

    TSet<char> LettersSeen;
    for (char Letter : Guess)
    {
        // tolower() requires a value representable as unsigned char (or EOF);
        // passing a negative plain char is undefined behaviour.
        Letter = static_cast<char>(tolower(static_cast<unsigned char>(Letter)));

        if (LettersSeen.find(Letter) != LettersSeen.end())
            return false;
        LettersSeen.insert(Letter);
    }

    // Every letter was unique.  The original fell off the end of the function
    // here, which is undefined behaviour for a non-void function.
    return true;
}
// Appends to mediaFiles the distinct strFile values of all entries in
// m_tracks.  Each value is appended once, in the set's sorted order; whatever
// mediaFiles already contains is kept.
void CCueDocument::GetMediaFiles(vector<std::string>& mediaFiles)
{
  // The set both removes duplicates and defines the output order.
  set<std::string> distinctFiles;
  for (const auto& track : m_tracks)
    distinctFiles.insert(track.strFile);

  mediaFiles.insert(mediaFiles.end(), distinctFiles.begin(), distinctFiles.end());
}
// Reads a column description file and returns one TColumn per column index.
//
// Each non-empty line holds two or three tab-separated fields: the column
// index, the column type and an optional column id.  The legacy type names
// "QueryId" and "Target" are accepted as "GroupId" and "Label".
//
// When defaults.UseDefaultType is set, the result starts out as
// defaults.ColumnCount columns of defaults.DefaultColumnType and every index
// in the file must be below that count.  Otherwise the result grows to fit
// the largest index seen and CheckAllFeaturesPresent is called on the result.
//
// Fails via CB_ENSURE if the file does not exist, a line has the wrong number
// of fields, an index is negative / out of range / repeated, or a type name
// is not recognised.  If Split throws, the exception is logged and reading
// stops at that line.
TVector<TColumn> ReadCD(const TString& fileName, const TCdParserDefaults& defaults) {
    CB_ENSURE(NFs::Exists(TString(fileName)), "column description file is not found");

    const int columnsCount = defaults.UseDefaultType ? defaults.ColumnCount : 0;
    TVector<TColumn> columns(columnsCount, TColumn{defaults.DefaultColumnType, TString()});

    TSet<int> parsedColumns;
    TString line;
    TIFStream reader(fileName.c_str());
    while (reader.ReadLine(line)) {
        TVector<TString> tokens;
        try {
            Split(line, "\t", tokens);
        } catch (const yexception& e) {
            MATRIXNET_DEBUG_LOG << "Got exception " << e.what() << " while parsing feature descriptions line " << line << Endl;
            break;
        }

        if (tokens.empty()) {
            continue;
        }

        const int tokenCount = tokens.ysize();
        CB_ENSURE(tokenCount == 2 || tokenCount == 3, "Each line should have two or three columns. " << line);

        const int index = FromString<int>(tokens[0]);
        CB_ENSURE(index >= 0, "Invalid column index " << index);
        if (defaults.UseDefaultType) {
            CB_ENSURE(index < columnsCount, "Invalid column index " << index);
        }

        CB_ENSURE(!parsedColumns.has(index), "column specified twice in cd file: " << index);
        parsedColumns.insert(index);

        // Make sure the slot for this index exists before writing into it.
        columns.resize(Max(columns.ysize(), index + 1));
        TColumn& column = columns[index];

        // Map legacy type names onto their current equivalents.
        TStringBuf type = tokens[1];
        if (type == "QueryId") {
            type = "GroupId";
        } else if (type == "Target") {
            type = "Label";
        }
        CB_ENSURE(TryFromString<EColumn>(type, column.Type), "unsupported column type " << type);

        if (tokenCount == 3) {
            column.Id = tokens[2];
        }
    }

    if (!defaults.UseDefaultType) {
        CheckAllFeaturesPresent(columns, parsedColumns);
    }
    return columns;
}