int main(int argc, char ** argv) { size_t n = atoi(argv[1]); size_t m = atoi(argv[2]); DB::Arena pool; std::vector<StringRef> data(n); std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl; { Stopwatch watch; DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); DB::CompressedReadBuffer in2(in1); std::string tmp; for (size_t i = 0; i < n && !in2.eof(); ++i) { DB::readStringBinary(tmp, in2); data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size()); } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; } if (!m || m == 1) bench<StringRef_Compare1_Ptrs> (data, "StringRef_Compare1_Ptrs"); if (!m || m == 2) bench<StringRef_Compare1_Index> (data, "StringRef_Compare1_Index"); if (!m || m == 3) bench<StringRef_CompareMemcmp> (data, "StringRef_CompareMemcmp"); if (!m || m == 4) bench<StringRef_Compare8_1_byUInt64> (data, "StringRef_Compare8_1_byUInt64"); if (!m || m == 5) bench<StringRef_Compare16_1_byMemcmp> (data, "StringRef_Compare16_1_byMemcmp"); if (!m || m == 6) bench<StringRef_Compare16_1_byUInt64_logicAnd>(data, "StringRef_Compare16_1_byUInt64_logicAnd"); if (!m || m == 7) bench<StringRef_Compare16_1_byUInt64_bitAnd> (data, "StringRef_Compare16_1_byUInt64_bitAnd"); #if __SSE4_1__ if (!m || m == 8) bench<StringRef_Compare16_1_byIntSSE> (data, "StringRef_Compare16_1_byIntSSE"); if (!m || m == 9) bench<StringRef_Compare16_1_byFloatSSE> (data, "StringRef_Compare16_1_byFloatSSE"); if (!m || m == 10) bench<StringRef_Compare16_1_bySSE4> (data, "StringRef_Compare16_1_bySSE4"); if (!m || m == 11) bench<StringRef_Compare16_1_bySSE4_wide> (data, "StringRef_Compare16_1_bySSE4_wide"); if (!m || m == 12) bench<StringRef_Compare16_1_bySSE_wide> (data, "StringRef_Compare16_1_bySSE_wide"); #endif if (!m || m == 100) bench<StringRef_CompareAlwaysTrue> (data, "StringRef_CompareAlwaysTrue"); if (!m || m == 101) bench<StringRef_CompareAlmostAlwaysTrue> (data, "StringRef_CompareAlmostAlwaysTrue"); /// 10 > 8, 9 /// 1, 2, 5 - bad return 0; }
int main(int argc, char ** argv) { size_t n = atoi(argv[1]); size_t m = atoi(argv[2]); DB::Arena pool; std::vector<StringRef> data(n); std::cerr << "sizeof(Key) = " << sizeof(StringRef) << ", sizeof(Value) = " << sizeof(Value) << std::endl; { Stopwatch watch; DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); DB::CompressedReadBuffer in2(in1); std::string tmp; for (size_t i = 0; i < n && !in2.eof(); ++i) { DB::readStringBinary(tmp, in2); data[i] = StringRef(pool.insert(tmp.data(), tmp.size()), tmp.size()); } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; } if (!m || m == 1) bench<StringRef_CompareMemcmp, DefaultHash<StringRef>>(data, "StringRef_CityHash64"); if (!m || m == 2) bench<StringRef_CompareMemcmp, FastHash64> (data, "StringRef_FastHash64"); if (!m || m == 3) bench<StringRef_CompareMemcmp, SimpleHash> (data, "StringRef_SimpleHash"); #if defined(__x86_64__) if (!m || m == 4) bench<StringRef_CompareMemcmp, CrapWow> (data, "StringRef_CrapWow"); if (!m || m == 5) bench<StringRef_CompareMemcmp, CRC32Hash> (data, "StringRef_CRC32Hash"); if (!m || m == 6) bench<StringRef_CompareMemcmp, CRC32ILPHash> (data, "StringRef_CRC32ILPHash"); #endif if (!m || m == 7) bench<StringRef_CompareMemcmp, VerySimpleHash>(data, "StringRef_VerySimpleHash"); if (!m || m == 8) bench<StringRef_CompareMemcmp, FarmHash64>(data, "StringRef_FarmHash64"); if (!m || m == 9) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_1>>(data, "StringRef_MetroHash64_1"); if (!m || m == 10) bench<StringRef_CompareMemcmp, MetroHash64<metrohash64_2>>(data, "StringRef_MetroHash64_2"); return 0; }
int main(int argc, char ** argv) { std::cerr << std::fixed << std::setprecision(3); std::ofstream devnull("/dev/null"); DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); size_t n = atoi(argv[1]); size_t elems_show = 1; using Vec = std::vector<std::string>; using Set = std::unordered_map<std::string, int>; using RefsSet = std::unordered_map<StringRef, int, StringRefHash>; using DenseSet = google::dense_hash_map<std::string, int>; using RefsDenseSet = google::dense_hash_map<StringRef, int, StringRefHash>; using RefsHashMap = HashMap<StringRef, int, StringRefHash>; Vec vec; vec.reserve(n); { Stopwatch watch; std::string s; for (size_t i = 0; i < n && !in.eof(); ++i) { DB::readEscapedString(s, in); DB::assertChar('\n', in); vec.push_back(s); } std::cerr << "Read and inserted into vector in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; } { DB::Arena pool; Stopwatch watch; const char * res = nullptr; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) { const char * tmp = pool.insert(it->data(), it->size()); if (it == vec.begin()) res = tmp; } std::cerr << "Inserted into pool in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; devnull.write(res, 100); devnull << std::endl; } { Set set; Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) set[*it] = 0; std::cerr << "Inserted into std::unordered_map in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (Set::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull << it->first; devnull << std::endl; } } { RefsSet set; Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) set[StringRef(*it)] = 0; std::cerr << "Inserted refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull.write(it->first.data, it->first.size); devnull << std::endl; } } { DB::Arena pool; RefsSet set; Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0; std::cerr << "Inserted into pool and refs into std::unordered_map in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (RefsSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull.write(it->first.data, it->first.size); devnull << std::endl; } } { DenseSet set; set.set_empty_key(DenseSet::key_type()); Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) set[*it] = 0; std::cerr << "Inserted into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (DenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull << it->first; devnull << std::endl; } } { RefsDenseSet set; set.set_empty_key(RefsDenseSet::key_type()); Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) set[StringRef(it->data(), it->size())] = 0; std::cerr << "Inserted refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull.write(it->first.data, it->first.size); devnull << std::endl; } } { DB::Arena pool; RefsDenseSet set; set.set_empty_key(RefsDenseSet::key_type()); Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) set[StringRef(pool.insert(it->data(), it->size()), it->size())] = 0; std::cerr << "Inserted into pool and refs into google::dense_hash_map in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (RefsDenseSet::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull.write(it->first.data, it->first.size); devnull << std::endl; } } { RefsHashMap set; Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) { RefsHashMap::iterator inserted_it; bool inserted; set.emplace(StringRef(*it), inserted_it, inserted); } std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull.write(it->first.data, it->first.size); devnull << std::endl; } //std::cerr << set.size() << ", " << set.getCollisions() << std::endl; } { DB::Arena pool; RefsHashMap set; Stopwatch watch; for (Vec::iterator it = vec.begin(); it != vec.end(); ++it) { RefsHashMap::iterator inserted_it; bool inserted; set.emplace(StringRef(pool.insert(it->data(), it->size()), it->size()), inserted_it, inserted); } std::cerr << "Inserted into pool and refs into HashMap in " << watch.elapsedSeconds() << " sec, " << vec.size() / watch.elapsedSeconds() << " rows/sec., " << in.count() / watch.elapsedSeconds() / 1000000 << " MB/sec." << std::endl; size_t i = 0; for (RefsHashMap::const_iterator it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { devnull.write(it->first.data, it->first.size); devnull << std::endl; } } return 0; }
int main(int argc, char ** argv) { size_t n = atoi(argv[1]); size_t m = atoi(argv[2]); DB::Arena pool; std::vector<Key> data(n); std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl; { Stopwatch watch; DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); DB::CompressedReadBuffer in2(in1); std::string tmp; for (size_t i = 0; i < n && !in2.eof(); ++i) { DB::readStringBinary(tmp, in2); data[i] = Key(pool.insert(tmp.data(), tmp.size()), tmp.size()); } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; } if (!m || m == 1) { Stopwatch watch; //using Map = HashMap<Key, Value>; /// Saving the hash accelerates the resize by about 2 times, and the overall performance by 6-8%. using Map = HashMapWithSavedHash<Key, Value, DefaultHash<Key>, Grower>; Map map; Map::iterator it; bool inserted; for (size_t i = 0; i < n; ++i) { map.emplace(data[i], it, inserted); if (inserted) it->second = 0; ++it->second; } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "HashMap (CityHash64). Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS << ", collisions: " << map.getCollisions() #endif << std::endl; } if (!m || m == 2) { Stopwatch watch; using Map = HashMapWithSavedHash<Key, Value, FastHash64, Grower>; Map map; Map::iterator it; bool inserted; for (size_t i = 0; i < n; ++i) { map.emplace(data[i], it, inserted); if (inserted) it->second = 0; ++it->second; } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "HashMap (FastHash64). Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS << ", collisions: " << map.getCollisions() #endif << std::endl; } if (!m || m == 3) { Stopwatch watch; using Map = HashMapWithSavedHash<Key, Value, CrapWow, Grower>; Map map; Map::iterator it; bool inserted; for (size_t i = 0; i < n; ++i) { map.emplace(data[i], it, inserted); if (inserted) it->second = 0; ++it->second; } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "HashMap (CrapWow). Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS << ", collisions: " << map.getCollisions() #endif << std::endl; } if (!m || m == 4) { Stopwatch watch; using Map = HashMapWithSavedHash<Key, Value, SimpleHash, Grower>; Map map; Map::iterator it; bool inserted; for (size_t i = 0; i < n; ++i) { map.emplace(data[i], it, inserted); if (inserted) it->second = 0; ++it->second; } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "HashMap (SimpleHash). Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS << ", collisions: " << map.getCollisions() #endif << std::endl; } if (!m || m == 5) { Stopwatch watch; std::unordered_map<Key, Value, DefaultHash<Key> > map; for (size_t i = 0; i < n; ++i) ++map[data[i]]; watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "std::unordered_map. Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; } if (!m || m == 6) { Stopwatch watch; google::dense_hash_map<Key, Value, DefaultHash<Key> > map; map.set_empty_key(Key("\0", 1)); for (size_t i = 0; i < n; ++i) ++map[data[i]]; watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "google::dense_hash_map. Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; } if (!m || m == 7) { Stopwatch watch; google::sparse_hash_map<Key, Value, DefaultHash<Key> > map; for (size_t i = 0; i < n; ++i) ++map[data[i]]; watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "google::sparse_hash_map. Size: " << map.size() << ", elapsed: " << watch.elapsedSeconds() << " (" << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl; } return 0; }