/// Feeds this node and, recursively, every descendant into `hash_state`.
/// Per node the order is: the bytes of getID(), the raw bytes of the child count,
/// then each child in the order it appears in `children`.
void IAST::getTreeHashImpl(SipHash & hash_state) const
{
    const auto node_id = getID();
    hash_state.update(node_id.data(), node_id.size());

    /// The child count is hashed as the raw in-memory bytes of a size_t.
    size_t child_count = children.size();
    hash_state.update(reinterpret_cast<const char *>(&child_count), sizeof(child_count));

    for (const auto & node : children)
        node->getTreeHashImpl(hash_state);
}
/// Computes a 128-bit SipHash for `key`.
/// The value of ClickHouseRevision::get() is mixed in first (as raw bytes), followed by
/// the contents of `key`; the two halves of the digest are returned as `first` / `second`.
static Compiler::HashedKey getHash(const std::string & key)
{
    SipHash hasher;

    auto current_revision = ClickHouseRevision::get();
    hasher.update(reinterpret_cast<const char *>(&current_revision), sizeof(current_revision));
    hasher.update(key.data(), key.size());

    Compiler::HashedKey result;
    hasher.get128(result.first, result.second);
    return result;
}
/// Updates `hash_state` with this subtree: first the node's own contribution
/// (updateTreeHashImpl), then the number of children, then every child recursively
/// in the order it appears in `children`.
void IAST::updateTreeHash(SipHash & hash_state) const
{
    updateTreeHashImpl(hash_state);

    const auto child_count = children.size();
    hash_state.update(child_count);

    for (const auto & node : children)
        node->updateTreeHash(hash_state);
}
void ColumnArray::updateHashWithValue(size_t n, SipHash & hash) const { size_t array_size = sizeAt(n); size_t offset = offsetAt(n); hash.update(array_size); for (size_t i = 0; i < array_size; ++i) getData().updateHashWithValue(offset + i, hash); }
/// Contributes this single node (without its children) to `hash_state`
/// by hashing the bytes of the string returned by getID().
void IAST::updateTreeHashImpl(SipHash & hash_state) const
{
    const auto node_id = getID();
    hash_state.update(node_id.data(), node_id.size());
}
/// Feeds `n` bytes of `chars`, starting at byte offset `n * index`, into `hash`.
/// NOTE(review): `n` is a member declared outside this view — presumably the fixed
/// string length, so this hashes the value at row `index`; confirm against the class.
void ColumnFixedString::updateHashWithValue(size_t index, SipHash & hash) const
{
    const char * value_begin = reinterpret_cast<const char *>(&chars[n * index]);
    hash.update(value_begin, n);
}
/// Feeds the element at position `n` of `data` into `hash`.
void ColumnVector<T>::updateHashWithValue(size_t n, SipHash & hash) const
{
    const auto & value = data[n];
    hash.update(value);
}
int main(int argc, char ** argv) { using Strings = std::vector<std::string>; using Hashes = std::vector<char>; Strings strings; size_t rows = 0; size_t bytes = 0; { Stopwatch watch; DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); while (!in.eof()) { strings.push_back(std::string()); DB::readEscapedString(strings.back(), in); DB::assertChar('\n', in); bytes += strings.back().size() + 1; } watch.stop(); rows = strings.size(); std::cerr << std::fixed << std::setprecision(2) << "Read " << rows << " rows, " << bytes / 1000000.0 << " MB" << ", elapsed: " << watch.elapsedSeconds() << " (" << rows / watch.elapsedSeconds() << " rows/sec., " << bytes / 1000000.0 / watch.elapsedSeconds() << " MB/sec.)" << std::endl; } Hashes hashes(16 * rows); { Stopwatch watch; for (size_t i = 0; i < rows; ++i) { *reinterpret_cast<UInt64*>(&hashes[i * 16]) = CityHash64(strings[i].data(), strings[i].size()); } watch.stop(); UInt64 check = CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "CityHash64 (check = " << check << ")" << ", elapsed: " << watch.elapsedSeconds() << " (" << rows / watch.elapsedSeconds() << " rows/sec., " << bytes / 1000000.0 / watch.elapsedSeconds() << " MB/sec.)" << std::endl; } /* { Stopwatch watch; std::vector<char> seed(16); for (size_t i = 0; i < rows; ++i) { sipHash( reinterpret_cast<unsigned char *>(&hashes[i * 16]), reinterpret_cast<const unsigned char *>(strings[i].data()), strings[i].size(), reinterpret_cast<const unsigned char *>(&seed[0])); } watch.stop(); UInt64 check = CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "SipHash (check = " << check << ")" << ", elapsed: " << watch.elapsedSeconds() << " (" << rows / watch.elapsedSeconds() << " rows/sec., " << bytes / 1000000.0 / watch.elapsedSeconds() << " MB/sec.)" << std::endl; }*/ { Stopwatch watch; for (size_t i = 0; i < rows; ++i) { SipHash hash; hash.update(strings[i].data(), strings[i].size()); hash.get128(&hashes[i * 
16]); } watch.stop(); UInt64 check = CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "SipHash, stream (check = " << check << ")" << ", elapsed: " << watch.elapsedSeconds() << " (" << rows / watch.elapsedSeconds() << " rows/sec., " << bytes / 1000000.0 / watch.elapsedSeconds() << " MB/sec.)" << std::endl; } { Stopwatch watch; for (size_t i = 0; i < rows; ++i) { MD5_CTX state; MD5_Init(&state); MD5_Update(&state, reinterpret_cast<const unsigned char *>(strings[i].data()), strings[i].size()); MD5_Final(reinterpret_cast<unsigned char *>(&hashes[i * 16]), &state); } watch.stop(); UInt64 check = CityHash64(&hashes[0], hashes.size()); std::cerr << std::fixed << std::setprecision(2) << "MD5 (check = " << check << ")" << ", elapsed: " << watch.elapsedSeconds() << " (" << rows / watch.elapsedSeconds() << " rows/sec., " << bytes / 1000000.0 / watch.elapsedSeconds() << " MB/sec.)" << std::endl; } return 0; }