void run(size_t bitLen) { if (bitLen < 8) { printf("too small bitLen=%d\n", (int)bitLen); exit(1); } const size_t N = size_t(1) << bitLen; RandomGenerator rg; printf("%09llx\n", (long long)N); puts("init"); Vec8 v; v.resize(N); for (size_t i = 0; i < 256; i++) { v[i] = uint8_t(i); } for (size_t i = 256; i < N; i++) { v[i] = uint8_t(rg()); } puts("start"); bench<cybozu::WaveletMatrix>("wm", v, N); #ifdef COMPARE_WAT bench<Wat>("wat", v, N); #endif #ifdef COMPARE_WAVELET bench<Wavelet>("wavelet", v, N); #endif #ifdef COMPARE_SHELLINFORD bench<Shellinford>("shellinford", v, N); #endif }
/*
    Copy every element of v8 into a vector of uint64_t and pass that vector
    to wm.Init().

    @param v8 source byte sequence
    The second, unnamed int parameter is not used.
*/
void init(const Vec8& v8, int)
{
    const size_t count = v8.size();
    std::vector<uint64_t> widened(count);
    size_t k = 0;
    while (k < count) {
        widened[k] = v8[k];
        ++k;
    }
    wm.Init(widened);
}
void bench_select(const T& wm, const Vec8& v8, RG& rg, size_t C) { cybozu::disable_warning_unused_variable(v8); size_t ret = 0; std::vector<int> maxTbl; maxTbl.resize(256); for (int i = 0; i < 256; i++) { int v = (int)wm.size(i); if (v == 0) v = 1; maxTbl[i] = v; } double begin = cybozu::GetCurrentTimeSec(); for (size_t i = 0; i < C; i++) { uint8_t c = uint8_t(rg()); size_t pos = rg() % maxTbl[c]; uint64_t a = wm.select(c, pos); #if 0 uint64_t b = v8.select(c, pos); if (a != b) { printf("ERR i=%d a=%d b=%d c=%d pos=%d\n", (int)i, (int)a, (int)b, (int)c, (int)pos); exit(1); } #endif ret += a; } double t = cybozu::GetCurrentTimeSec() - begin; printf("select %08x %9.2fusec\n", (int)ret, t / C * 1e6); }
/*
    Print the size summary via putSub(), then one line per freqTbl entry.

    Each line shows the entry index, freqTbl[i], freqTbl[i] as a percentage of
    vec.size(), and freqTbl[i] * encTbl[i].len as a percentage of bitSize_.
    Nothing beyond the putSub() output is printed when freqTbl is empty.
*/
void put() const
{
    putSub();
    if (freqTbl.empty()) {
        return;
    }
    const uint32_t compSize = (uint32_t)vec.size();
    const size_t entryNum = freqTbl.size();
    for (size_t idx = 0; idx != entryNum; ++idx) {
        const double shareOfComp = freqTbl[idx] * 100.0 / compSize;
        const double shareOfBits = freqTbl[idx] * encTbl[idx].len * 100.0 / bitSize_;
        printf("freqTbl[%2d] = %8d(%5.2f%%, %5.2f%%)\n", (int)idx, freqTbl[idx], shareOfComp, shareOfBits);
    }
}
/*
    Print a size summary: input size in bytes (bitSize_ / 8) together with
    rk_, the size of vec, the byte size of blkVec, their total, and the two
    sizes as percentages of the input size.

    Prints nothing when bitSize_ / 8 is zero, i.e. when bitSize_ < 8.
*/
void putSub() const
{
    const uint32_t inSize = bitSize_ / 8;
    // Check before computing the ratios: both of them divide by inSize.
    if (inSize == 0) return;

    const uint32_t compSize = (uint32_t)vec.size();
    const uint32_t idxSize = (uint32_t)(blkVec.size() * sizeof(blkVec[0]));
    const double cr = compSize * 100.0 / inSize; // vec size relative to input, in percent
    const double ir = idxSize * 100.0 / inSize;  // blkVec size relative to input, in percent

    printf("in Size= %9d, rank=%u\n", inSize, rk_);
    printf("comp Size= %9u\n", compSize);
    printf("idx Size= %9u(blkVec.size=%7u)\n", idxSize, (uint32_t)blkVec.size());
    printf("totalSize= %9u\n", compSize + idxSize);
    printf("rate=%5.2f%%(%5.2f%% + %5.2f%%)\n", cr + ir, cr, ir);
}
/*
    Fill blkVec with one Block per sampling position.

    Walks vec in order. orgPos advances by encTbl[v].len for each entry v
    (presumably the position in the original, uncompressed sequence -- confirm
    against the encoder), and rk advances by encTbl[v].rk. Each time the next
    sampling position (a multiple of skip) lies before the end of the current
    entry, a Block holding the entry's starting orgPos, its index in vec and
    the rk value accumulated before it is appended.
*/
void initBlockVec()
{
    blkVec.reserve(bitSize_ / skip + 16);
    uint32_t orgPos = 0;
    uint32_t rk = 0;
    uint32_t samplingPos = 0;
    const size_t vecLen = vec.size();
    for (size_t vecPos = 0; vecPos != vecLen; ++vecPos) {
        const uint8_t code = vec[vecPos];
        const uint32_t endPos = orgPos + encTbl[code].len;
        // An entry that spans several sampling positions yields several
        // identical Blocks, one per position.
        for (; samplingPos < endPos; samplingPos += skip) {
            blkVec.push_back(Block(orgPos, (uint32_t)vecPos, rk));
        }
        rk += encTbl[code].rk;
        orgPos = endPos;
    }
}
/*
    Encode the bit sequence in buf into the caller-supplied outputs.

    freqTbl is cleared and resized to encTbl.size(), vec is cleared and rk is
    set to 0. An InputStream over (buf, bitSize) is then drained: append(is)
    is called, the count it returns is consumed from the stream, and this
    repeats until the stream reports empty. append() runs at least once, even
    if the stream starts out empty. Finally bitSize is printed to stdout.

    @param freqTbl output table, one slot per encTbl entry
    @param vec     output sequence, cleared before encoding
    @param rk      output counter, reset to 0 before encoding
    @param buf     input data as 64-bit words
    @param bitSize bit size passed to InputStream together with buf
    @param encTbl  encoding table, also used to initialise the member bi
*/
OutputStream(Vec32& freqTbl, Vec8& vec, uint32_t& rk, const uint64_t *buf, uint32_t bitSize, const EncodingTbl& encTbl)
    : freqTbl(freqTbl)
    , vec(vec)
    , rk(rk)
    , bi(encTbl)
    , encTbl(encTbl)
{
    csucvector_util::InputStream is(buf, bitSize);

    freqTbl.clear();
    freqTbl.resize(encTbl.size());
    vec.clear();
    rk = 0;

    do {
        const uint32_t consumed = append(is);
        is.consume(consumed);
    } while (!is.empty());

    printf("bitSize=%u\n",bitSize);
}
/*
    Time C calls of wm.rank(c, pos) with random arguments and print the
    average time per call in microseconds.

    For each call, pos is a random value masked with (N - 1) and c is a random
    byte. The sum of all results is printed (truncated to int, in hex) next to
    the timing.

    @param wm structure under test; must provide rank(c, pos)
    @param v8 only referenced by the disabled cross-check below
    @param rg random source, called twice per iteration (pos first, then c)
    @param C  number of rank calls to time
    @param N  bound for pos; the mask only covers [0, N) evenly when N is a
              power of two -- NOTE(review): confirm callers guarantee this
*/
void bench_rank(const T& wm, const Vec8& v8, RG& rg, size_t C, size_t N)
{
    cybozu::disable_warning_unused_variable(v8);

    size_t ret = 0;
    const double begin = cybozu::GetCurrentTimeSec();
    for (size_t i = 0; i != C; ++i) {
        size_t pos = rg() & (N - 1);
        uint8_t c = uint8_t(rg());
        uint64_t a = wm.rank(c, pos);
#if 0
        uint64_t b = v8.rank(c, pos);
        if (a != b) {
            printf("ERR i=%d a=%d b=%d c=%d pos=%d\n", (int)i, (int)a, (int)b, (int)c, (int)pos);
            exit(1);
        }
#endif
        ret += a;
    }
    const double elapsed = cybozu::GetCurrentTimeSec() - begin;
    printf("rank %08x %9.2fusec\n", (int)ret, elapsed / C * 1e6);
}