void testPow() { #ifdef NDEBUG const size_t n = 1000; #else const size_t n = 1; #endif Vuint m = 2, d = 2; for (size_t i = 0; i < 160 - 1; i++) { m *= d; } m += 7; ZmZ<>::setModulo(m); TEST_EQUAL(m, Vuint("1461501637330902918203684832716283019655932542983")); ZmZ<> x, y = ZmZ<>(m) - 1; Xbyak::util::Clock clk; for (size_t i = 0; i < n; i++) { x = 3; clk.begin(); x = power(x, y); clk.end(); } TEST_EQUAL(x, 1); // clk=1428899.430clk (VariableBuffer) // clk= 555733.590clk (FixedBuffer) printf("clk=%.3fclk\n", clk.getClock() / double(clk.getCount())); }
void bench() { Xbyak::util::Clock clk; const size_t N = 1000; const size_t M = 1000; Vuint m("14615016373309029182036848327162830196559325429831461501636150163733090291820368483271628301965593254298314615016373309029182036848327162830196559325429831461501637330902918203684832716283019655932542983"); ZmZ<>::setModulo(m); // const char *str = "123456789012342342342499924234242422333333333333333333333256789112345678901234234234242423424242233333333333333333333325678911234567890123423423424242342424223333333333333333333332567891"; const char *str = "82434016654300679721217353503190038836571781811386228921167322412819029493182"; // ZmZ<> a(str); Vuint a(str), b(a); for (size_t i = 0; i < N; i++) { clk.begin(); for (size_t j = 0; j < M; j++) { // a *= a; Vuint::mul(b, a, a); // 4268clk // Vuint::mul1(b, a, 123456789); // 128clk // Vuint::add(b, a, a); // 113clk // local::PrimitiveFunction::add1(&b[0], &a[0], a.size(), 1234566); // local::PrimitiveFunction::mul1(&b[0], &a[0], a.size(), 1234566); // Vuint::sub(b, a, a); // 140clk } clk.end(); } printf("clk=%.3fclk\n", clk.getClock() / double(clk.getCount()) / M); }
/*
	Time repeated f.find() calls while walking from f.begin() to f.end().
	@param str text handed to f.set(); its size is the fallback divisor
	@param key pattern handed to f.set()
	@param f searcher object providing set(), begin(), end(), find() and the nested type `type`
	@return ret.val is the sum of the distances (q - p) between the search start and each hit,
	        or str.size() when that sum is zero; ret.clk is the accumulated clock divided by ret.val
	Only the find() call is inside the timed region.
*/
Ret benchmark1(const std::basic_string<C>& str, const std::basic_string<C>& key, F f)
{
	const int repeatNum = 1;
	int total = 0;
	f.set(str, key);
	Xbyak::util::Clock clk;
	for (int rep = 0; rep < repeatNum; rep++) {
		typename F::type cur = f.begin();
		typename F::type last = f.end();
		while (true) {
			clk.begin();
			typename F::type hit = f.find(cur);
			clk.end();
			if (hit == last) {
				break;
			}
			total += static_cast<int>(hit - cur);
			// resume one position after the hit
			cur = hit + 1;
		}
	}
	// nothing was accumulated: normalize by the text length instead
	if (total == 0) {
		total = static_cast<int>(str.size()) * repeatNum;
	}
	Ret result;
	result.val = total;
	result.clk = clk.getClock() / static_cast<double>(total);
	return result;
}
double getDummyLoopClock(size_t n, size_t bitLen) { uint64_t ret = 0; Xbyak::util::Clock clk; #ifdef USE_C11 g_rg.seed(0); std::uniform_int_distribution<uint64_t> dist(0, (1ULL << bitLen) - 1); #else XorShift128 r; const uint64_t mask = (1ULL << bitLen) - 1; #endif const int lp = 5; for (int i = 0; i < lp; i++) { clk.begin(); for (size_t i = 0; i < n; i++) { #ifdef USE_C11 uint64_t v = dist(g_rg); #else uint64_t v = r.get64(); v += r.get() >> 5; v &= mask; #endif ret += v; } clk.end(); } printf("(%llx)", (long long)ret); return clk.getClock() / double(n) / lp; }
/*
	Run f once inside a timed region and print the clock per loop.
	@param msg label printed at the head of the result line
	@param f function to measure
	The global `counter` is reset to zero before the call and printed afterwards;
	the clock is divided by (call count * N), where N is defined outside this function.
*/
void test(const char *msg, void (*f)())
{
	Xbyak::util::Clock clk;
	counter = 0;

	clk.begin();
	f();
	clk.end();

	const double clkPerLoop = clk.getClock() / double(clk.getCount() * N);
	const long long counted = (long long)counter;
	printf("%s %.2f clk/loop counter=%lld\n", msg, clkPerLoop, counted);
}
/*
	Call f on the bytes of s 100 times and print the summed result and the
	clock count per call per byte.
	@param s input buffer; passed to f as (const uint8_t*, size)
	@param f callable taking (const uint8_t *, size_t) whose return value is added to a 64-bit sum
*/
void test(const std::string& s, F f)
{
	const int repeatNum = 100;
	uint64_t total = 0;
	Xbyak::util::Clock clk;
	for (int rep = 0; rep != repeatNum; ++rep) {
		clk.begin();
		total += f(reinterpret_cast<const uint8_t*>(s.c_str()), s.size());
		clk.end();
	}
	const double clkPerByte = clk.getClock() / double(repeatNum) / s.size();
	printf("x=%16lld, %.2fclk\n", (long long)total, clkPerByte);
}
/*
	Obtain the code pointer from a Code object, call it 1000 times, timing
	each call separately, and print the clock count divided by N and by the call count.
	N is defined outside this function.
*/
int main()
{
	typedef void (*Func)();
	const int callNum = 1000;
	Xbyak::util::Clock clk;
	Code c;
	Func f = (Func)c.getCode();
	int done = 0;
	while (done < callNum) {
		clk.begin();
		f();
		clk.end();
		done++;
	}
	printf("%.3fclk\n", clk.getClock() / double(N) / clk.getCount());
}
/*
	Time std::sort over pv with the given predicate.
	@param n element count handed to init(); also the divisor of the printed clock
	@param doPut when true, put(pv) is called before and after the sort
	@param pred comparison passed to std::sort
	The sum of iv is printed together with the clock per element.
*/
void test(size_t n, bool doPut, F pred)
{
	IntVec iv;
	PtrVec pv;
	init(iv, pv, n);
	if (doPut) {
		put(pv);
	}

	Xbyak::util::Clock clk;
	clk.begin();
	std::sort(pv.begin(), pv.end(), pred);
	clk.end();

	const int total = std::accumulate(iv.begin(), iv.end(), 0);
	const double clkPerElem = clk.getClock() / double(n);
	printf("clk=%.2fclk, sum=%d\n", clkPerElem, total);

	if (doPut) {
		put(pv);
	}
}
/*
	Time one call of the generated code for Code(true) and Code(false).
	With no command line argument both variants run; otherwise argv[1] is
	converted by atoi() and, when it is non-negative, only the variant whose
	index (0 or 1) equals it runs.
	The printed value is the clock count divided by N, which is defined outside this function.
*/
int main(int argc, char *argv[])
{
	int mode = -1;
	if (argc != 1) {
		mode = atoi(argv[1]);
	}
	for (int i = 0; i < 2; i++) {
		const bool selected = mode < 0 || mode == i;
		if (!selected) continue;

		// index 0 constructs Code(true), index 1 constructs Code(false)
		Code c(i == 0);
		void (*f)() = c.getCode<void (*)()>();

		Xbyak::util::Clock clk;
		clk.begin();
		f();
		clk.end();
		const double clkPerLoop = clk.getClock() / double(N);
		printf("%.2fclk\n", clkPerLoop);
	}
}
void test(int mode) { printf("mode:%6s ", mode2str(mode)); memset(data, 0, sizeof(data)); double time = 0; { Xbyak::util::Clock clk; clk.begin(); std::thread t1(write1[mode]); std::thread t2(write2[mode]); t1.join(); t2.join(); clk.end(); time = (double)clk.getClock(); } int num[4] = { }; for (int i = 0; i < N; i++) { const Data& d = data[i]; if (d.sa == 0 || d.sb == 0) { printf("ERR %d %d\n", d.sa, d.sb); exit(1); } if (d.r1 == 0 && d.r2 == 0) { num[0]++; } else if (d.r1 == 1 && d.r2 == 0) { num[1]++; } else if (d.r1 == 0 && d.r2 == 1) { num[2]++; } else { num[3]++; } } int sum = 0; printf("num="); for (int i = 0; i < 4; i++) { printf("%8d ", num[i]); sum += num[i]; } if (sum != N) { fprintf(stderr, "ERR sum=%d\n", sum); exit(1); } printf("clk=%7.3fMclk\n", time * 1e-6); }
/*
	Generate a benchmark routine with Code::makeBench(100000, mode), call it
	100 times on fixed operands and print the clock count divided by the
	call count, by 100000 and by innerN (defined outside this function).
	@param mode forwarded unchanged to makeBench()
*/
void bench(int mode)
{
	typedef int (*BenchFunc)(uint64_t*, const uint64_t*, const uint64_t*);
	const int loopNum = 100000;
	Code code;
	code.makeBench(loopNum, mode);
	BenchFunc func = (BenchFunc)code.getCode();

	// fixed inputs (4 words each) and a 5-word output buffer
	uint64_t x[4] = { uint64_t(-1), uint64_t(-2), uint64_t(-3), 544443221 };
	uint64_t y[4] = { uint64_t(-123), uint64_t(-3), uint64_t(-4), 222222222 };
	uint64_t z[5] = { 0, 0, 0, 0, 0 };

	const int callNum = 100;
	Xbyak::util::Clock clk;
	for (int call = 0; call != callNum; ++call) {
		clk.begin();
		func(z, x, y);
		clk.end();
	}
	printf("%.2fclk\n", clk.getClock() / double(callNum) / double(loopNum) / innerN);
}
uint64_t bench(const uint64_t *block, size_t blockNum, size_t n, size_t bitLen, double baseClk, bool useSelect) { const T sbv(block, blockNum); uint64_t ret = 0; Xbyak::util::Clock clk; #ifdef USE_C11 std::uniform_int_distribution<uint64_t> dist(0, (1ULL << bitLen) - 1); #else XorShift128 r; const uint64_t mask = (1ULL << bitLen) - 1; #endif const int lp = 5; for (int j = 0; j < lp; j++) { clk.begin(); for (size_t i = 0; i < n; i++) { #ifdef USE_C11 uint64_t v = dist(g_rg); #else uint64_t v = r.get64(); v &= mask; #endif ret += sbv.rank1(v); } clk.end(); } printf("%11lld ret %08x %6.2f clk(%6.2f)\n", 1LL << bitLen, (int)ret, (double)clk.getClock() / double(n) / lp - baseClk, baseClk); if (useSelect) { clk.clear(); const size_t maxNum = sbv.rank1(blockNum * 64 - 1); for (int j = 0; j < lp; j++) { clk.begin(); for (size_t i = 0; i < n; i++) { #ifdef USE_C11 uint64_t v = dist(g_rg); #else uint64_t v = r.get64(); v %= maxNum; #endif ret += sbv.select1(v); } clk.end(); } printf("%11lld ret %08x %6.2f clk(%6.2f)\n", 1LL << bitLen, (int)ret, (double)clk.getClock() / double(n) / lp - baseClk, baseClk); } return ret; }