TEST dct_speed(const int width) { const int size = width * width; uint64_t call_cnt = 0; dct_func * tested_func = test_env.strategy->fptr; KVZ_CLOCK_T clock_now; KVZ_GET_TIME(&clock_now); double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST; int16_t _tmp_residual[32 * 32 + SIMD_ALIGNMENT]; int16_t _tmp_coeffs[32 * 32 + SIMD_ALIGNMENT]; int16_t *tmp_residual = ALIGNED_POINTER(_tmp_residual, SIMD_ALIGNMENT); int16_t *tmp_coeffs = ALIGNED_POINTER(_tmp_coeffs, SIMD_ALIGNMENT); // Loop until time allocated for test has passed. for (unsigned i = 0; test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now); ++i) { int test = i % NUM_TESTS; uint64_t sum = 0; for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) { // Compare the first chunk against the 35 other chunks to simulate real usage. for (int chunk = 0; chunk < NUM_CHUNKS; ++chunk) { kvz_pixel * buf1 = &bufs[test][offset]; kvz_pixel * buf2 = &bufs[test][chunk * size + offset]; for (int p = 0; p < size; ++p) { tmp_residual[p] = (int16_t)(buf1[p] - buf2[p]); } tested_func(8, tmp_residual, tmp_coeffs); ++call_cnt; sum += tmp_coeffs[0]; } } ASSERT(sum > 0); KVZ_GET_TIME(&clock_now) } sprintf(test_env.msg, "%.3fM x %s:%s", (double)call_cnt / 1000000.0, test_env.strategy->type, test_env.strategy->strategy_name); PASSm(test_env.msg); }
TEST test_inter_speed(const int width) { const int size = width * width; unsigned call_cnt = 0; KVZ_CLOCK_T clock_now; KVZ_GET_TIME(&clock_now); double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST; // Loop until time allocated for test has passed. for (unsigned i = 0; test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now); ++i) { int test = i % NUM_TESTS; uint64_t sum = 0; for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) { // Treat 4 consecutive chunks as one chunk with double width and height, // and do a 8x8 grid search against the first chunk to simulate real usage. kvz_pixel * buf1 = &bufs[test][offset]; for (int chunk = 0; chunk < NUM_CHUNKS; chunk += 4) { kvz_pixel * buf2 = &bufs[test][chunk * size + offset]; for (int y = 0; y < 8; ++y) { for (int x = 0; x < 8; ++x) { const int stride1 = 2 * 64; const int stride2 = 2 * 64; reg_sad_func *tested_func = test_env.tested_func; sum += tested_func(buf1, &buf2[y * stride2 + x], width, width, stride1, stride2); ++call_cnt; } } } } ASSERT(sum > 0); KVZ_GET_TIME(&clock_now) } sprintf(test_env.msg, "%.3fM x %s(%ix%i):%s", (double)call_cnt / 1000000.0, test_env.strategy->type, width, width, test_env.strategy->strategy_name); PASSm(test_env.msg); }
/**
 * Compare the SAD function in sad_test_env.tested_func with simple_sad() on
 * the g_64x64_zero and g_64x64_max luma planes, using the block dimensions
 * from sad_test_env. The test fails when the two results differ.
 *
 * NOTE(review): the buffer names suggest all-zero versus all-maximum pixels,
 * i.e. the largest possible SAD; confirm where they are set up.
 */
TEST test_reg_sad_overflow(void)
{
  const unsigned stride = 64;
  const unsigned width = sad_test_env.width;
  const unsigned height = sad_test_env.height;

  // Reference value from the plain implementation.
  const unsigned expected =
      simple_sad(g_64x64_zero->y, g_64x64_max->y, stride, width, height);

  // Value from the implementation under test.
  unsigned (*sad_impl)(const kvz_pixel *, const kvz_pixel *, int, int, unsigned, unsigned) =
      sad_test_env.tested_func;
  const unsigned actual =
      sad_impl(g_64x64_zero->y, g_64x64_max->y, width, height, stride, stride);

  sprintf(sad_test_env.msg, "overflow %s(%ux%u):%s",
          sad_test_env.strategy->type,
          width,
          height,
          sad_test_env.strategy->strategy_name);

  if (actual != expected) {
    FAILm(sad_test_env.msg);
  }

  PASSm(sad_test_env.msg);
}
TEST test_intra_speed(const int width) { const int size = width * width; uint64_t call_cnt = 0; KVZ_CLOCK_T clock_now; KVZ_GET_TIME(&clock_now); double test_end = KVZ_CLOCK_T_AS_DOUBLE(clock_now) + TIME_PER_TEST; // Loop until time allocated for test has passed. for (unsigned i = 0; test_end > KVZ_CLOCK_T_AS_DOUBLE(clock_now); ++i) { int test = i % NUM_TESTS; uint64_t sum = 0; for (int offset = 0; offset < NUM_CHUNKS * 64 * 64; offset += NUM_CHUNKS * size) { // Compare the first chunk against the 35 other chunks to simulate real usage. kvz_pixel * buf1 = &bufs[test][offset]; for (int chunk = 1; chunk < NUM_CHUNKS; ++chunk) { kvz_pixel * buf2 = &bufs[test][chunk * size + offset]; cost_pixel_nxn_func *tested_func = test_env.tested_func; sum += tested_func(buf1, buf2); ++call_cnt; } } ASSERT(sum > 0); KVZ_GET_TIME(&clock_now) } sprintf(test_env.msg, "%.3fM x %s:%s", (double)call_cnt / 1000000.0, test_env.strategy->type, test_env.strategy->strategy_name); PASSm(test_env.msg); }