/*
 * Compare the reference and the candidate implementation of every 8x8 luma
 * intra predictor in h->pred8x8l[].
 *
 * The test only runs when chroma_format is 1 and the codec id selected by
 * `codec` is AV_CODEC_ID_H264. Each predictor that check_pred_func() selects
 * is exercised with the four combinations of the top-left (0x8000) and
 * top-right (0x4000) neighbour flags; the masked flag values themselves (not
 * 0/1 booleans) are handed to the predictor.
 */
static void check_pred8x8l(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                           int codec, int chroma_format, int bit_depth)
{
    int pred_mode;

    if (chroma_format != 1 || codec_ids[codec] != AV_CODEC_ID_H264)
        return;

    for (pred_mode = 0; pred_mode < 12; pred_mode++) {
        const ptrdiff_t stride = (ptrdiff_t)24*SIZEOF_PIXEL;
        int needs_topleft;
        int step;

        if (!(check_pred_func(h->pred8x8l[pred_mode], "8x8l",
                              pred4x4_modes[codec][pred_mode])))
            continue;

        needs_topleft = pred_mode == DIAG_DOWN_RIGHT_PRED ||
                        pred_mode == VERT_RIGHT_PRED;

        /* step 0..3 maps to neighbors 0x0000, 0x4000, 0x8000, 0xc000 */
        for (step = 0; step < 4; step++) {
            const int neighbors    = step * 0x4000;
            const int has_topleft  = neighbors & 0x8000;
            const int has_topright = neighbors & 0x4000;

            /* Those aren't allowed according to the spec */
            if (needs_topleft && !has_topleft)
                continue;

            randomize_buffers();
            call_ref(src0, has_topleft, has_topright, stride);
            call_new(src1, has_topleft, has_topright, stride);
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, has_topleft, has_topright, stride);
        }
    }
}
/*
 * Compare the reference and the candidate implementation of
 * h.add_residual[] for block sizes 4x4, 8x8, 16x16 and 32x32.
 *
 * For bit depths above 8 the stride is doubled, so a block spans
 * stride * block_size bytes of the destination rather than `size` bytes.
 * The destination buffers are therefore synchronised and compared over that
 * full span; copying/comparing only `size` bytes would leave the lower half
 * of the block with differing inputs and unchecked outputs.
 */
static void check_add_res(HEVCDSPContext h, int bit_depth)
{
    int i;
    LOCAL_ALIGNED(32, int16_t, res0, [32 * 32]);
    LOCAL_ALIGNED(32, int16_t, res1, [32 * 32]);
    LOCAL_ALIGNED(32, uint8_t, dst0, [32 * 32 * 2]);
    LOCAL_ALIGNED(32, uint8_t, dst1, [32 * 32 * 2]);

    for (i = 2; i <= 5; i++) {
        int block_size = 1 << i;
        int size = block_size * block_size;
        ptrdiff_t stride = block_size << (bit_depth > 8);
        /*
         * Bytes covered by block_size rows of `stride` bytes each; at most
         * 32 * 64 = 32 * 32 * 2, which is the size of dst0/dst1.
         * NOTE(review): assumes the function under test treats `stride` as a
         * byte stride on the uint8_t destination - confirm.
         */
        size_t dst_bytes = (size_t)stride * block_size;
        declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *res, ptrdiff_t stride);

        randomize_buffers(res0, size);
        randomize_buffers2(dst0, size);
        memcpy(res1, res0, sizeof(*res0) * size);
        memcpy(dst1, dst0, dst_bytes);

        if (check_func(h.add_residual[i - 2], "add_res_%dx%d_%d", block_size, block_size, bit_depth)) {
            call_ref(dst0, res0, stride);
            call_new(dst1, res1, stride);
            if (memcmp(dst0, dst1, dst_bytes))
                fail();
            bench_new(dst1, res1, stride);
        }
    }
}
/*
 * Compare the reference and the candidate implementation of c.add_bytes on
 * `width` bytes of random source data.
 *
 * Four zero-initialised buffers of `width` bytes are allocated: two sources
 * holding identical random data and two destinations. If any allocation
 * fails the failure is reported and the test body is skipped; the buffers
 * are released on every path.
 */
static void check_add_bytes(LLVidDSPContext c, int width)
{
    uint8_t *src0 = av_mallocz(width);
    uint8_t *src1 = av_mallocz(width);
    uint8_t *dst0 = av_mallocz(width);
    uint8_t *dst1 = av_mallocz(width);
    declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, uint8_t *src, ptrdiff_t w);

    if (!src0 || !src1 || !dst0 || !dst1) {
        /*
         * fail() does not stop execution (other code in this file keeps
         * running after calling it), so the buffers must not be touched
         * past this point: at least one of them is NULL.
         */
        fail();
    } else {
        randomize_buffers(src0, width);
        memcpy(src1, src0, width);

        if (check_func(c.add_bytes, "add_bytes")) {
            call_ref(dst0, src0, width);
            call_new(dst1, src1, width);
            if (memcmp(dst0, dst1, width))
                fail();
            bench_new(dst1, src1, width);
        }
    }

    av_free(src0);
    av_free(src1);
    av_free(dst0);
    av_free(dst1);
}
/*
 * Compare the reference and the candidate implementation of each of the 11
 * entries of h->pred8x8[].
 *
 * The size label used in the test name is "8x16" when chroma_format is 2 and
 * "8x8" otherwise. Both implementations run on identically randomised
 * buffers and the whole BUF_SIZE bytes of buf0/buf1 are compared afterwards.
 */
static void check_pred8x8(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    const char *const size_label = (chroma_format == 2) ? "8x16" : "8x8";
    int pred_mode = 0;

    while (pred_mode < 11) {
        if (check_pred_func(h->pred8x8[pred_mode], size_label,
                            pred8x8_modes[codec][pred_mode])) {
            const ptrdiff_t stride = (ptrdiff_t)24*SIZEOF_PIXEL;

            randomize_buffers();
            call_ref(src0, stride);
            call_new(src1, stride);
            if (memcmp(buf0, buf1, BUF_SIZE))
                fail();
            bench_new(src1, stride);
        }
        pred_mode++;
    }
}
/*
 * Compare the reference and the candidate implementation of each of the 9
 * entries of h->pred16x16[].
 *
 * Nothing is tested unless chroma_format is 1. The predictors are called
 * with a fixed stride of 48 and the whole BUF_SIZE bytes of buf0/buf1 are
 * compared afterwards.
 */
static void check_pred16x16(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                            int codec, int chroma_format, int bit_depth)
{
    const ptrdiff_t stride = 48;
    int pred_mode;

    if (chroma_format != 1)
        return;

    for (pred_mode = 0; pred_mode < 9; pred_mode++) {
        if (!(check_pred_func(h->pred16x16[pred_mode], "16x16",
                              pred16x16_modes[codec][pred_mode])))
            continue;

        randomize_buffers();
        call_ref(src0, stride);
        call_new(src1, stride);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, stride);
    }
}
/*
 * Compare the reference and the candidate implementation of each of the 15
 * entries of h->pred4x4[].
 *
 * Nothing is tested unless chroma_format is 1. The same top-right pointer
 * (32 bytes into buf0) is handed to both implementations, and the whole
 * BUF_SIZE bytes of buf0/buf1 are compared afterwards.
 */
static void check_pred4x4(H264PredContext *h, uint8_t *buf0, uint8_t *buf1,
                          int codec, int chroma_format, int bit_depth)
{
    uint8_t *topright;
    int pred_mode;

    if (chroma_format != 1)
        return;

    topright = &buf0[2*16];

    for (pred_mode = 0; pred_mode < 15; pred_mode++) {
        const ptrdiff_t stride = (ptrdiff_t)12*SIZEOF_PIXEL;

        if (!(check_pred_func(h->pred4x4[pred_mode], "4x4",
                              pred4x4_modes[codec][pred_mode])))
            continue;

        randomize_buffers();
        call_ref(src0, topright, stride);
        call_new(src1, topright, stride);
        if (memcmp(buf0, buf1, BUF_SIZE))
            fail();
        bench_new(src1, topright, stride);
    }
}
/*
 * Compare the reference and the candidate implementation of every entry of
 * h->put_hevc_qpel[2][2][].
 *
 * The source buffer is randomised once and both destination buffers are
 * zeroed once, before any function is called. For each function selected by
 * check_func(), every non-zero height listed in pred_heights[width][] is
 * tested; the source pointer is offset by 3 rows and 3 pixels.
 */
static void check_qpel(HEVCDSPContext *h, int16_t *dst0, int16_t *dst1,
                       uint8_t *src, int16_t *mcbuffer, int bit_depth)
{
    int iy, ix, wi, hi, mx, my;

    declare_func(void, int16_t *dst, ptrdiff_t dststride,
                 uint8_t *src, ptrdiff_t srcstride,
                 int height, int mx, int my, int16_t *mcbuffer);

    randomize_buffers(src, BUF_SIZE, bit_depth);
    memset(dst0, 0, BUF_SIZE * sizeof(*dst0));
    memset(dst1, 0, BUF_SIZE * sizeof(*dst1));

    for (iy = 0; iy < 2; iy++) {
        /* my starts at iy and stops below this bound (one value each) */
        const int my_end = iy ? 2 : 1;

        for (ix = 0; ix < 2; ix++) {
            const int mx_end = ix ? 2 : 1;

            for (wi = 0; wi < FF_ARRAY_ELEMS(h->put_hevc_qpel[iy][ix]); wi++) {
                int width     = pred_widths[wi];
                int dststride = FFALIGN(width, 16) * sizeof(*dst0);
                int srcstride = FFALIGN(width + 7, 8) * PIXEL_SIZE(bit_depth);

                if (!check_func(h->put_hevc_qpel[iy][ix][wi], "qpel_%s_%d_%d",
                                interp_names[iy][ix], width, bit_depth))
                    continue;

                for (hi = 0; hi < FF_ARRAY_ELEMS(pred_heights[0]); hi++) {
                    int height = pred_heights[width][hi];
                    uint8_t *src_pos;
                    size_t cmp_bytes;

                    if (!height)
                        continue;

                    src_pos = src + 3 * srcstride + 3 * PIXEL_SIZE(bit_depth);
                    /*
                     * NOTE(review): dststride is already multiplied by
                     * sizeof(*dst0) above, so this length applies that
                     * factor twice - confirm this is intended.
                     */
                    cmp_bytes = dststride * height * sizeof(*dst0);

                    for (my = iy; my < my_end; my++) {
                        for (mx = ix; mx < mx_end; mx++) {
                            call_ref(dst0, dststride, src_pos, srcstride,
                                     height, mx, my, mcbuffer);
                            call_new(dst1, dststride, src_pos, srcstride,
                                     height, mx, my, mcbuffer);

                            if (memcmp(dst0, dst1, cmp_bytes))
                                fail();

                            bench_new(dst1, dststride, src_pos, srcstride,
                                      height, mx, my, mcbuffer);
                        }
                    }
                }
            }
        }
    }
}
/*
 * Compare the reference and the candidate implementation of h.idct_dc[] for
 * block sizes 4x4, 8x8, 16x16 and 32x32.
 *
 * Both coefficient buffers receive the same random block_size * block_size
 * coefficients before the function is looked up, and exactly that many
 * coefficients are compared after both implementations have run in place.
 */
static void check_idct_dc(HEVCDSPContext h, int bit_depth)
{
    int idx;
    LOCAL_ALIGNED(32, int16_t, coeffs0, [32 * 32]);
    LOCAL_ALIGNED(32, int16_t, coeffs1, [32 * 32]);

    /* idx 0..3 corresponds to block sizes 4, 8, 16, 32 */
    for (idx = 0; idx < 4; idx++) {
        int block_size = 4 << idx;
        int n_coeffs = block_size * block_size;
        size_t coeff_bytes = sizeof(*coeffs0) * n_coeffs;
        declare_func_emms(AV_CPU_FLAG_MMXEXT, void, int16_t *coeffs);

        randomize_buffers(coeffs0, n_coeffs);
        memcpy(coeffs1, coeffs0, coeff_bytes);

        if (!(check_func(h.idct_dc[idx], "idct_%dx%d_dc_%d", block_size, block_size, bit_depth)))
            continue;

        call_ref(coeffs0);
        call_new(coeffs1);
        if (memcmp(coeffs0, coeffs1, coeff_bytes))
            fail();
        bench_new(coeffs1);
    }
}
/*
 * Run the reference and the candidate function on two copies of src0 (used
 * as the in/out `sum` argument) with src1 and src2 as the other operands,
 * then compare the first LEN*2 + 1 output floats with an absolute tolerance
 * of FLT_EPSILON.
 *
 * The first mismatch is printed to stderr and reported; later elements are
 * not checked. The candidate's buffer is reloaded from src0 before
 * benchmarking.
 */
static void test_fcmul_add(const float *src0, const float *src1, const float *src2)
{
    LOCAL_ALIGNED_32(float, cdst, [LEN*2+8]);
    LOCAL_ALIGNED_32(float, odst, [LEN*2+8]);
    const size_t buf_bytes = (LEN*2+8) * sizeof(float);
    int pos = 0;

    declare_func(void, float *sum, const float *t, const float *c,
                 ptrdiff_t len);

    memcpy(cdst, src0, buf_bytes);
    memcpy(odst, src0, buf_bytes);
    call_ref(cdst, src1, src2, LEN);
    call_new(odst, src1, src2, LEN);

    while (pos <= LEN*2) {
        if (!float_near_abs_eps(cdst[pos], odst[pos], FLT_EPSILON)) {
            fprintf(stderr, "%d: %- .12f - %- .12f = % .12g\n",
                    pos, cdst[pos], odst[pos], cdst[pos] - odst[pos]);
            fail();
            break;
        }
        pos++;
    }

    memcpy(odst, src0, buf_bytes);
    bench_new(odst, src1, src2, LEN);
}
/*
 * Benchmark driver.
 *
 * Usage: ./benchmark thread_number queue_number test_type
 *
 * Forks thread_number child processes. Each child calls
 * bench_new(queue, iq, type), where `queue` is 1 plus the number of children
 * successfully forked before it, and then exits. The parent waits for every
 * child it managed to fork, then prints the elapsed wall-clock time and the
 * derived rate thread_number / elapsed * queue_number.
 *
 * Exits with status 255 when fewer than three arguments are given or when
 * thread_number is not a positive integer.
 */
int main(int argc, char **argv)
{
    int number;
    int queue = 1;
    int qnum;
    int iq;
    int type;
    int forked = 0;
    int i;
    pid_t pid = -1;
    struct timeval tv1, tv2;
    double elapsed;

    if (argc < 4) {
        printf("Usage:./benchmark thread_number queue_number test_type(1:write,other:read) \n");
        exit(255);
    }

    number = qnum = atoi(argv[1]);
    iq = atoi(argv[2]);
    type = atoi(argv[3]);

    /* Without at least one worker there is nothing to fork or to wait for. */
    if (number <= 0) {
        fprintf(stderr, "thread_number must be a positive integer\n");
        exit(255);
    }

    gettimeofday(&tv1, NULL);

    for (i = 0; i < number; i++) {
        pid = fork();
        if (pid < 0) {
            perror("fork error:");
            continue;
        }
        if (pid == 0) {
            /* Child: leave the loop with its own value of `queue`. */
            usleep(1);
            break;
        }
        /* Parent: remember that one more child has to be reaped. */
        forked++;
        queue++;
    }

    if (pid == 0) {
        bench_new(queue, iq, type);
        return 0;
    }

    /* Reap every child so the measured time covers all workers. */
    while (forked > 0) {
        if (waitpid(0, NULL, 0) < 0) {
            perror("waitpid");
            break;
        }
        forked--;
    }

    gettimeofday(&tv2, NULL);

    elapsed = (tv2.tv_sec - tv1.tv_sec) + (tv2.tv_usec - tv1.tv_usec) / 1000000.0;
    printf("time is %f,conn is %f persecond\n", elapsed, qnum / elapsed * iq);

    return 0;
}