int main(int argc, char *argv[]) { char *s, *d; orc_uint8 *src, *dest; OrcProfile prof; OrcProfile prof_libc; double ave, std; double ave_libc, std_libc; double null; int i,j; double cpufreq; int unalign; OrcProgram *p; int level1, level2, level3; int max; /* const uint8_t zero = 0; */ orc_init (); /* cpufreq = 2333e6; */ cpufreq = 1; if (argc > 1) { unalign = strtoul (argv[1], NULL, 0); } else { unalign = 0; } s = malloc(1024*1024*64+1024); d = malloc(1024*1024*64+1024); src = ORC_PTR_OFFSET(ALIGN(s,128),unalign); dest = ALIGN(d,128); orc_profile_init (&prof); for(j=0;j<10;j++){ orc_profile_start(&prof); orc_profile_stop(&prof); } orc_profile_get_ave_std (&prof, &null, &std); { OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "orc_memcpy"); /* orc_program_set_name (p, "orc_memset"); */ orc_program_add_destination (p, 1, "d1"); orc_program_add_source (p, 1, "s1"); /* orc_program_add_parameter (p, 1, "p1"); */ orc_program_append (p, "copyb", ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1); result = orc_program_compile (p); if (ORC_COMPILE_RESULT_IS_FATAL (result)) { fprintf (stderr, "Failed to compile orc_memcpy\n"); return -1; } } #ifndef M_LN2 #define M_LN2 0.69314718055994530942 #endif orc_get_data_cache_sizes (&level1, &level2, &level3); if (level3 > 0) { max = (log(level3)/M_LN2 - 6.0) * 10 + 20; } else if (level2 > 0) { max = (log(level2)/M_LN2 - 6.0) * 10 + 20; } else { max = 140; } for(i=0;i<max;i++){ double x = i*0.1 + 6.0; int size = pow(2.0, x); if (flush_cache) { touch (src, (1<<18)); } if (hot_src) { touch (src, size); } if (hot_dest) { touch (dest, size); } orc_profile_init (&prof); for(j=0;j<10;j++){ OrcExecutor _ex, *ex = &_ex; void (*func) (OrcExecutor *); orc_profile_start(&prof); /* orc_memcpy (dest, src, size); */ ex->program = p; ex->n = size; ex->arrays[ORC_VAR_D1] = dest; ex->arrays[ORC_VAR_S1] = (void *)src; func = p->code_exec; func (ex); orc_profile_stop(&prof); if (flush_cache) { touch (src, (1<<18)); } if (hot_src) { touch 
(src, size); } if (hot_dest) { touch (dest, size); } } orc_profile_init (&prof_libc); for(j=0;j<10;j++){ orc_profile_start(&prof_libc); memcpy (dest, src, size); orc_profile_stop(&prof_libc); if (flush_cache) { touch (src, (1<<18)); } if (hot_src) { touch (src, size); } if (hot_dest) { touch (dest, size); } } orc_profile_get_ave_std (&prof, &ave, &std); orc_profile_get_ave_std (&prof_libc, &ave_libc, &std_libc); ave -= null; ave_libc -= null; /* printf("%d: %10.4g %10.4g %10.4g %10.4g (libc %10.4g)\n", i, ave, std, */ /* ave/(1<<i), cpufreq/(ave/(1<<i)), */ /* cpufreq/(ave_libc/(1<<i))); */ printf("%g %10.4g %10.4g\n", x, cpufreq/(ave/size), cpufreq/(ave_libc/size)); /* printf("%g %10.4g %10.4g\n", x, */ /* 32*(ave/(size)), 32*(ave_libc/(size))); */ fflush (stdout); } orc_program_free (p); free (s); free (d); return 0; }
/*
 * Times the execution of an Orc program and returns the average profile
 * measurement per processed element.
 *
 * program:     program to measure; it is reset with orc_program_reset()
 *              before returning, on both the success and the compile-failure
 *              path.
 * flags:       ORC_TEST_FLAGS_BACKUP skips compilation and times
 *              orc_executor_run_backup(); ORC_TEST_FLAGS_EMULATE times
 *              orc_executor_emulate(); otherwise orc_executor_run() is timed.
 * target_name: passed to orc_target_get_by_name() to select the compile
 *              target (only used when ORC_TEST_FLAGS_BACKUP is not set).
 *
 * The row length n is program->constant_n when positive, else 1000.  For 2-D
 * programs the row count m is program->constant_m when positive, else a
 * random value in 8..23; for 1-D programs m is 1.  Source arrays are filled
 * with random data, destination arrays with ORC_OOB_VALUE, and each
 * successive array is created with a larger misalignment argument.  Every
 * parameter variable is set to 2.
 *
 * Returns the average of 10 timed runs divided by n*m, or 0 if compilation
 * did not succeed.
 */
double
orc_test_performance_full (OrcProgram *program, int flags,
    const char *target_name)
{
  OrcExecutor *ex;
  int n;
  int m;
  OrcArray *dest_exec[4] = { NULL, NULL, NULL, NULL };
  OrcArray *dest_emul[4] = { NULL, NULL, NULL, NULL };
  OrcArray *src[8] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
  int i, j;
  OrcCompileResult result;
  OrcProfile prof;
  double ave, std;
  OrcTarget *target;
  int misalignment;

  ORC_DEBUG ("got here");

  target = orc_target_get_by_name (target_name);

  if (!(flags & ORC_TEST_FLAGS_BACKUP)) {
    /* Named distinctly so it no longer shadows the `flags` parameter. */
    unsigned int target_flags;

    target_flags = orc_target_get_default_flags (target);

    result = orc_program_compile_full (program, target, target_flags);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL(result)) {
      /* Compile failure is reported to the caller as a result of 0. */
      orc_program_reset (program);
      return 0;
    }
  }

  if (program->constant_n > 0) {
    n = program->constant_n;
  } else {
    /* Fixed default length; a randomised length
     * (64 + (orc_random(&rand_context)&0xf)) is left disabled. */
    n = 1000;
  }

  ex = orc_executor_new (program);
  orc_executor_set_n (ex, n);
  if (program->is_2d) {
    if (program->constant_m > 0) {
      m = program->constant_m;
    } else {
      m = 8 + (orc_random(&rand_context)&0xf);
    }
  } else {
    m = 1;
  }
  orc_executor_set_m (ex, m);
  ORC_DEBUG("size %d %d", ex->n, ex->params[ORC_VAR_A1]);

  /* Allocate one array per named source/destination variable.  The arrays
   * are stored relative to the first variable of their kind, so every access
   * must subtract ORC_VAR_S1 / ORC_VAR_D1 from the variable index. */
  misalignment = 0;
  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].name == NULL) continue;

    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
      src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
      dest_exec[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_exec[i-ORC_VAR_D1], ORC_OOB_VALUE);
      /* dest_emul is allocated and initialised alongside dest_exec but is
       * not bound to the executor anywhere in this function. */
      dest_emul[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_emul[i-ORC_VAR_D1], ORC_OOB_VALUE);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
      orc_executor_set_param (ex, i, 2);
    }
  }

  ORC_DEBUG ("running");
  orc_profile_init (&prof);
  for(i=0;i<10;i++){
    /* Re-establish the executor state before every timed run. */
    orc_executor_set_n (ex, n);
    orc_executor_set_m (ex, m);
    for(j=0;j<ORC_N_VARIABLES;j++){
      if (program->vars[j].vartype == ORC_VAR_TYPE_DEST) {
        orc_executor_set_array (ex, j, dest_exec[j-ORC_VAR_D1]->data);
        orc_executor_set_stride (ex, j, dest_exec[j-ORC_VAR_D1]->stride);
      }
      if (program->vars[j].vartype == ORC_VAR_TYPE_SRC) {
        orc_executor_set_array (ex, j, src[j-ORC_VAR_S1]->data);
        orc_executor_set_stride (ex, j, src[j-ORC_VAR_S1]->stride);
      }
    }

    /* The mode is selected outside the timed region so that only the run
     * itself is measured. */
    if (flags & ORC_TEST_FLAGS_BACKUP) {
      orc_profile_start (&prof);
      orc_executor_run_backup (ex);
      orc_profile_stop (&prof);
    } else if (flags & ORC_TEST_FLAGS_EMULATE) {
      orc_profile_start (&prof);
      orc_executor_emulate (ex);
      orc_profile_stop (&prof);
    } else {
      orc_profile_start (&prof);
      orc_executor_run (ex);
      orc_profile_stop (&prof);
    }
  }
  ORC_DEBUG ("done running");

  orc_profile_get_ave_std (&prof, &ave, &std);

  for(i=0;i<4;i++){
    if (dest_exec[i]) orc_array_free (dest_exec[i]);
    if (dest_emul[i]) orc_array_free (dest_emul[i]);
  }
  for(i=0;i<8;i++){
    if (src[i]) orc_array_free (src[i]);
  }

  orc_executor_free (ex);
  orc_program_reset (program);

  return ave/(n*m);
}
int main (int argc, char *argv[]) { SchroFrame *dest; SchroFrame *ref; SchroFrame *addframe; SchroParams params; SchroVideoFormat video_format; SchroMotionVector *motion_vectors; int i; int j; OrcProfile prof; double ave, std; schro_init(); memset (&video_format, 0, sizeof(video_format)); memset (¶ms, 0, sizeof(params)); schro_video_format_set_std_video_format (&video_format, SCHRO_VIDEO_FORMAT_CUSTOM); video_format.width = 720; video_format.height = 480; video_format.chroma_format = SCHRO_CHROMA_420; schro_video_format_validate (&video_format); params.video_format = &video_format; params.xbsep_luma = 8; params.ybsep_luma = 8; params.xblen_luma = 12; params.yblen_luma = 12; schro_params_calculate_mc_sizes(¶ms); dest = schro_frame_new_and_alloc (NULL, SCHRO_FRAME_FORMAT_S16_420, video_format.width, video_format.height); schro_frame_clear(dest); ref = schro_frame_new_and_alloc_extended (NULL, SCHRO_FRAME_FORMAT_U8_420, video_format.width, video_format.height, 32); schro_frame_clear(ref); addframe = schro_frame_new_and_alloc (NULL, SCHRO_FRAME_FORMAT_S16_420, video_format.width, video_format.height); schro_frame_clear(addframe); schro_upsampled_frame_upsample (ref); motion_vectors = malloc(sizeof(SchroMotionVector) * params.x_num_blocks * params.y_num_blocks); memset (motion_vectors, 0, sizeof(SchroMotionVector) * params.x_num_blocks * params.y_num_blocks); printf("sizeof(SchroMotionVector) = %lu\n",(unsigned long) sizeof(SchroMotionVector)); printf("num blocks %d x %d\n", params.x_num_blocks, params.y_num_blocks); for(i=0;i<params.x_num_blocks*params.y_num_blocks;i++){ motion_vectors[i].u.vec.dx[0] = 0; motion_vectors[i].u.vec.dy[0] = 0; motion_vectors[i].pred_mode = 1; motion_vectors[i].split = 2; } for(i=0;i<10;i++){ orc_profile_init (&prof); for(j=0;j<10;j++){ SchroMotion *motion; void *mv_save; motion = schro_motion_new (¶ms, ref, NULL); mv_save = motion->motion_vectors; motion->motion_vectors = motion_vectors; orc_profile_start(&prof); schro_motion_render 
(motion, dest, addframe, FALSE, NULL); orc_profile_stop(&prof); motion->motion_vectors = mv_save; schro_motion_free (motion); } orc_profile_get_ave_std (&prof, &ave, &std); printf("cycles %g %g\n", ave, std); } schro_frame_unref (ref); schro_frame_unref (dest); free (motion_vectors); return 0; }