Ejemplo n.º 1
0
/*
 * Benchmark a compiled Orc "copyb" program against libc memcpy().
 *
 * For a sweep of copy sizes 2^x (x = 6.0, 6.1, 6.2, ...), each copy routine
 * is run 10 times under an OrcProfile, and one line is printed per size:
 *
 *     x  <size / orc average>  <size / memcpy average>
 *
 * where each average has the empty start/stop profile overhead subtracted.
 *
 * argv[1] (optional) is a byte offset added to the aligned source pointer,
 * so that misaligned sources can be measured.
 *
 * Returns 0 on success, -1 on bad arguments, allocation failure or if the
 * Orc program fails to compile.
 */
int
main(int argc, char *argv[])
{
  /* Largest copy that is ever issued; both buffers hold this many bytes
   * plus buf_slack bytes of headroom for alignment and the unalign offset. */
  const size_t buf_size = 1024*1024*64;
  const size_t buf_slack = 1024;
  /* NOTE(review): assumes ALIGN(ptr,128) moves the pointer forward by at
   * most 127 bytes; the rest of the slack may be used by unalign. Confirm
   * against the ALIGN definition. */
  const unsigned long max_unalign = 1024 - 128;
  char *s, *d;
  orc_uint8 *src, *dest;
  OrcProfile prof;
  OrcProfile prof_libc;
  double ave, std;
  double ave_libc, std_libc;
  double null;
  int i,j;
  double cpufreq;
  int unalign;
  OrcProgram *p;
  int level1, level2, level3;
  int max;

  orc_init ();

  /* With cpufreq == 1 the printed figures are plain size/average ratios. */
  cpufreq = 1;

  unalign = 0;
  if (argc > 1) {
    unsigned long requested = strtoul (argv[1], NULL, 0);

    /* An offset larger than the slack would push copies past the end of
     * the source buffer, so reject it up front. */
    if (requested > max_unalign) {
      fprintf (stderr, "unalign must be between 0 and %lu\n", max_unalign);
      return -1;
    }
    unalign = (int) requested;
  }

  s = malloc (buf_size + buf_slack);
  d = malloc (buf_size + buf_slack);
  if (s == NULL || d == NULL) {
    fprintf (stderr, "Failed to allocate benchmark buffers\n");
    free (s);
    free (d);
    return -1;
  }
  src = ORC_PTR_OFFSET(ALIGN(s,128),unalign);
  dest = ALIGN(d,128);

  /* Measure the cost of an empty start/stop pair; it is subtracted from
   * every average below. */
  orc_profile_init (&prof);
  for(j=0;j<10;j++){
    orc_profile_start(&prof);
    orc_profile_stop(&prof);
  }
  orc_profile_get_ave_std (&prof, &null, &std);

  /* Build and compile a one-instruction byte copy program. */
  {
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_name (p, "orc_memcpy");
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");

    orc_program_append (p, "copyb", ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);

    result = orc_program_compile (p);

    if (ORC_COMPILE_RESULT_IS_FATAL (result)) {
      fprintf (stderr, "Failed to compile orc_memcpy\n");
      orc_program_free (p);
      free (s);
      free (d);
      return -1;
    }
  }

#ifndef M_LN2
#define M_LN2 0.69314718055994530942
#endif
  /* Choose the number of sweep steps from the largest reported data cache:
   * the sweep runs from 2^6 up to just under 4x that cache size. */
  orc_get_data_cache_sizes (&level1, &level2, &level3);
  if (level3 > 0) {
    max = (log(level3)/M_LN2 - 6.0) * 10 + 20;
  } else if (level2 > 0) {
    max = (log(level2)/M_LN2 - 6.0) * 10 + 20;
  } else {
    max = 140;
  }

  for(i=0;i<max;i++){
    double x = i*0.1 + 6.0;
    double exact_size = pow(2.0, x);
    int size;

    /* On machines with large caches the sweep would otherwise ask for
     * copies bigger than the buffers; stop before overrunning them. */
    if (exact_size > (double) buf_size) {
      break;
    }
    size = exact_size;

    /* Put the caches into the requested state before the first run. */
    if (flush_cache) {
      touch (src, (1<<18));
    }
    if (hot_src) {
      touch (src, size);
    }
    if (hot_dest) {
      touch (dest, size);
    }

    /* Orc copy: fill in the executor by hand and call the compiled code
     * directly. */
    orc_profile_init (&prof);
    for(j=0;j<10;j++){
      OrcExecutor _ex, *ex = &_ex;
      void (*func) (OrcExecutor *);

      orc_profile_start(&prof);
      ex->program = p;
      ex->n = size;
      ex->arrays[ORC_VAR_D1] = dest;
      ex->arrays[ORC_VAR_S1] = (void *)src;

      func = p->code_exec;
      func (ex);

      orc_profile_stop(&prof);

      /* Restore the requested cache state for the next run. */
      if (flush_cache) {
        touch (src, (1<<18));
      }
      if (hot_src) {
        touch (src, size);
      }
      if (hot_dest) {
        touch (dest, size);
      }
    }

    /* libc reference copy, measured the same way. */
    orc_profile_init (&prof_libc);
    for(j=0;j<10;j++){
      orc_profile_start(&prof_libc);
      memcpy (dest, src, size);
      orc_profile_stop(&prof_libc);
      if (flush_cache) {
        touch (src, (1<<18));
      }
      if (hot_src) {
        touch (src, size);
      }
      if (hot_dest) {
        touch (dest, size);
      }
    }

    orc_profile_get_ave_std (&prof, &ave, &std);
    orc_profile_get_ave_std (&prof_libc, &ave_libc, &std_libc);

    /* Remove the profiling overhead measured at startup. */
    ave -= null;
    ave_libc -= null;

    printf("%g %10.4g %10.4g\n", x,
        cpufreq/(ave/size), cpufreq/(ave_libc/size));
    fflush (stdout);
  }

  orc_program_free (p);
  free (s);
  free (d);

  return 0;
}
Ejemplo n.º 2
0
/*
 * Profile one Orc program on the named target.
 *
 * Unless ORC_TEST_FLAGS_BACKUP is set, the program is first compiled for
 * the target using that target's default flags. Source arrays are filled
 * with random data, destination arrays with ORC_OOB_VALUE, and every
 * parameter variable is set to 2. The program is then executed 10 times
 * under an OrcProfile, using the backup function, the emulator, or the
 * compiled code depending on flags.
 *
 * program:     program to measure; it is reset before returning.
 * flags:       ORC_TEST_FLAGS_* bits selecting how the program is run.
 * target_name: name passed to orc_target_get_by_name().
 *
 * Returns the profile average divided by n*m (the number of elements
 * processed per run), or 0 if compilation was not successful.
 */
double
orc_test_performance_full (OrcProgram *program, int flags,
    const char *target_name)
{
  OrcExecutor *ex;
  int n;
  int m;
  OrcArray *dest_exec[4] = { NULL, NULL, NULL, NULL };
  OrcArray *dest_emul[4] = { NULL, NULL, NULL, NULL };
  OrcArray *src[8] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
  int i, j;
  OrcCompileResult result;
  OrcProfile prof;
  double ave, std;
  OrcTarget *target;
  int misalignment;

  ORC_DEBUG ("got here");

  target = orc_target_get_by_name (target_name);

  if (!(flags & ORC_TEST_FLAGS_BACKUP)) {
    /* Named distinctly so it does not shadow the 'flags' parameter. */
    unsigned int target_flags;

    target_flags = orc_target_get_default_flags (target);

    result = orc_program_compile_full (program, target, target_flags);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL(result)) {
      orc_program_reset (program);
      return 0;
    }
  }

  /* Use the program's fixed sizes when it has them; otherwise 1000
   * elements per row and, for 2-D programs, a random 8..23 rows. */
  if (program->constant_n > 0) {
    n = program->constant_n;
  } else {
    n = 1000;
  }

  ex = orc_executor_new (program);
  orc_executor_set_n (ex, n);
  if (program->is_2d) {
    if (program->constant_m > 0) {
      m = program->constant_m;
    } else {
      m = 8 + (orc_random(&rand_context)&0xf);
    }
  } else {
    m = 1;
  }
  orc_executor_set_m (ex, m);
  ORC_DEBUG("size %d %d", ex->n, ex->params[ORC_VAR_A1]);

  /* Allocate one array per named source/destination variable. The
   * misalignment argument grows by one for every array created. */
  misalignment = 0;
  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].name == NULL) continue;

    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
      src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
      /* Index by the offset from ORC_VAR_D1 for both the store and the
       * pattern fill, so the two always refer to the same slot. */
      dest_exec[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_exec[i-ORC_VAR_D1], ORC_OOB_VALUE);
      dest_emul[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_emul[i-ORC_VAR_D1], ORC_OOB_VALUE);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
      orc_executor_set_param (ex, i, 2);
    }
  }

  ORC_DEBUG ("running");
  orc_profile_init (&prof);
  for(i=0;i<10;i++){
    /* Re-arm the executor before every run. */
    orc_executor_set_n (ex, n);
    orc_executor_set_m (ex, m);
    for(j=0;j<ORC_N_VARIABLES;j++){
      if (program->vars[j].vartype == ORC_VAR_TYPE_DEST) {
        orc_executor_set_array (ex, j, dest_exec[j-ORC_VAR_D1]->data);
        orc_executor_set_stride (ex, j, dest_exec[j-ORC_VAR_D1]->stride);
      }
      if (program->vars[j].vartype == ORC_VAR_TYPE_SRC) {
        orc_executor_set_array (ex, j, src[j-ORC_VAR_S1]->data);
        orc_executor_set_stride (ex, j, src[j-ORC_VAR_S1]->stride);
      }
    }
    /* Only the execution call itself sits between start and stop, so the
     * mode selection is kept outside the profiled region. */
    if (flags & ORC_TEST_FLAGS_BACKUP) {
      orc_profile_start (&prof);
      orc_executor_run_backup (ex);
      orc_profile_stop (&prof);
    } else if (flags & ORC_TEST_FLAGS_EMULATE) {
      orc_profile_start (&prof);
      orc_executor_emulate (ex);
      orc_profile_stop (&prof);
    } else {
      orc_profile_start (&prof);
      orc_executor_run (ex);
      orc_profile_stop (&prof);
    }
  }
  ORC_DEBUG ("done running");

  orc_profile_get_ave_std (&prof, &ave, &std);

  for(i=0;i<4;i++){
    if (dest_exec[i]) orc_array_free (dest_exec[i]);
    if (dest_emul[i]) orc_array_free (dest_emul[i]);
  }
  for(i=0;i<8;i++){
    if (src[i]) orc_array_free (src[i]);
  }

  orc_executor_free (ex);
  orc_program_reset (program);

  return ave/(n*m);
}
Ejemplo n.º 3
0
int
main (int argc, char *argv[])
{
  SchroFrame *dest;
  SchroFrame *ref;
  SchroFrame *addframe;
  SchroParams params;
  SchroVideoFormat video_format;
  SchroMotionVector *motion_vectors;
  int i;
  int j;
  OrcProfile prof;
  double ave, std;

  schro_init();

  memset (&video_format, 0, sizeof(video_format));
  memset (&params, 0, sizeof(params));

  schro_video_format_set_std_video_format (&video_format,
      SCHRO_VIDEO_FORMAT_CUSTOM);
  video_format.width = 720;
  video_format.height = 480;
  video_format.chroma_format = SCHRO_CHROMA_420;
  schro_video_format_validate (&video_format);

  params.video_format = &video_format;
  params.xbsep_luma = 8;
  params.ybsep_luma = 8;
  params.xblen_luma = 12;
  params.yblen_luma = 12;

  schro_params_calculate_mc_sizes(&params);

  dest = schro_frame_new_and_alloc (NULL, SCHRO_FRAME_FORMAT_S16_420,
      video_format.width, video_format.height);
  schro_frame_clear(dest);

  ref = schro_frame_new_and_alloc_extended (NULL, SCHRO_FRAME_FORMAT_U8_420,
      video_format.width, video_format.height, 32);
  schro_frame_clear(ref);

  addframe = schro_frame_new_and_alloc (NULL, SCHRO_FRAME_FORMAT_S16_420,
      video_format.width, video_format.height);
  schro_frame_clear(addframe);

  schro_upsampled_frame_upsample (ref);

  motion_vectors = malloc(sizeof(SchroMotionVector) *
      params.x_num_blocks * params.y_num_blocks);
  memset (motion_vectors, 0, sizeof(SchroMotionVector) *
      params.x_num_blocks * params.y_num_blocks);
  
  printf("sizeof(SchroMotionVector) = %lu\n",(unsigned long) sizeof(SchroMotionVector));
  printf("num blocks %d x %d\n", params.x_num_blocks, params.y_num_blocks);
  for(i=0;i<params.x_num_blocks*params.y_num_blocks;i++){
    motion_vectors[i].u.vec.dx[0] = 0;
    motion_vectors[i].u.vec.dy[0] = 0;
    motion_vectors[i].pred_mode = 1;
    motion_vectors[i].split = 2;
  }

  for(i=0;i<10;i++){
    orc_profile_init (&prof);
    for(j=0;j<10;j++){
      SchroMotion *motion;
      void *mv_save;

      motion = schro_motion_new (&params, ref, NULL);
      mv_save = motion->motion_vectors;
      motion->motion_vectors = motion_vectors;
      orc_profile_start(&prof);
      schro_motion_render (motion, dest, addframe, FALSE, NULL);
      orc_profile_stop(&prof);
      motion->motion_vectors = mv_save;
      schro_motion_free (motion);
    }
    orc_profile_get_ave_std (&prof, &ave, &std);
    printf("cycles %g %g\n", ave, std);
  }

  schro_frame_unref (ref);
  schro_frame_unref (dest);
  free (motion_vectors);

  return 0;
}