Ejemplo n.º 1
0
void
orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n)
{
  OrcExecutor _ex, *ex = &_ex;
  static int p_inited = 0;
  static OrcProgram *p = 0;
  void (*func) (OrcExecutor *);

  if (!p_inited) {
    orc_once_mutex_lock ();
    if (!p_inited) {
      OrcCompileResult result;

      p = orc_program_new ();
      orc_program_set_name (p, "orc_memcpy_u32");
      orc_program_set_backup_function (p, _backup_orc_memcpy_u32);
      orc_program_add_destination (p, 4, "d1");
      orc_program_add_source (p, 4, "s1");

      orc_program_append (p, "copyl", ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);

      result = orc_program_compile (p);
    }
    p_inited = TRUE;
    orc_once_mutex_unlock ();
  }
  ex->program = p;

  ex->n = n;
  ex->arrays[ORC_VAR_D1] = d1;
  ex->arrays[ORC_VAR_S1] = (void *) s1;

  func = p->code_exec;
  func (ex);
}
Ejemplo n.º 2
0
void
orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride,
    int p1, int n, int m)
{
  OrcExecutor _ex, *ex = &_ex;
  static int p_inited = 0;
  static OrcProgram *p = 0;
  void (*func) (OrcExecutor *);

  if (!p_inited) {
    orc_once_mutex_lock ();
    if (!p_inited) {
      OrcCompileResult result;

      p = orc_program_new ();
      orc_program_set_2d (p);
      orc_program_set_name (p, "orc_blend_u8");
      orc_program_set_backup_function (p, _backup_orc_blend_u8);
      orc_program_add_destination (p, 1, "d1");
      orc_program_add_source (p, 1, "s1");
      orc_program_add_constant (p, 1, 8, "c1");
      orc_program_add_parameter (p, 2, "p1");
      orc_program_add_temporary (p, 2, "t1");
      orc_program_add_temporary (p, 2, "t2");

      orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append (p, "convubw", ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1);
      orc_program_append (p, "subw", ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1);
      orc_program_append (p, "mullw", ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1);
      orc_program_append (p, "shlw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1);
      orc_program_append (p, "addw", ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2);
      orc_program_append (p, "shruw", ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1);
      orc_program_append (p, "convsuswb", ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1);

      result = orc_program_compile (p);
    }
    p_inited = TRUE;
    orc_once_mutex_unlock ();
  }
  ex->program = p;

  ex->n = n;
  ORC_EXECUTOR_M (ex) = m;
  ex->arrays[ORC_VAR_D1] = d1;
  ex->params[ORC_VAR_D1] = d1_stride;
  ex->arrays[ORC_VAR_S1] = (void *) s1;
  ex->params[ORC_VAR_S1] = s1_stride;
  ex->params[ORC_VAR_P1] = p1;

  func = p->code_exec;
  func (ex);
}
Ejemplo n.º 3
0
int
main(int argc, char *argv[])
{
  char *s, *d;
  orc_uint8 *src, *dest;
  OrcProfile prof;
  OrcProfile prof_libc;
  double ave, std;
  double ave_libc, std_libc;
  double null;
  int i,j;
  double cpufreq;
  int unalign;
  OrcProgram *p;
  int level1, level2, level3;
  int max;
  /* const uint8_t zero = 0; */

  orc_init ();

  /* cpufreq = 2333e6; */
  cpufreq = 1;

  if (argc > 1) {
    unalign = strtoul (argv[1], NULL, 0);
  } else {
    unalign = 0;
  }

  s = malloc(1024*1024*64+1024);
  d = malloc(1024*1024*64+1024);
  src = ORC_PTR_OFFSET(ALIGN(s,128),unalign);
  dest = ALIGN(d,128);

  orc_profile_init (&prof);
  for(j=0;j<10;j++){
    orc_profile_start(&prof);
    orc_profile_stop(&prof);
  }
  orc_profile_get_ave_std (&prof, &null, &std);
  
  {
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_name (p, "orc_memcpy");
    /* orc_program_set_name (p, "orc_memset"); */
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");
    /* orc_program_add_parameter (p, 1, "p1"); */

    orc_program_append (p, "copyb", ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);

    result = orc_program_compile (p);

    if (ORC_COMPILE_RESULT_IS_FATAL (result)) {
      fprintf (stderr, "Failed to compile orc_memcpy\n");
      return -1;
    }
  }

#ifndef M_LN2
#define M_LN2 0.69314718055994530942
#endif
  orc_get_data_cache_sizes (&level1, &level2, &level3);
  if (level3 > 0) {
    max = (log(level3)/M_LN2 - 6.0) * 10 + 20;
  } else if (level2 > 0) {
    max = (log(level2)/M_LN2 - 6.0) * 10 + 20;
  } else {
    max = 140;
  }

  for(i=0;i<max;i++){
    double x = i*0.1 + 6.0;
    int size = pow(2.0, x);

    if (flush_cache) {
      touch (src, (1<<18));
    }
    if (hot_src) {
      touch (src, size);
    }
    if (hot_dest) {
      touch (dest, size);
    }

    orc_profile_init (&prof);
    for(j=0;j<10;j++){
      OrcExecutor _ex, *ex = &_ex;
      void (*func) (OrcExecutor *);

      orc_profile_start(&prof);
      /* orc_memcpy (dest, src, size); */
      ex->program = p;
      ex->n = size;
      ex->arrays[ORC_VAR_D1] = dest;
      ex->arrays[ORC_VAR_S1] = (void *)src;

      func = p->code_exec;
      func (ex);

      orc_profile_stop(&prof);
      if (flush_cache) {
        touch (src, (1<<18));
      }
      if (hot_src) {
        touch (src, size);
      }
      if (hot_dest) {
        touch (dest, size);
      }
    }

    orc_profile_init (&prof_libc);
    for(j=0;j<10;j++){
      orc_profile_start(&prof_libc);
      memcpy (dest, src, size);
      orc_profile_stop(&prof_libc);
      if (flush_cache) {
        touch (src, (1<<18));
      }
      if (hot_src) {
        touch (src, size);
      }
      if (hot_dest) {
        touch (dest, size);
      }
    }

    orc_profile_get_ave_std (&prof, &ave, &std);
    orc_profile_get_ave_std (&prof_libc, &ave_libc, &std_libc);

    ave -= null;
    ave_libc -= null;

    /* printf("%d: %10.4g %10.4g %10.4g %10.4g (libc %10.4g)\n", i, ave, std, */
    /*     ave/(1<<i), cpufreq/(ave/(1<<i)), */
    /*     cpufreq/(ave_libc/(1<<i))); */
    printf("%g %10.4g %10.4g\n", x,
        cpufreq/(ave/size), cpufreq/(ave_libc/size));
    /* printf("%g %10.4g %10.4g\n", x, */
    /*     32*(ave/(size)), 32*(ave_libc/(size))); */
    fflush (stdout);
  }

  orc_program_free (p);
  free (s);
  free (d);

  return 0;
}
Ejemplo n.º 4
0
static void
schro_motion_init_functions (SchroMotion * motion)
{
  if (motion_funcs[motion->xblen >> 1].block_accumulate == NULL) {
    OrcProgram *p;
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_constant_n (p, motion->xblen);
    orc_program_set_2d (p);
    orc_program_set_name (p, "block_acc_Xxn");

    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_temporary (p, 2, "t1");

    orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1);
    orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1);

    result = orc_program_compile (p);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) {
      SCHRO_ERROR ("compile failed");
    }

    motion_funcs[motion->xblen / 2].block_accumulate = p;
  }

  if (motion_funcs[motion->xblen >> 1].block_accumulate_scaled == NULL) {
    OrcProgram *p;
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_constant_n (p, motion->xblen);
    orc_program_set_2d (p);
    orc_program_set_name (p, "block_acc_scaled_Xxn");

    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_constant (p, 2, 32, "c1");
    orc_program_add_constant (p, 2, 6, "c2");
    orc_program_add_temporary (p, 2, "t1");

    orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1);
    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1);
    orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1);
    orc_program_append (p, "shrsw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2);
    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1);
    orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1);

    result = orc_program_compile (p);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) {
      SCHRO_ERROR ("compile failed");
    }

    motion_funcs[motion->xblen / 2].block_accumulate_scaled = p;
  }

  if (motion_funcs[motion->xblen >> 1].block_accumulate_dc == NULL) {
    OrcProgram *p;
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_constant_n (p, motion->xblen);
    orc_program_set_2d (p);
    orc_program_set_name (p, "block_acc_dc_Xxn");

    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_temporary (p, 2, "t1");

    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_P1);
    orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1);

    result = orc_program_compile (p);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) {
      SCHRO_ERROR ("compile failed");
    }

    motion_funcs[motion->xblen / 2].block_accumulate_dc = p;
  }

  if (motion_funcs[motion->xblen >> 1].block_accumulate_avg == NULL) {
    OrcProgram *p;
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_constant_n (p, motion->xblen);
    orc_program_set_2d (p);
    orc_program_set_name (p, "block_acc_avg_Xxn");

    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 1, "t2");

    orc_program_append (p, "avgub", ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_S3);
    orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_T2, 0);
    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1);
    orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1);

    result = orc_program_compile (p);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) {
      SCHRO_ERROR ("compile failed");
    }

    motion_funcs[motion->xblen / 2].block_accumulate_avg = p;
  }

  if (motion_funcs[motion->xblen >> 1].block_accumulate_biref == NULL) {
    OrcProgram *p;
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_constant_n (p, motion->xblen);
    orc_program_set_2d (p);
    orc_program_set_name (p, "block_acc_biref_Xxn");

    orc_program_add_destination (p, 2, "d1");
    orc_program_add_source (p, 2, "s1");
    orc_program_add_source (p, 1, "s2");
    orc_program_add_source (p, 1, "s3");
    orc_program_add_parameter (p, 2, "p1");
    orc_program_add_parameter (p, 2, "p2");
    orc_program_add_constant (p, 2, 32, "c1");
    orc_program_add_constant (p, 2, 6, "c2");
    orc_program_add_temporary (p, 2, "t1");
    orc_program_add_temporary (p, 2, "t2");

    orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_S2, 0);
    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1);
    orc_program_append (p, "convubw", ORC_VAR_T2, ORC_VAR_S3, 0);
    orc_program_append (p, "mullw", ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2);
    orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2);
    orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1);
    orc_program_append (p, "shrsw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2);
    orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1);
    orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1);

    result = orc_program_compile (p);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) {
      SCHRO_ERROR ("compile failed");
    }

    motion_funcs[motion->xblen / 2].block_accumulate_biref = p;
  }
}