/*
 * Checks that a program accepts exactly `max` variables of one kind.
 *
 * max:   claimed limit for the kind of variable that `adder` creates
 * adder: function that adds one variable of that kind to a program
 *
 * The `error` flag (declared outside this function) is set to TRUE when
 * compiling with `max` variables is fatal, or when compiling with one
 * variable more than `max` is still reported as successful.
 */
static void
test_simple (int max, int (*adder) (OrcProgram *, int, const char *))
{
  OrcProgram *prog = orc_program_new ();
  OrcCompileResult status;
  int count = 0;

  /* Minimal program, so that the compiler has something to work on. */
  orc_program_add_destination (prog, 2, "d1");
  orc_program_add_source (prog, 2, "s1");
  orc_program_append_str (prog, "addw", "d1", "d1", "s1");

  /* The dummy program above already holds one destination and one source. */
  if (adder == orc_program_add_destination || adder == orc_program_add_source) {
    max--;
  }

  /* Fill up to the claimed limit: this must not be a fatal compile error. */
  while (count < max) {
    adder (prog, 2, names + count);
    count++;
  }
  status = orc_program_compile (prog);
  if (ORC_COMPILE_RESULT_IS_FATAL (status)) {
    error = TRUE;
  }
  orc_program_reset (prog);

  /* One variable past the limit: compilation must no longer succeed. */
  adder (prog, 2, names + count);
  status = orc_program_compile (prog);
  if (ORC_COMPILE_RESULT_IS_SUCCESSFUL (status)) {
    error = TRUE;
  }

  orc_program_free (prog);
}
/*
 * Runs the one-instruction Orc program "copyl d1, s1" over n elements.
 *
 * d1: destination array (declared to Orc with 4-byte elements)
 * s1: source array (declared to Orc with 4-byte elements)
 * n:  element count handed to the executor
 *
 * The program is created and compiled on the first call, while holding the
 * Orc once-mutex, and the same program is reused by every later call.
 */
void
orc_memcpy_u32 (guint32 * d1, const guint32 * s1, int n)
{
  static int initialized = 0;
  static OrcProgram *program = 0;
  OrcExecutor exec_storage;
  OrcExecutor *ex = &exec_storage;
  void (*run) (OrcExecutor *);

  if (!initialized) {
    orc_once_mutex_lock ();
    /* Re-check under the lock: another caller may have finished the setup. */
    if (!initialized) {
      program = orc_program_new ();
      orc_program_set_name (program, "orc_memcpy_u32");
      orc_program_set_backup_function (program, _backup_orc_memcpy_u32);
      orc_program_add_destination (program, 4, "d1");
      orc_program_add_source (program, 4, "s1");

      orc_program_append (program, "copyl", ORC_VAR_D1, ORC_VAR_S1,
          ORC_VAR_D1);

      /* The compile result is not inspected here. */
      (void) orc_program_compile (program);
    }
    initialized = TRUE;
    orc_once_mutex_unlock ();
  }

  ex->program = program;
  ex->n = n;
  ex->arrays[ORC_VAR_D1] = d1;
  ex->arrays[ORC_VAR_S1] = (void *) s1;

  run = program->code_exec;
  run (ex);
}
/* Compile the Orc program held by @vector.
 *
 * Returns TRUE when Orc reports a successful compile, in which case
 * vector->compiled is also set to TRUE, and FALSE otherwise. When HAVE_ORC
 * is not defined nothing is compiled and TRUE is returned.
 */
gboolean
vips_vector_compile( VipsVector *vector )
{
#ifdef HAVE_ORC
	OrcCompileResult status;

	/* Some orcs seem to be unstable with many compilers active at once,
	 * so the compile runs while holding the global lock.
	 */
	g_mutex_lock( vips__global_lock );
	status = orc_program_compile( vector->program );
	g_mutex_unlock( vips__global_lock );

#ifdef DEBUG_TRACE
	printf( "orc_program_compile( %s );\n", vector->unique_name );
#endif /*DEBUG_TRACE*/

	if( ORC_COMPILE_RESULT_IS_SUCCESSFUL( status ) ) {
		vector->compiled = TRUE;

		return( TRUE );
	}

#ifdef DEBUG
	printf( "*** error compiling %s\n", vector->name );
#endif /*DEBUG*/

	return( FALSE );
#else /*!HAVE_ORC*/
	return( TRUE );
#endif /*HAVE_ORC*/
}
int main (int argc, char *argv[]) { OrcProgram **programs; OrcCompileResult cres; int n, i; orc_init (); orc_test_init (); /* 1 - unix */ n = orc_parse (txt_unix, &programs); for (i = 0; i < n; i++) { if (verbose) printf ("%s\n", programs[i]->name); orc_test_compare_output_full (programs[i], 0); cres = orc_program_compile (programs[i]); if (ORC_COMPILE_RESULT_IS_FATAL (cres)) { fprintf (stderr, "compile error: %d\n", cres); error = TRUE; } orc_program_free (programs[i]); } if (error || n == 0) return 1; /* 2 - windows */ n = orc_parse (txt_win32, &programs); for (i = 0; i < n; i++) { if (verbose) printf ("%s\n", programs[i]->name); orc_test_compare_output_full (programs[i], 0); cres = orc_program_compile (programs[i]); if (ORC_COMPILE_RESULT_IS_FATAL (cres)) { fprintf (stderr, "compile error: %d\n", cres); error = TRUE; } orc_program_free (programs[i]); } if (error || n == 0) return 1; return 0; }
/*
 * Runs a 2-D Orc program that blends the byte array s1 into d1.
 *
 * d1, d1_stride: destination array and the stride handed to Orc for it
 * s1, s1_stride: source array and the stride handed to Orc for it
 * p1:            16-bit parameter that multiplies the (s1 - d1) difference
 * n, m:          the two extents handed to the executor
 *
 * Per element the program widens d1 and s1 to 16 bits, then computes
 *   t2 = (s1 - d1) * p1;  t1 = d1 << 8;  t2 = (t1 + t2) >> 8 (unsigned shift)
 * and stores t2 back to d1 through "convsuswb".
 * NOTE(review): this looks like d1 + (((s1 - d1) * p1) >> 8), i.e. p1 acting
 * as a 0..256 blend weight - confirm against the Orc opcode reference.
 *
 * The program is created and compiled on the first call, while holding the
 * Orc once-mutex, and the same program is reused by every later call.
 */
void
orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride,
    int p1, int n, int m)
{
  static int initialized = 0;
  static OrcProgram *program = 0;
  OrcExecutor exec_storage;
  OrcExecutor *ex = &exec_storage;
  void (*run) (OrcExecutor *);

  if (!initialized) {
    orc_once_mutex_lock ();
    /* Re-check under the lock: another caller may have finished the setup. */
    if (!initialized) {
      program = orc_program_new ();
      orc_program_set_2d (program);
      orc_program_set_name (program, "orc_blend_u8");
      orc_program_set_backup_function (program, _backup_orc_blend_u8);

      /* Variables: byte-wide arrays, 16-bit parameter and temporaries. */
      orc_program_add_destination (program, 1, "d1");
      orc_program_add_source (program, 1, "s1");
      orc_program_add_constant (program, 1, 8, "c1");
      orc_program_add_parameter (program, 2, "p1");
      orc_program_add_temporary (program, 2, "t1");
      orc_program_add_temporary (program, 2, "t2");

      /* Instructions, in execution order. */
      orc_program_append (program, "convubw", ORC_VAR_T1, ORC_VAR_D1,
          ORC_VAR_D1);
      orc_program_append (program, "convubw", ORC_VAR_T2, ORC_VAR_S1,
          ORC_VAR_D1);
      orc_program_append (program, "subw", ORC_VAR_T2, ORC_VAR_T2,
          ORC_VAR_T1);
      orc_program_append (program, "mullw", ORC_VAR_T2, ORC_VAR_T2,
          ORC_VAR_P1);
      orc_program_append (program, "shlw", ORC_VAR_T1, ORC_VAR_T1,
          ORC_VAR_C1);
      orc_program_append (program, "addw", ORC_VAR_T2, ORC_VAR_T1,
          ORC_VAR_T2);
      orc_program_append (program, "shruw", ORC_VAR_T2, ORC_VAR_T2,
          ORC_VAR_C1);
      orc_program_append (program, "convsuswb", ORC_VAR_D1, ORC_VAR_T2,
          ORC_VAR_D1);

      /* The compile result is not inspected here. */
      (void) orc_program_compile (program);
    }
    initialized = TRUE;
    orc_once_mutex_unlock ();
  }

  ex->program = program;
  ex->n = n;
  ORC_EXECUTOR_M (ex) = m;
  ex->arrays[ORC_VAR_D1] = d1;
  ex->params[ORC_VAR_D1] = d1_stride;
  ex->arrays[ORC_VAR_S1] = (void *) s1;
  ex->params[ORC_VAR_S1] = s1_stride;
  ex->params[ORC_VAR_P1] = p1;

  run = program->code_exec;
  run (ex);
}
/*
 * Plain C implementation: sum over i in [0, n) of (s1[i] - s2[i])^2.
 * Used when Orc is unavailable or its program failed to compile.
 */
static int
sum_square_diff_u8_c (const uint8_t * s1, const uint8_t * s2, int n)
{
  int sum = 0;
  int i;

  for (i = 0; i < n; i++) {
    int x = s1[i] - s2[i];

    sum += x * x;
  }
  return sum;
}

/*
 * Returns the sum of squared differences between the first n bytes of s1
 * and s2. n <= 0 yields 0 on the plain C path.
 *
 * With HAVE_ORC the work is done by an Orc program that is built and
 * compiled on the first call and kept in a function-local static. If that
 * compile does not succeed, the error is logged once, the program is freed,
 * and this and all later calls use the plain C loop instead. Previously the
 * failed program stayed cached: the first call returned 0 and later calls
 * ran an executor on the program that had failed to compile.
 */
static int
sum_square_diff_u8 (uint8_t * s1, uint8_t * s2, int n)
{
#ifdef HAVE_ORC
  static OrcProgram *p = NULL;
  static int compile_failed = 0;

  if (p == NULL && !compile_failed) {
    OrcCompileResult ret;
    OrcProgram *prog;

    prog = orc_program_new_ass (4, 1, 1);
    orc_program_add_temporary (prog, 2, "t1");
    orc_program_add_temporary (prog, 2, "t2");
    orc_program_add_temporary (prog, 4, "t3");

    /* t1 = s1 - s2 (16 bit), t1 *= t1, widen to 32 bit, accumulate. */
    orc_program_append_ds_str (prog, "convubw", "t1", "s1");
    orc_program_append_ds_str (prog, "convubw", "t2", "s2");
    orc_program_append_str (prog, "subw", "t1", "t1", "t2");
    orc_program_append_str (prog, "mullw", "t1", "t1", "t1");
    orc_program_append_ds_str (prog, "convuwl", "t3", "t1");
    orc_program_append_ds_str (prog, "accl", "a1", "t3");

    ret = orc_program_compile (prog);
    if (ORC_COMPILE_RESULT_IS_SUCCESSFUL (ret)) {
      p = prog;
    } else {
      GST_ERROR ("Orc compiler failure");
      /* Never cache a program that did not compile. */
      orc_program_free (prog);
      compile_failed = 1;
    }
  }

  if (p != NULL) {
    OrcExecutor *ex;
    int val;

    ex = orc_executor_new (p);
    orc_executor_set_n (ex, n);
    orc_executor_set_array_str (ex, "s1", s1);
    orc_executor_set_array_str (ex, "s2", s2);

    orc_executor_run (ex);
    val = orc_executor_get_accumulator (ex, 0);
    orc_executor_free (ex);

    return val;
  }
#endif /* HAVE_ORC */

  return sum_square_diff_u8_c (s1, s2, n);
}
/* Compile the Orc program held by @vector.
 *
 * Returns TRUE when Orc reports a successful compile, in which case
 * vector->compiled is also set to TRUE, and FALSE otherwise. When HAVE_ORC
 * is not defined nothing is compiled and TRUE is returned.
 */
gboolean
vips_vector_compile( VipsVector *vector )
{
#ifdef HAVE_ORC
	OrcCompileResult status;

	status = orc_program_compile( vector->program );

	if( ORC_COMPILE_RESULT_IS_SUCCESSFUL( status ) ) {
		vector->compiled = TRUE;

		return( TRUE );
	}

#ifdef DEBUG
	printf( "*** error compiling %s\n", vector->name );
#endif /*DEBUG*/

	return( FALSE );
#else /*!HAVE_ORC*/
	return( TRUE );
#endif /*HAVE_ORC*/
}
int main(int argc, char *argv[]) { char *s, *d; orc_uint8 *src, *dest; OrcProfile prof; OrcProfile prof_libc; double ave, std; double ave_libc, std_libc; double null; int i,j; double cpufreq; int unalign; OrcProgram *p; int level1, level2, level3; int max; /* const uint8_t zero = 0; */ orc_init (); /* cpufreq = 2333e6; */ cpufreq = 1; if (argc > 1) { unalign = strtoul (argv[1], NULL, 0); } else { unalign = 0; } s = malloc(1024*1024*64+1024); d = malloc(1024*1024*64+1024); src = ORC_PTR_OFFSET(ALIGN(s,128),unalign); dest = ALIGN(d,128); orc_profile_init (&prof); for(j=0;j<10;j++){ orc_profile_start(&prof); orc_profile_stop(&prof); } orc_profile_get_ave_std (&prof, &null, &std); { OrcCompileResult result; p = orc_program_new (); orc_program_set_name (p, "orc_memcpy"); /* orc_program_set_name (p, "orc_memset"); */ orc_program_add_destination (p, 1, "d1"); orc_program_add_source (p, 1, "s1"); /* orc_program_add_parameter (p, 1, "p1"); */ orc_program_append (p, "copyb", ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1); result = orc_program_compile (p); if (ORC_COMPILE_RESULT_IS_FATAL (result)) { fprintf (stderr, "Failed to compile orc_memcpy\n"); return -1; } } #ifndef M_LN2 #define M_LN2 0.69314718055994530942 #endif orc_get_data_cache_sizes (&level1, &level2, &level3); if (level3 > 0) { max = (log(level3)/M_LN2 - 6.0) * 10 + 20; } else if (level2 > 0) { max = (log(level2)/M_LN2 - 6.0) * 10 + 20; } else { max = 140; } for(i=0;i<max;i++){ double x = i*0.1 + 6.0; int size = pow(2.0, x); if (flush_cache) { touch (src, (1<<18)); } if (hot_src) { touch (src, size); } if (hot_dest) { touch (dest, size); } orc_profile_init (&prof); for(j=0;j<10;j++){ OrcExecutor _ex, *ex = &_ex; void (*func) (OrcExecutor *); orc_profile_start(&prof); /* orc_memcpy (dest, src, size); */ ex->program = p; ex->n = size; ex->arrays[ORC_VAR_D1] = dest; ex->arrays[ORC_VAR_S1] = (void *)src; func = p->code_exec; func (ex); orc_profile_stop(&prof); if (flush_cache) { touch (src, (1<<18)); } if (hot_src) { touch 
(src, size); } if (hot_dest) { touch (dest, size); } } orc_profile_init (&prof_libc); for(j=0;j<10;j++){ orc_profile_start(&prof_libc); memcpy (dest, src, size); orc_profile_stop(&prof_libc); if (flush_cache) { touch (src, (1<<18)); } if (hot_src) { touch (src, size); } if (hot_dest) { touch (dest, size); } } orc_profile_get_ave_std (&prof, &ave, &std); orc_profile_get_ave_std (&prof_libc, &ave_libc, &std_libc); ave -= null; ave_libc -= null; /* printf("%d: %10.4g %10.4g %10.4g %10.4g (libc %10.4g)\n", i, ave, std, */ /* ave/(1<<i), cpufreq/(ave/(1<<i)), */ /* cpufreq/(ave_libc/(1<<i))); */ printf("%g %10.4g %10.4g\n", x, cpufreq/(ave/size), cpufreq/(ave_libc/size)); /* printf("%g %10.4g %10.4g\n", x, */ /* 32*(ave/(size)), 32*(ave_libc/(size))); */ fflush (stdout); } orc_program_free (p); free (s); free (d); return 0; }
static void schro_motion_init_functions (SchroMotion * motion) { if (motion_funcs[motion->xblen >> 1].block_accumulate == NULL) { OrcProgram *p; OrcCompileResult result; p = orc_program_new (); orc_program_set_constant_n (p, motion->xblen); orc_program_set_2d (p); orc_program_set_name (p, "block_acc_Xxn"); orc_program_add_destination (p, 2, "d1"); orc_program_add_source (p, 2, "s1"); orc_program_add_source (p, 1, "s2"); orc_program_add_temporary (p, 2, "t1"); orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1); orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1); result = orc_program_compile (p); if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) { SCHRO_ERROR ("compile failed"); } motion_funcs[motion->xblen / 2].block_accumulate = p; } if (motion_funcs[motion->xblen >> 1].block_accumulate_scaled == NULL) { OrcProgram *p; OrcCompileResult result; p = orc_program_new (); orc_program_set_constant_n (p, motion->xblen); orc_program_set_2d (p); orc_program_set_name (p, "block_acc_scaled_Xxn"); orc_program_add_destination (p, 2, "d1"); orc_program_add_source (p, 2, "s1"); orc_program_add_source (p, 1, "s2"); orc_program_add_parameter (p, 2, "p1"); orc_program_add_constant (p, 2, 32, "c1"); orc_program_add_constant (p, 2, 6, "c2"); orc_program_add_temporary (p, 2, "t1"); orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_S2, ORC_VAR_D1); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1); orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1); orc_program_append (p, "shrsw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1); orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1); result = orc_program_compile (p); if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) { SCHRO_ERROR ("compile failed"); } motion_funcs[motion->xblen / 2].block_accumulate_scaled = p; } if 
(motion_funcs[motion->xblen >> 1].block_accumulate_dc == NULL) { OrcProgram *p; OrcCompileResult result; p = orc_program_new (); orc_program_set_constant_n (p, motion->xblen); orc_program_set_2d (p); orc_program_set_name (p, "block_acc_dc_Xxn"); orc_program_add_destination (p, 2, "d1"); orc_program_add_source (p, 2, "s1"); orc_program_add_parameter (p, 2, "p1"); orc_program_add_temporary (p, 2, "t1"); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_S1, ORC_VAR_P1); orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1); result = orc_program_compile (p); if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) { SCHRO_ERROR ("compile failed"); } motion_funcs[motion->xblen / 2].block_accumulate_dc = p; } if (motion_funcs[motion->xblen >> 1].block_accumulate_avg == NULL) { OrcProgram *p; OrcCompileResult result; p = orc_program_new (); orc_program_set_constant_n (p, motion->xblen); orc_program_set_2d (p); orc_program_set_name (p, "block_acc_avg_Xxn"); orc_program_add_destination (p, 2, "d1"); orc_program_add_source (p, 2, "s1"); orc_program_add_source (p, 1, "s2"); orc_program_add_source (p, 1, "s3"); orc_program_add_temporary (p, 2, "t1"); orc_program_add_temporary (p, 1, "t2"); orc_program_append (p, "avgub", ORC_VAR_T2, ORC_VAR_S2, ORC_VAR_S3); orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_T2, 0); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1); orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1); result = orc_program_compile (p); if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) { SCHRO_ERROR ("compile failed"); } motion_funcs[motion->xblen / 2].block_accumulate_avg = p; } if (motion_funcs[motion->xblen >> 1].block_accumulate_biref == NULL) { OrcProgram *p; OrcCompileResult result; p = orc_program_new (); orc_program_set_constant_n (p, motion->xblen); orc_program_set_2d (p); orc_program_set_name (p, "block_acc_biref_Xxn"); orc_program_add_destination (p, 2, "d1"); orc_program_add_source (p, 2, "s1"); 
orc_program_add_source (p, 1, "s2"); orc_program_add_source (p, 1, "s3"); orc_program_add_parameter (p, 2, "p1"); orc_program_add_parameter (p, 2, "p2"); orc_program_add_constant (p, 2, 32, "c1"); orc_program_add_constant (p, 2, 6, "c2"); orc_program_add_temporary (p, 2, "t1"); orc_program_add_temporary (p, 2, "t2"); orc_program_append (p, "convubw", ORC_VAR_T1, ORC_VAR_S2, 0); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_P1); orc_program_append (p, "convubw", ORC_VAR_T2, ORC_VAR_S3, 0); orc_program_append (p, "mullw", ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P2); orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_T2); orc_program_append (p, "addw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1); orc_program_append (p, "shrsw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C2); orc_program_append (p, "mullw", ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_S1); orc_program_append (p, "addw", ORC_VAR_D1, ORC_VAR_D1, ORC_VAR_T1); result = orc_program_compile (p); if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL (result)) { SCHRO_ERROR ("compile failed"); } motion_funcs[motion->xblen / 2].block_accumulate_biref = p; } }