Пример #1
0
/*
 * Plain-C fallback for the 8-bit blend kernel, driven by an OrcExecutor.
 *
 * Reads the row length from ex->n, the row count from
 * ex->params[ORC_VAR_A1] and the blend factor from ex->params[24].
 * Array 0 is both read and written (destination); array 4 is read only
 * (source).  ex->params[0] and ex->params[4] are the per-row byte offsets
 * of those two arrays.
 *
 * For every byte the emulated opcode sequence is
 *   d = (((d << 8) + (s - d) * factor) as u16 >> 8), clamped by
 *   ORC_CLAMP_UB,
 * with every intermediate held in a 16-bit variable, exactly as the
 * opcode-by-opcode comments below describe.
 */
static void
_backup_orc_blend_u8 (OrcExecutor * ex)
{
  const int width = ex->n;
  const int height = ex->params[ORC_VAR_A1];
  const orc_int8 shift = 8;
  const int factor = ex->params[24];
  int row;

  for (row = 0; row < height; row++) {
    orc_int8 *dst = ORC_PTR_OFFSET (ex->arrays[0], ex->params[0] * row);
    const orc_int8 *src = ORC_PTR_OFFSET (ex->arrays[4], ex->params[4] * row);
    int col;

    for (col = 0; col < width; col++) {
      orc_int8 dst_byte = dst[col];
      orc_int8 src_byte = src[col];
      orc_int16 dst_wide;
      orc_int16 src_wide;
      orc_int16 delta;
      orc_int16 scaled;
      orc_int16 base;
      orc_int16 sum;
      orc_int16 result;

      /* convubw: widen both bytes as unsigned values */
      dst_wide = (orc_uint8) dst_byte;
      src_wide = (orc_uint8) src_byte;
      /* subw */
      delta = src_wide - dst_wide;
      /* mullw: keep only the low 16 bits of the product */
      scaled = (delta * factor) & 0xffff;
      /* shlw */
      base = dst_wide << shift;
      /* addw */
      sum = base + scaled;
      /* shruw: logical shift on the unsigned view of the sum */
      result = ((orc_uint16) sum) >> shift;
      /* convsuswb */
      dst_byte = ORC_CLAMP_UB (result);
      dst[col] = dst_byte;
    }
  }
}
Пример #2
0
/*
 * Blend the 2-D byte array s1 into d1 in place.
 *
 * d1, d1_stride: destination rows and the byte offset between them.
 * s1, s1_stride: source rows and the byte offset between them.
 * p1:            blend factor multiplied with (source - destination).
 * n, m:          bytes per row and number of rows.
 *
 * Per byte: d = (((d << 8) + (s - d) * p1) as u16 >> 8), clamped by
 * ORC_CLAMP_UB.  All intermediates are 16-bit, mirroring the opcode
 * sequence named in the comments below.
 */
void
orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride,
    int p1, int n, int m)
{
  const orc_int8 shift = 8;
  const int factor = p1;
  int row;

  for (row = 0; row < m; row++) {
    orc_int8 *dst = ORC_PTR_OFFSET (d1, d1_stride * row);
    const orc_int8 *src = ORC_PTR_OFFSET (s1, s1_stride * row);
    int col;

    for (col = 0; col < n; col++) {
      orc_int8 dst_byte = dst[col];
      orc_int8 src_byte = src[col];
      orc_int16 dst_wide;
      orc_int16 src_wide;
      orc_int16 delta;
      orc_int16 scaled;
      orc_int16 base;
      orc_int16 sum;
      orc_int16 result;

      /* convubw: widen both bytes as unsigned values */
      dst_wide = (orc_uint8) dst_byte;
      src_wide = (orc_uint8) src_byte;
      /* subw */
      delta = src_wide - dst_wide;
      /* mullw: keep only the low 16 bits of the product */
      scaled = (delta * factor) & 0xffff;
      /* shlw */
      base = dst_wide << shift;
      /* addw */
      sum = base + scaled;
      /* shruw: logical shift on the unsigned view of the sum */
      result = ((orc_uint16) sum) >> shift;
      /* convsuswb */
      dst_byte = ORC_CLAMP_UB (result);
      dst[col] = dst_byte;
    }
  }
}
Пример #3
0
/*
 * Compare the contents of two arrays.
 *
 * When ORC_TEST_FLAGS_FLOAT is set in flags, the visible n x m elements
 * are compared as float (element_size 4) or double (element_size 8).
 * Two elements match when both are NaN, when they are exactly equal, or
 * when their absolute difference is below MIN_NONDENORMAL /
 * MIN_NONDENORMAL_D.  Any other element size yields FALSE.
 *
 * Without the flag, the complete allocations (array1->alloc_len bytes)
 * are compared bytewise with memcmp.
 *
 * Dimensions and element size are taken from array1 only; each array's
 * own stride is used to locate its rows.
 *
 * Returns TRUE when the arrays match, FALSE otherwise.
 */
int
orc_array_compare (OrcArray *array1, OrcArray *array2, int flags)
{
  if ((flags & ORC_TEST_FLAGS_FLOAT)) {
    if (array1->element_size == 4) {
      int j;
      for(j=0;j<array1->m;j++){
        float *a, *b;
        int i;

        a = ORC_PTR_OFFSET (array1->data, j*array1->stride);
        b = ORC_PTR_OFFSET (array2->data, j*array2->stride);

        for (i=0;i<array1->n;i++){
          if (isnan(a[i]) && isnan(b[i])) continue;
          if (a[i] == b[i]) continue;
          if (fabs(a[i] - b[i]) < MIN_NONDENORMAL) continue;
          return FALSE;
        }
      }
      return TRUE;
    } else if (array1->element_size == 8) {
      int j;
      for(j=0;j<array1->m;j++){
        double *a, *b;
        int i;

        a = ORC_PTR_OFFSET (array1->data, j*array1->stride);
        b = ORC_PTR_OFFSET (array2->data, j*array2->stride);

        for (i=0;i<array1->n;i++){
          if (isnan(a[i]) && isnan(b[i])) continue;
          if (a[i] == b[i]) continue;
          /* fabs, not abs: the integer abs() would truncate the difference
           * to an int, making every difference below 1.0 compare equal. */
          if (fabs(a[i] - b[i]) < MIN_NONDENORMAL_D) continue;
          return FALSE;
        }
      }
      return TRUE;
    }
  } else {
    if (memcmp (array1->alloc_data, array2->alloc_data,
          array1->alloc_len) == 0) {
      return TRUE;
    }
  }

  return FALSE;
}
Пример #4
0
/*
 * Allocate an OrcArray of n x m elements of element_size bytes each.
 *
 * The row stride is n*element_size + EXTEND_STRIDE, rounded up to a
 * multiple of ALIGNMENT.  The allocation covers m + 2*EXTEND_ROWS rows
 * plus ALIGNMENT*element_size extra bytes.  ar->data points
 * EXTEND_ROWS rows into the allocation, shifted by a further
 * element_size * misalignment bytes.
 *
 * The element storage is not initialised here.
 *
 * Returns the new array, or NULL if either allocation fails.
 */
OrcArray *
orc_array_new (int n, int m, int element_size, int misalignment)
{
  OrcArray *ar;
  void *data = NULL;

  ar = malloc (sizeof(OrcArray));
  if (ar == NULL) {
    return NULL;
  }
  memset (ar, 0, sizeof(OrcArray));

  ar->n = n;
  ar->m = m;
  ar->element_size = element_size;

  ar->stride = (n*element_size + EXTEND_STRIDE);
  ar->stride = (ar->stride + (ALIGNMENT-1)) & (~(ALIGNMENT-1));
  ar->alloc_len = ar->stride * (m+2*EXTEND_ROWS) + (ALIGNMENT * element_size);

#ifdef HAVE_POSIX_MEMALIGN
  /* posix_memalign leaves the output pointer unspecified on failure, so
   * reset it explicitly instead of trusting its value. */
  if (posix_memalign (&data, ALIGNMENT, ar->alloc_len) != 0) {
    data = NULL;
  }
#else
  data = malloc (ar->alloc_len);
#endif
  if (data == NULL) {
    free (ar);
    return NULL;
  }
  ar->alloc_data = data;

  ar->data = ORC_PTR_OFFSET (ar->alloc_data,
      ar->stride * EXTEND_ROWS + element_size * misalignment);

  return ar;
}
Пример #5
0
/*
 * Compare element (i, j) of two floating-point arrays.
 *
 * i is the element index within a row, j the row index.  The element
 * size of array1 selects the interpretation: 4 bytes as float, 8 bytes
 * as double.
 *
 * Floats match when both are NaN, when they are exactly equal, or when
 * their absolute difference is below MIN_NONDENORMAL.  Doubles match
 * when both are NaN or when they are exactly equal.
 *
 * Returns TRUE on a match, FALSE otherwise (including for any other
 * element size).
 */
int
float_compare (OrcArray *array1, OrcArray *array2, int i, int j)
{
  void *ptr1 = ORC_PTR_OFFSET (array1->data,
      i*array1->element_size + j*array1->stride);
  void *ptr2 = ORC_PTR_OFFSET (array2->data,
      i*array2->element_size + j*array2->stride);

  switch (array1->element_size) {
    case 4:
      if (isnan(*(float *)ptr1) && isnan(*(float *)ptr2)) return TRUE;
      if (*(float *)ptr1 == *(float *)ptr2) return TRUE;
      if (fabs(*(float *)ptr1 - *(float *)ptr2) < MIN_NONDENORMAL) return TRUE;
      return FALSE;
    case 8:
      if (isnan(*(double *)ptr1) && isnan(*(double *)ptr2)) return TRUE;
      if (*(double *)ptr1 == *(double *)ptr2) return TRUE;
      /* NOTE(review): the float case also tolerates differences below
       * MIN_NONDENORMAL; no double-precision threshold is visible from
       * this function, so none is applied here.  Add one if the file
       * defines it. */
      return FALSE;
    default:
      break;
  }
  return FALSE;
}
Пример #6
0
/*
 * Reserve a chunk of code memory for `code`.
 *
 * The requested size is rounded up to a multiple of 16 bytes and a free
 * chunk of at least that size is obtained from
 * orc_code_region_get_free_chunk().  A chunk larger than needed is split
 * so that only the rounded size is taken.  The chunk is then marked as
 * used and recorded in `code`, together with the write and exec
 * addresses derived from the owning region and the chunk offset.
 * code->code_size stores the size as requested, not the rounded one.
 */
void
orc_code_allocate_codemem (OrcCode *code, int size)
{
  const int rounded = (size + 15) & (~15);
  OrcCodeChunk *slot = orc_code_region_get_free_chunk (rounded);
  OrcCodeRegion *owner = slot->region;

  if (slot->size > rounded)
    orc_code_chunk_split (slot, rounded);

  slot->used = TRUE;

  code->chunk = slot;
  code->code_size = size;
  code->code = ORC_PTR_OFFSET(owner->write_ptr, slot->offset);
  code->exec = ORC_PTR_OFFSET(owner->exec_ptr, slot->offset);
}
Пример #7
0
/*
 * Verify that every guard byte around the array still holds
 * ORC_OOB_VALUE.
 *
 * Three areas are scanned in order:
 *   1. the first stride * EXTEND_ROWS bytes of the allocation,
 *   2. for each of the m rows, the bytes from the end of the n elements
 *      up to the stride,
 *   3. the stride * EXTEND_ROWS bytes following the last row.
 *
 * On the first mismatch a diagnostic is printed to stdout and FALSE is
 * returned; TRUE means every guard byte was intact.
 */
int
orc_array_check_out_of_bounds (OrcArray *array)
{
  unsigned char *guard;
  int pos;
  int row;

  /* Guard rows in front of the data. */
  guard = array->alloc_data;
  pos = 0;
  while (pos < array->stride * EXTEND_ROWS) {
    if (guard[pos] != ORC_OOB_VALUE) {
      printf("OOB check failed at start-%d\n", array->stride * EXTEND_ROWS - pos);
      return FALSE;
    }
    pos++;
  }

  /* Padding between the end of each row's elements and the stride. */
  for (row = 0; row < array->m; row++) {
    guard = ORC_PTR_OFFSET(array->data, array->stride * row);
    pos = array->element_size * array->n;
    while (pos < array->stride) {
      if (guard[pos] != ORC_OOB_VALUE) {
        printf("OOB check failed on row %d, end+%d\n", row,
            pos - array->element_size * array->n);
        return FALSE;
      }
      pos++;
    }
  }

  /* Guard rows behind the data. */
  guard = ORC_PTR_OFFSET (array->data, array->stride * array->m);
  pos = 0;
  while (pos < array->stride * EXTEND_ROWS) {
    if (guard[pos] != ORC_OOB_VALUE) {
      printf("OOB check failed at end+%d\n", pos);
      return FALSE;
    }
    pos++;
  }

  return TRUE;
}
Пример #8
0
/*
 * Print element (i, j) of a floating-point array to stdout and return
 * its bit pattern.
 *
 * i is the element index within a row, j the row index.
 *   - 4-byte elements: NaNs are printed as " nan " plus the raw hex
 *     value and returned with bit 22 cleared (mask 0xffbfffff), so that
 *     signaling and non-signaling NaNs give the same result; other
 *     values are printed with " %12.5g" and returned as the
 *     sign-extended 32-bit pattern.
 *   - 8-byte elements: printed with " %12.5g", returned as the 64-bit
 *     pattern.
 *   - any other size: prints " ERROR" and returns -1 (all bits set).
 */
static orc_uint64
print_array_val_float (OrcArray *array, int i, int j)
{
  void *elem = ORC_PTR_OFFSET (array->data,
      i*array->element_size + j*array->stride);

  if (array->element_size == 4) {
    if (!isnan(*(float *)elem)) {
      printf(" %12.5g", *(float *)elem);
      return *(orc_int32 *)elem;
    }
    printf(" nan %08x", *(orc_uint32 *)elem);
    /* Mask out the bit that differs between signaling and
     * non-signaling NaNs. */
    return (*(orc_uint32 *)elem) & 0xffbfffff;
  }

  if (array->element_size == 8) {
    printf(" %12.5g", *(double *)elem);
    return *(orc_int64 *)elem;
  }

  printf(" ERROR");
  return -1;
}
Пример #9
0
/*
 * Print element (i, j) of an integer array to stdout in hexadecimal and
 * return its value.
 *
 * i is the element index within a row, j the row index.  Elements of 1,
 * 2 and 4 bytes are printed zero-padded to their width; 8-byte elements
 * are printed as "0x" followed by the high and low 32-bit halves.  The
 * return value is the element read as a signed integer of its size,
 * converted to orc_uint64.  Any other element size prints nothing and
 * returns -1 (all bits set).
 */
static orc_uint64
print_array_val_hex (OrcArray *array, int i, int j)
{
  void *elem = ORC_PTR_OFFSET (array->data,
      i*array->element_size + j*array->stride);
  const int width = array->element_size;

  if (width == 1) {
    printf(" %02x", *(orc_uint8 *)elem);
    return *(orc_int8 *)elem;
  }
  if (width == 2) {
    printf(" %04x", *(orc_uint16 *)elem);
    return *(orc_int16 *)elem;
  }
  if (width == 4) {
    printf(" %08x", *(orc_uint32 *)elem);
    return *(orc_int32 *)elem;
  }
  if (width == 8) {
    printf(" 0x%08x%08x", (orc_uint32)((*(orc_uint64 *)elem)>>32),
        (orc_uint32)((*(orc_uint64 *)elem)));
    return *(orc_int64 *)elem;
  }

  return -1;
}
Пример #10
0
/*
 * Fill an array with one of several test patterns.
 *
 * type selects the pattern:
 *   ORC_PATTERN_RANDOM          random bits over the whole allocation
 *                               (alloc_data, alloc_len bytes).
 *   ORC_PATTERN_FLOAT_SMALL     random sign and mantissa with the
 *                               exponent field forced into 122..137.
 *   ORC_PATTERN_FLOAT_SPECIAL   cycles through special_floats[0..31]
 *                               by element index.
 *   ORC_PATTERN_FLOAT_DENORMAL  random sign and mantissa with the
 *                               exponent field cleared.
 *
 * The three float patterns only touch the n x m visible elements and
 * return without doing anything unless element_size is 4.  Unknown
 * types are ignored.
 */
void
orc_array_set_pattern_2 (OrcArray *array, OrcRandomContext *context,
    int type)
{
  int i,j;

  switch (type) {
    case ORC_PATTERN_RANDOM:
      orc_random_bits (context, array->alloc_data, array->alloc_len);
      break;
    case ORC_PATTERN_FLOAT_SMALL:
      {
        if (array->element_size != 4) return;
        for(j=0;j<array->m;j++){
          orc_union32 *data;
          int exp;

          data = ORC_PTR_OFFSET(array->data, array->stride * j);

          for(i=0;i<array->n;i++){
            data[i].i = orc_random (context);
            /* Take the low four bits of the random exponent field and
             * rebias them to 122..137.  The mask is the same 8-bit
             * exponent mask used to clear the field below. */
            exp = (data[i].i & 0x7f800000) >> 23;
            exp &= 0xf;
            exp += 122;
            data[i].i &= ~0x7f800000;
            data[i].i |= (exp&0xff) << 23;
          }
        }
      }
      break;
    case ORC_PATTERN_FLOAT_SPECIAL:
      {
        if (array->element_size != 4) return;
        for(j=0;j<array->m;j++){
          orc_union32 *data;
          int x;

          data = ORC_PTR_OFFSET(array->data, array->stride * j);

          for(i=0;i<array->n;i++){
            /* Wrap the element index into the 32-entry table. */
            x = i&0x1f;
            data[i].i = special_floats[x];
          }
        }
      }
      break;
    case ORC_PATTERN_FLOAT_DENORMAL:
      {
        if (array->element_size != 4) return;
        for(j=0;j<array->m;j++){
          orc_union32 *data;

          data = ORC_PTR_OFFSET(array->data, array->stride * j);

          for(i=0;i<array->n;i++){
            data[i].i = orc_random (context);
            /* A zero exponent field leaves only zero or denormals. */
            data[i].i &= ~0x7f800000;
          }
        }
      }
      break;
    default:
      break;
  }
}
Пример #11
0
/*
 * memcpy benchmark: times an Orc-compiled byte copy against libc memcpy
 * for block sizes 2^x, x = 6.0, 6.1, 6.2, ...
 *
 * argv[1] (optional) is a byte offset added to the 128-byte-aligned
 * source pointer, to measure unaligned copies.
 *
 * The number of steps is derived from the level-3 (or, failing that,
 * level-2) data cache size and capped so that the copied block never
 * exceeds the payload of the two buffers.  For every size one line
 * "<x> <orc> <libc>" is printed, where the last two columns are
 * cpufreq / (average time per byte) after subtracting the measured
 * profiling overhead.
 *
 * Returns 0 on success, -1 if the buffers cannot be allocated or the
 * Orc program fails to compile.
 */
int
main(int argc, char *argv[])
{
  /* Each buffer holds 2^BUFFER_LOG2 payload bytes plus BUFFER_SLACK
   * bytes that absorb the 128-byte alignment and the source offset. */
  enum { BUFFER_LOG2 = 26, BUFFER_SLACK = 1024 };
  char *s, *d;
  orc_uint8 *src, *dest;
  OrcProfile prof;
  OrcProfile prof_libc;
  double ave, std;
  double ave_libc, std_libc;
  double null;
  int i,j;
  double cpufreq;
  int unalign;
  OrcProgram *p;
  int level1, level2, level3;
  int max;
  int max_steps;

  orc_init ();

  /* With cpufreq = 1 the output is in bytes per profiler time unit. */
  cpufreq = 1;

  if (argc > 1) {
    unalign = strtoul (argv[1], NULL, 0);
  } else {
    unalign = 0;
  }

  s = malloc(((size_t)1 << BUFFER_LOG2) + BUFFER_SLACK);
  d = malloc(((size_t)1 << BUFFER_LOG2) + BUFFER_SLACK);
  if (s == NULL || d == NULL) {
    fprintf (stderr, "Failed to allocate benchmark buffers\n");
    free (s);
    free (d);
    return -1;
  }
  src = ORC_PTR_OFFSET(ALIGN(s,128),unalign);
  dest = ALIGN(d,128);

  /* Measure the cost of an empty start/stop pair so that it can be
   * subtracted from the real measurements. */
  orc_profile_init (&prof);
  for(j=0;j<10;j++){
    orc_profile_start(&prof);
    orc_profile_stop(&prof);
  }
  orc_profile_get_ave_std (&prof, &null, &std);

  {
    OrcCompileResult result;

    p = orc_program_new ();
    orc_program_set_name (p, "orc_memcpy");
    orc_program_add_destination (p, 1, "d1");
    orc_program_add_source (p, 1, "s1");

    orc_program_append (p, "copyb", ORC_VAR_D1, ORC_VAR_S1, ORC_VAR_D1);

    result = orc_program_compile (p);

    if (ORC_COMPILE_RESULT_IS_FATAL (result)) {
      fprintf (stderr, "Failed to compile orc_memcpy\n");
      orc_program_free (p);
      free (s);
      free (d);
      return -1;
    }
  }

#ifndef M_LN2
#define M_LN2 0.69314718055994530942
#endif
  orc_get_data_cache_sizes (&level1, &level2, &level3);
  if (level3 > 0) {
    max = (log(level3)/M_LN2 - 6.0) * 10 + 20;
  } else if (level2 > 0) {
    max = (log(level2)/M_LN2 - 6.0) * 10 + 20;
  } else {
    max = 140;
  }

  /* Step i copies 2^(6 + i/10) bytes.  Stop before that reaches
   * 2^BUFFER_LOG2, otherwise large caches would drive the copy past the
   * end of the buffers. */
  max_steps = (BUFFER_LOG2 - 6) * 10;
  if (max > max_steps) {
    max = max_steps;
  }

  for(i=0;i<max;i++){
    double x = i*0.1 + 6.0;
    int size = pow(2.0, x);

    if (flush_cache) {
      touch (src, (1<<18));
    }
    if (hot_src) {
      touch (src, size);
    }
    if (hot_dest) {
      touch (dest, size);
    }

    orc_profile_init (&prof);
    for(j=0;j<10;j++){
      OrcExecutor _ex, *ex = &_ex;
      void (*func) (OrcExecutor *);

      orc_profile_start(&prof);
      /* The executor setup is deliberately inside the timed region. */
      ex->program = p;
      ex->n = size;
      ex->arrays[ORC_VAR_D1] = dest;
      ex->arrays[ORC_VAR_S1] = (void *)src;

      func = p->code_exec;
      func (ex);

      orc_profile_stop(&prof);
      if (flush_cache) {
        touch (src, (1<<18));
      }
      if (hot_src) {
        touch (src, size);
      }
      if (hot_dest) {
        touch (dest, size);
      }
    }

    orc_profile_init (&prof_libc);
    for(j=0;j<10;j++){
      orc_profile_start(&prof_libc);
      memcpy (dest, src, size);
      orc_profile_stop(&prof_libc);
      if (flush_cache) {
        touch (src, (1<<18));
      }
      if (hot_src) {
        touch (src, size);
      }
      if (hot_dest) {
        touch (dest, size);
      }
    }

    orc_profile_get_ave_std (&prof, &ave, &std);
    orc_profile_get_ave_std (&prof_libc, &ave_libc, &std_libc);

    /* Remove the profiling overhead measured above. */
    ave -= null;
    ave_libc -= null;

    printf("%g %10.4g %10.4g\n", x,
        cpufreq/(ave/size), cpufreq/(ave_libc/size));
    fflush (stdout);
  }

  orc_program_free (p);
  free (s);
  free (d);

  return 0;
}
Пример #12
0
/*
 * Run a compiled Orc program through the per-opcode C emulation instead
 * of generated machine code.
 *
 * The code object comes from ex->program->orccode, or from
 * ex->arrays[ORC_VAR_A2] when ex->program is NULL.  The function
 *   1. clears the four accumulators in ex,
 *   2. allocates scratch storage for every variable with a non-zero
 *      size,
 *   3. builds one OrcOpcodeExecutor per instruction, binding each
 *      source and destination slot to scratch storage, an accumulator
 *      or a caller array according to the variable type,
 *   4. for each of the m rows (1 unless the code is 2-D) rebinds the
 *      array pointers using the per-array row offset in ex->params and
 *      runs all instructions over the row in blocks of CHUNK_SIZE
 *      elements,
 *   5. releases all temporary storage.
 *
 * Misaligned caller arrays are reported through ORC_ERROR but still
 * processed.  If a temporary allocation fails, an error is logged and
 * the function returns without running the program.
 */
void
orc_executor_emulate (OrcExecutor *ex)
{
  int i;
  int j;
  int k;
  int m, m_index;
  OrcCode *code;
  OrcInstruction *insn;
  OrcStaticOpcode *opcode;
  OrcOpcodeExecutor *opcode_ex = NULL;
  void *tmpspace[ORC_N_COMPILER_VARIABLES] = { 0 };
  /* Name used in diagnostics; ex->program may be NULL on this path. */
  const char *program_name;

  if (ex->program) {
    code = ex->program->orccode;
    program_name = ex->program->name;
  } else {
    code = (OrcCode *)ex->arrays[ORC_VAR_A2];
    program_name = "(unknown)";
  }

  ex->accumulators[0] = 0;
  ex->accumulators[1] = 0;
  ex->accumulators[2] = 0;
  ex->accumulators[3] = 0;

  ORC_DEBUG("emulating");

  if (code == NULL) {
    ORC_ERROR("attempt to run program that failed to compile");
    ORC_ASSERT(0);
    return;
  }

  if (code->is_2d) {
    m = ORC_EXECUTOR_M(ex);
  } else {
    m = 1;
  }

  for(i=0;i<ORC_N_COMPILER_VARIABLES;i++){
    OrcCodeVariable *var = code->vars + i;

    if (var->size) {
      tmpspace[i] = malloc(ORC_MAX_VAR_SIZE * CHUNK_SIZE);
      if (tmpspace[i] == NULL) {
        ORC_ERROR("out of memory allocating emulation scratch space");
        goto cleanup;
      }
    }
  }

  if (code->n_insns > 0) {
    opcode_ex = malloc(sizeof(OrcOpcodeExecutor)*code->n_insns);
    if (opcode_ex == NULL) {
      ORC_ERROR("out of memory allocating opcode executors");
      goto cleanup;
    }
    /* Zero the whole table so that pointer slots an opcode does not use
     * are NULL rather than indeterminate (they are printed below). */
    memset (opcode_ex, 0, sizeof(OrcOpcodeExecutor)*code->n_insns);
  }

  for(j=0;j<code->n_insns;j++){
    insn = code->insns + j;
    opcode = insn->opcode;

    opcode_ex[j].emulateN = opcode->emulateN;
    /* X2/X4 instructions process 2 or 4 values per element; the shift
     * scales the element count passed to emulateN accordingly. */
    opcode_ex[j].shift = 0;
    if (insn->flags & ORC_INSTRUCTION_FLAG_X2) {
      opcode_ex[j].shift = 1;
    } else if (insn->flags & ORC_INSTRUCTION_FLAG_X4) {
      opcode_ex[j].shift = 2;
    }

    for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++) {
      OrcCodeVariable *var = code->vars + insn->src_args[k];
      if (opcode->src_size[k] == 0) continue;

      if (var->vartype == ORC_VAR_TYPE_CONST) {
        opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
        /* FIXME hack */
        load_constant (tmpspace[insn->src_args[k]], 8,
            var->value.i);
      } else if (var->vartype == ORC_VAR_TYPE_PARAM) {
        opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
        /* FIXME hack */
        /* Combine the parameter (low 32 bits) with the value stored
         * ORC_VAR_T1 - ORC_VAR_P1 slots further on (high 32 bits). */
        load_constant (tmpspace[insn->src_args[k]], 8,
            (orc_uint64)(orc_uint32)ex->params[insn->src_args[k]] |
            (((orc_uint64)(orc_uint32)ex->params[insn->src_args[k] +
             (ORC_VAR_T1 - ORC_VAR_P1)])<<32));
      } else if (var->vartype == ORC_VAR_TYPE_TEMP) {
        opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
      } else if (var->vartype == ORC_VAR_TYPE_SRC) {
        if (ORC_PTR_TO_INT(ex->arrays[insn->src_args[k]]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for src%d, program %s",
              (insn->src_args[k]-ORC_VAR_S1), program_name);
        }
        opcode_ex[j].src_ptrs[k] = ex->arrays[insn->src_args[k]];
      } else if (var->vartype == ORC_VAR_TYPE_DEST) {
        if (ORC_PTR_TO_INT(ex->arrays[insn->src_args[k]]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for dest%d, program %s",
              (insn->src_args[k]-ORC_VAR_D1), program_name);
        }
        opcode_ex[j].src_ptrs[k] = ex->arrays[insn->src_args[k]];
      }
    }
    for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++) {
      OrcCodeVariable *var = code->vars + insn->dest_args[k];
      if (opcode->dest_size[k] == 0) continue;

      if (var->vartype == ORC_VAR_TYPE_TEMP) {
        ORC_DEBUG("dest vartype tmp %d", insn->dest_args[k]);
        opcode_ex[j].dest_ptrs[k] = tmpspace[insn->dest_args[k]];
      } else if (var->vartype == ORC_VAR_TYPE_ACCUMULATOR) {
        opcode_ex[j].dest_ptrs[k] =
          &ex->accumulators[insn->dest_args[k] - ORC_VAR_A1];
      } else if (var->vartype == ORC_VAR_TYPE_DEST) {
        if (ORC_PTR_TO_INT(ex->arrays[insn->dest_args[k]]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for dest%d, program %s",
              (insn->dest_args[k]-ORC_VAR_D1), program_name);
        }
        opcode_ex[j].dest_ptrs[k] = ex->arrays[insn->dest_args[k]];
      }
    }
    ORC_DEBUG("opcode %s %p %p %p", opcode->name,
        opcode_ex[j].dest_ptrs[0], opcode_ex[j].src_ptrs[0],
        opcode_ex[j].src_ptrs[1]);
  }

  ORC_DEBUG("src ptr %p stride %d", ex->arrays[ORC_VAR_S1], ex->params[ORC_VAR_S1]);
  for(m_index=0;m_index<m;m_index++){
    ORC_DEBUG("m_index %d m %d", m_index, m);

    /* Point every array-backed operand at the start of row m_index. */
    for(j=0;j<code->n_insns;j++){
      insn = code->insns + j;
      opcode = insn->opcode;

      for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++) {
        OrcCodeVariable *var = code->vars + insn->src_args[k];
        if (opcode->src_size[k] == 0) continue;

        if (var->vartype == ORC_VAR_TYPE_SRC) {
          opcode_ex[j].src_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[insn->src_args[k]],
                ex->params[insn->src_args[k]]*m_index);
        } else if (var->vartype == ORC_VAR_TYPE_DEST) {
          opcode_ex[j].src_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[insn->src_args[k]],
                ex->params[insn->src_args[k]]*m_index);
        }
      }
      for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++) {
        OrcCodeVariable *var = code->vars + insn->dest_args[k];
        if (opcode->dest_size[k] == 0) continue;

        if (var->vartype == ORC_VAR_TYPE_DEST) {
          opcode_ex[j].dest_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[insn->dest_args[k]],
                ex->params[insn->dest_args[k]]*m_index);
        }
      }
    }

    /* Run all instructions block by block; the last block of a row may
     * be shorter than CHUNK_SIZE. */
    for(i=0;i<ex->n;i+=CHUNK_SIZE){
      for(j=0;j<code->n_insns;j++){
        if (ex->n - i >= CHUNK_SIZE) {
          opcode_ex[j].emulateN (opcode_ex + j, i, CHUNK_SIZE << opcode_ex[j].shift);
        } else {
          opcode_ex[j].emulateN (opcode_ex + j, i, (ex->n - i) << opcode_ex[j].shift);
        }
      }
    }
  }

cleanup:
  free (opcode_ex);
  for(i=0;i<ORC_N_COMPILER_VARIABLES;i++){
    free (tmpspace[i]);
  }
}