/*
 * Pick a register to use as a temporary at the instruction currently
 * being compiled (compiler->insn_index).
 *
 * compiler->alloc_regs[] is rebuilt on every call.  A register is marked
 * busy when it is assigned to a variable whose first_use is -1, to a
 * variable whose [first_use, last_use] range contains the current
 * instruction, or to a constant with a non-zero alloc_reg.
 *
 * The search starts at compiler->min_temp_reg and stops before
 * ORC_VEC_REG_BASE+16.  On success min_temp_reg is advanced past the
 * chosen register and max_used_temp_reg is raised if needed.
 *
 * Returns the chosen register.  If none is free, reports a compiler
 * error, sets compiler->result to ORC_COMPILE_RESULT_UNKNOWN_COMPILE and
 * returns 0.
 */
int
orc_compiler_get_temp_reg (OrcCompiler *compiler)
{
  int reg;
  int idx;

  /* Forget the occupancy computed by any previous call. */
  for (reg = 0; reg < ORC_N_REGS; reg++) {
    compiler->alloc_regs[reg] = 0;
  }

  /* Registers held by variables that are live at this instruction. */
  for (idx = 0; idx < ORC_N_COMPILER_VARIABLES; idx++) {
    if (compiler->vars[idx].alloc) {
      ORC_DEBUG("var %d: %d %d %d", idx, compiler->vars[idx].alloc,
          compiler->vars[idx].first_use, compiler->vars[idx].last_use);

      if (compiler->vars[idx].first_use == -1 ||
          (compiler->vars[idx].first_use <= compiler->insn_index &&
           compiler->vars[idx].last_use >= compiler->insn_index)) {
        compiler->alloc_regs[compiler->vars[idx].alloc] = 1;
      }
    }
  }

  /* Registers holding constants. */
  for (idx = 0; idx < compiler->n_constants; idx++) {
    if (compiler->constants[idx].alloc_reg != 0) {
      compiler->alloc_regs[compiler->constants[idx].alloc_reg] = 1;
    }
  }

  ORC_DEBUG("at insn %d %s", compiler->insn_index,
      compiler->insns[compiler->insn_index].opcode->name);

  for (idx = 0; idx < 8; idx++) {
    ORC_DEBUG("xmm%d: %d %d", idx,
        compiler->valid_regs[ORC_VEC_REG_BASE + idx],
        compiler->alloc_regs[ORC_VEC_REG_BASE + idx]);
  }

  /* First valid, unoccupied register at or above min_temp_reg wins. */
  reg = compiler->min_temp_reg;
  while (reg < ORC_VEC_REG_BASE + 16) {
    if (compiler->valid_regs[reg] && !compiler->alloc_regs[reg]) {
      compiler->min_temp_reg = reg + 1;
      if (reg > compiler->max_used_temp_reg) {
        compiler->max_used_temp_reg = reg;
      }
      return reg;
    }
    reg++;
  }

  ORC_COMPILER_ERROR(compiler,"no temporary register available");
  compiler->result = ORC_COMPILE_RESULT_UNKNOWN_COMPILE;

  return 0;
}
/*
 * Pick a register in which a constant can be kept.
 *
 * compiler->alloc_regs[] is rebuilt on every call.  A register is marked
 * busy when it is assigned to a variable whose first_use is -1 or whose
 * last_use is not -1, to a constant with a non-zero alloc_reg, or when it
 * lies in the range ORC_VEC_REG_BASE..max_used_temp_reg (inclusive).
 *
 * The search starts at compiler->max_used_temp_reg and stops before
 * ORC_VEC_REG_BASE+16.
 *
 * Returns the first valid, unoccupied register found, or 0 if there is
 * none.  No error is reported in that case.
 */
int
orc_compiler_get_constant_reg (OrcCompiler *compiler)
{
  int reg;
  int idx;

  /* Forget the occupancy computed by any previous call. */
  for (reg = 0; reg < ORC_N_REGS; reg++) {
    compiler->alloc_regs[reg] = 0;
  }

  /* Registers held by variables. */
  for (idx = 0; idx < ORC_N_COMPILER_VARIABLES; idx++) {
    if (compiler->vars[idx].alloc) {
      ORC_DEBUG("var %d: %d %d %d", idx, compiler->vars[idx].alloc,
          compiler->vars[idx].first_use, compiler->vars[idx].last_use);

      if (compiler->vars[idx].first_use == -1 ||
          compiler->vars[idx].last_use != -1) {
        compiler->alloc_regs[compiler->vars[idx].alloc] = 1;
      }
    }
  }

  /* Registers already holding constants. */
  for (idx = 0; idx < compiler->n_constants; idx++) {
    if (compiler->constants[idx].alloc_reg != 0) {
      compiler->alloc_regs[compiler->constants[idx].alloc_reg] = 1;
    }
  }

  /* Everything up to the highest temporary handed out so far is taken. */
  reg = ORC_VEC_REG_BASE;
  while (reg <= compiler->max_used_temp_reg) {
    compiler->alloc_regs[reg] = 1;
    reg++;
  }

  for (idx = 0; idx < 8; idx++) {
    ORC_DEBUG("xmm%d: %d %d", idx,
        compiler->valid_regs[ORC_VEC_REG_BASE + idx],
        compiler->alloc_regs[ORC_VEC_REG_BASE + idx]);
  }

  reg = compiler->max_used_temp_reg;
  while (reg < ORC_VEC_REG_BASE + 16) {
    if (compiler->valid_regs[reg] && !compiler->alloc_regs[reg]) {
      return reg;
    }
    reg++;
  }

  return 0;
}
/*
 * Measure how long @program takes to run.
 *
 * Unless ORC_TEST_FLAGS_BACKUP is set in @flags, the program is first
 * compiled for the target named @target_name using that target's default
 * flags.  The executor is then run 10 times under an OrcProfile, using
 * the backup function (ORC_TEST_FLAGS_BACKUP), the emulator
 * (ORC_TEST_FLAGS_EMULATE) or the compiled code (otherwise).
 *
 * n is program->constant_n when positive, otherwise 1000.  m is 1 for
 * non-2D programs, otherwise program->constant_m when positive or a
 * random value in 8..23.
 *
 * Returns the average profiled value divided by n*m, or 0 if compilation
 * was not successful.  The program is reset before returning in both
 * cases.
 */
double
orc_test_performance_full (OrcProgram *program, int flags,
    const char *target_name)
{
  OrcExecutor *ex;
  int n;
  int m;
  OrcArray *dest_exec[4] = { NULL, NULL, NULL, NULL };
  OrcArray *dest_emul[4] = { NULL, NULL, NULL, NULL };
  OrcArray *src[8] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
  int i, j;
  OrcCompileResult result;
  OrcProfile prof;
  double ave, std;
  OrcTarget *target;
  int misalignment;

  ORC_DEBUG ("got here");

  target = orc_target_get_by_name (target_name);

  if (!(flags & ORC_TEST_FLAGS_BACKUP)) {
    /* Named distinctly so the test flags parameter is not shadowed. */
    unsigned int target_flags;

    target_flags = orc_target_get_default_flags (target);

    result = orc_program_compile_full (program, target, target_flags);
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL(result)) {
      orc_program_reset (program);
      return 0;
    }
  }

  if (program->constant_n > 0) {
    n = program->constant_n;
  } else {
    n = 1000;
  }

  ex = orc_executor_new (program);
  orc_executor_set_n (ex, n);
  if (program->is_2d) {
    if (program->constant_m > 0) {
      m = program->constant_m;
    } else {
      m = 8 + (orc_random(&rand_context)&0xf);
    }
  } else {
    m = 1;
  }
  orc_executor_set_m (ex, m);
  ORC_DEBUG("size %d %d", ex->n, ex->params[ORC_VAR_A1]);

  /* Each successive array gets a different misalignment value. */
  misalignment = 0;
  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].name == NULL) continue;

    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
      src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
      /* Fill the array just created: index by i-ORC_VAR_D1, not by i. */
      dest_exec[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_exec[i-ORC_VAR_D1], ORC_OOB_VALUE);
      dest_emul[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_emul[i-ORC_VAR_D1], ORC_OOB_VALUE);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
      orc_executor_set_param (ex, i, 2);
    }
  }

  ORC_DEBUG ("running");
  orc_profile_init (&prof);
  for(i=0;i<10;i++){
    orc_executor_set_n (ex, n);
    orc_executor_set_m (ex, m);
    for(j=0;j<ORC_N_VARIABLES;j++){
      /* Unnamed slots were never given an array above; skip them so a
       * NULL array is never dereferenced. */
      if (program->vars[j].name == NULL) continue;

      if (program->vars[j].vartype == ORC_VAR_TYPE_DEST) {
        orc_executor_set_array (ex, j, dest_exec[j-ORC_VAR_D1]->data);
        orc_executor_set_stride (ex, j, dest_exec[j-ORC_VAR_D1]->stride);
      }
      if (program->vars[j].vartype == ORC_VAR_TYPE_SRC) {
        orc_executor_set_array (ex, j, src[j-ORC_VAR_S1]->data);
        orc_executor_set_stride (ex, j, src[j-ORC_VAR_S1]->stride);
      }
    }

    /* Only the execution itself is inside the profiled region. */
    if (flags & ORC_TEST_FLAGS_BACKUP) {
      orc_profile_start (&prof);
      orc_executor_run_backup (ex);
      orc_profile_stop (&prof);
    } else if (flags & ORC_TEST_FLAGS_EMULATE) {
      orc_profile_start (&prof);
      orc_executor_emulate (ex);
      orc_profile_stop (&prof);
    } else {
      orc_profile_start (&prof);
      orc_executor_run (ex);
      orc_profile_stop (&prof);
    }
  }
  ORC_DEBUG ("done running");

  orc_profile_get_ave_std (&prof, &ave, &std);

  for(i=0;i<4;i++){
    if (dest_exec[i]) orc_array_free (dest_exec[i]);
    if (dest_emul[i]) orc_array_free (dest_emul[i]);
  }
  for(i=0;i<8;i++){
    if (src[i]) orc_array_free (src[i]);
  }

  orc_executor_free (ex);
  orc_program_reset (program);

  return ave/(n*m);
}
/*
 * Run @program natively and through the emulator on the same random
 * input and compare the results.
 *
 * The program is compiled for the default target with that target's
 * default flags.  It is then run with orc_executor_run_backup() when
 * ORC_TEST_FLAGS_BACKUP is set in @flags, or orc_executor_run()
 * otherwise, and finally with orc_executor_emulate().  Destination arrays
 * are compared with orc_array_compare() and checked with
 * orc_array_check_out_of_bounds(); accumulators[0] is compared when the
 * program has an accumulator variable.
 *
 * Returns:
 *   ORC_TEST_FAILED         on a fatal compile result, a destination or
 *                           accumulator mismatch, or an out-of-bounds
 *                           failure (details are printed to stdout,
 *                           followed by the program's asm code);
 *   ORC_TEST_INDETERMINATE  when compilation is neither fatal nor
 *                           successful;
 *   ORC_TEST_OK             otherwise.
 *
 * The program is reset before returning on every path.
 */
OrcTestResult
orc_test_compare_output_full (OrcProgram *program, int flags)
{
  OrcExecutor *ex;
  int n;
  int m;
  OrcArray *dest_exec[4] = { NULL, NULL, NULL, NULL };
  OrcArray *dest_emul[4] = { NULL, NULL, NULL, NULL };
  OrcArray *src[8] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL };
  int i;
  int j;
  int k;
  OrcCompileResult result;
  int have_acc = FALSE;
  int acc_exec = 0, acc_emul = 0;
  int ret = ORC_TEST_OK;
  int bad = 0;
  int misalignment;

  ORC_DEBUG ("got here");

  {
    OrcTarget *target;
    /* Named distinctly so the test flags parameter is not shadowed. */
    unsigned int target_flags;

    target = orc_target_get_default ();
    target_flags = orc_target_get_default_flags (target);

    result = orc_program_compile_full (program, target, target_flags);
    if (ORC_COMPILE_RESULT_IS_FATAL(result)) {
      ret = ORC_TEST_FAILED;
      goto out;
    }
    if (!ORC_COMPILE_RESULT_IS_SUCCESSFUL(result)) {
      ret = ORC_TEST_INDETERMINATE;
      goto out;
    }
  }

  if (program->constant_n > 0) {
    n = program->constant_n;
  } else {
    n = 64 + (orc_random(&rand_context)&0xf);
  }

  ex = orc_executor_new (program);
  orc_executor_set_n (ex, n);
  if (program->is_2d) {
    if (program->constant_m > 0) {
      m = program->constant_m;
    } else {
      m = 8 + (orc_random(&rand_context)&0xf);
    }
  } else {
    m = 1;
  }
  orc_executor_set_m (ex, m);
  ORC_DEBUG("size %d %d", ex->n, ex->params[ORC_VAR_A1]);

  /* Each successive array gets a different misalignment value. */
  misalignment = 0;
  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].name == NULL) continue;

    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
      src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
      /* Fill the array just created: index by i-ORC_VAR_D1, not by i. */
      dest_exec[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_exec[i-ORC_VAR_D1], ORC_OOB_VALUE);
      dest_emul[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
          misalignment);
      orc_array_set_pattern (dest_emul[i-ORC_VAR_D1], ORC_OOB_VALUE);
      misalignment++;
    } else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
      switch (program->vars[i].param_type) {
        case ORC_PARAM_TYPE_INT:
          orc_executor_set_param (ex, i, 2);
          break;
        case ORC_PARAM_TYPE_FLOAT:
          orc_executor_set_param_float (ex, i, 2.0);
          break;
        case ORC_PARAM_TYPE_INT64:
          orc_executor_set_param_int64 (ex, i, 2);
          break;
        case ORC_PARAM_TYPE_DOUBLE:
          orc_executor_set_param_double (ex, i, 2.0);
          break;
        default:
          /* Other parameter types are left unset, as before. */
          break;
      }
    }
  }

  /* First pass: point the executor at the dest_exec arrays. */
  for(i=0;i<ORC_N_VARIABLES;i++){
    /* Unnamed slots were never given an array above; skip them so a
     * NULL array is never dereferenced. */
    if (program->vars[i].name == NULL) continue;

    if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
      orc_executor_set_array (ex, i, dest_exec[i-ORC_VAR_D1]->data);
      orc_executor_set_stride (ex, i, dest_exec[i-ORC_VAR_D1]->stride);
    }
    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
      orc_executor_set_array (ex, i, src[i-ORC_VAR_S1]->data);
      orc_executor_set_stride (ex, i, src[i-ORC_VAR_S1]->stride);
    }
  }

  ORC_DEBUG ("running");
  if (flags & ORC_TEST_FLAGS_BACKUP) {
    orc_executor_run_backup (ex);
  } else {
    orc_executor_run (ex);
  }
  ORC_DEBUG ("done running");

  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].vartype == ORC_VAR_TYPE_ACCUMULATOR) {
      acc_exec = ex->accumulators[0];
      have_acc = TRUE;
    }
  }

  /* Second pass: same sources, but results go to the dest_emul arrays. */
  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].name == NULL) continue;

    if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
      orc_executor_set_array (ex, i, dest_emul[i-ORC_VAR_D1]->data);
      orc_executor_set_stride (ex, i, dest_emul[i-ORC_VAR_D1]->stride);
    }
    if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
      ORC_DEBUG("setting array %p", src[i-ORC_VAR_S1]->data);
      orc_executor_set_array (ex, i, src[i-ORC_VAR_S1]->data);
      orc_executor_set_stride (ex, i, src[i-ORC_VAR_S1]->stride);
    }
  }

  orc_executor_emulate (ex);

  for(i=0;i<ORC_N_VARIABLES;i++){
    if (program->vars[i].vartype == ORC_VAR_TYPE_ACCUMULATOR) {
      acc_emul = ex->accumulators[0];
    }
  }

  for(k=ORC_VAR_D1;k<ORC_VAR_D1+4;k++){
    if (program->vars[k].size > 0) {
      if (!orc_array_compare (dest_exec[k-ORC_VAR_D1],
            dest_emul[k-ORC_VAR_D1], flags)) {
        printf("dest array %d bad\n", k);
        bad = TRUE;
      }
      if (!orc_array_check_out_of_bounds (dest_exec[k-ORC_VAR_D1])) {
        printf("out of bounds failure\n");
        ret = ORC_TEST_FAILED;
      }
    }
  }

  if (bad) {
    /* Dump every element: sources, then emulated/executed destinations,
     * with " *" appended to lines where they disagree. */
    for(j=0;j<m;j++){
      for(i=0;i<n;i++){
        int l;
        int line_bad = 0;

        printf("%2d %2d:", i, j);

        for(l=ORC_VAR_S1;l<ORC_VAR_S1+8;l++){
          if (program->vars[l].size > 0) {
            if (flags & ORC_TEST_FLAGS_FLOAT) {
              print_array_val_float (src[l-ORC_VAR_S1], i, j);
            } else {
              print_array_val_hex (src[l-ORC_VAR_S1], i, j);
            }
          }
        }

        printf(" ->");
        for(l=ORC_VAR_D1;l<ORC_VAR_D1+4;l++){
          if (program->vars[l].size > 0) {
            if (flags & ORC_TEST_FLAGS_FLOAT) {
              print_array_val_float (dest_emul[l-ORC_VAR_D1], i, j);
              print_array_val_float (dest_exec[l-ORC_VAR_D1], i, j);
              /* Float results are judged by float_compare(), not by the
               * printed values. */
              if (!float_compare (dest_emul[l-ORC_VAR_D1],
                    dest_exec[l-ORC_VAR_D1], i, j)) {
                line_bad = TRUE;
              }
            } else {
              orc_uint64 a = print_array_val_hex (dest_emul[l-ORC_VAR_D1],
                  i, j);
              orc_uint64 b = print_array_val_hex (dest_exec[l-ORC_VAR_D1],
                  i, j);
              if (a != b) {
                line_bad = TRUE;
              }
            }
          }
        }

        if (line_bad) {
          printf(" *");
        }
        printf("\n");
      }
    }

    ret = ORC_TEST_FAILED;
  }

  if (have_acc) {
    if (acc_emul != acc_exec) {
      for(j=0;j<m;j++){
        for(i=0;i<n;i++){
          printf("%2d %2d:", i, j);

          for(k=0;k<ORC_N_VARIABLES;k++){
            if (program->vars[k].name == NULL) continue;
            if (program->vars[k].vartype == ORC_VAR_TYPE_SRC &&
                program->vars[k].size > 0) {
              if (flags & ORC_TEST_FLAGS_FLOAT) {
                print_array_val_float (src[k-ORC_VAR_S1], i, j);
              } else {
                print_array_val_signed (src[k-ORC_VAR_S1], i, j);
              }
            }
          }

          printf(" -> acc\n");
        }
      }
      printf("acc %d %d\n", acc_emul, acc_exec);
      ret = ORC_TEST_FAILED;
    }
  }

  if (ret == ORC_TEST_FAILED) {
    printf("%s", orc_program_get_asm_code (program));
  }

  for(i=0;i<4;i++){
    if (dest_exec[i]) orc_array_free (dest_exec[i]);
    if (dest_emul[i]) orc_array_free (dest_emul[i]);
  }
  for(i=0;i<8;i++){
    if (src[i]) orc_array_free (src[i]);
  }

  orc_executor_free (ex);

out:
  orc_program_reset (program);

  return ret;
}
/*
 * Execute the program attached to @ex by calling each instruction's
 * emulateN function instead of running compiled code.
 *
 * The code is taken from ex->program->orccode when ex->program is set,
 * otherwise from ex->arrays[ORC_VAR_A2].  ex->accumulators[0..3] are
 * cleared before running.
 *
 * For every code variable with a non-zero size a scratch buffer of
 * ORC_MAX_VAR_SIZE * CHUNK_SIZE bytes is allocated; constants, parameters
 * and temporaries use these buffers, while source and destination
 * operands point into ex->arrays.  For 2D code the array pointers are
 * advanced by ex->params[var] * row for each of the m rows.  Each row is
 * processed in chunks of at most CHUNK_SIZE elements, and the count
 * passed to emulateN is shifted left by 1 or 2 for instructions flagged
 * X2 or X4.
 *
 * If a scratch or executor allocation fails, an error is logged and the
 * function returns without emulating anything.
 */
void
orc_executor_emulate (OrcExecutor *ex)
{
  int i;
  int j;
  int k;
  int m, m_index;
  OrcCode *code;
  OrcInstruction *insn;
  OrcStaticOpcode *opcode;
  OrcOpcodeExecutor *opcode_ex = NULL;
  void *tmpspace[ORC_N_COMPILER_VARIABLES] = { 0 };
  const char *program_name;

  if (ex->program) {
    code = ex->program->orccode;
    program_name = ex->program->name;
  } else {
    code = (OrcCode *)ex->arrays[ORC_VAR_A2];
    /* No OrcProgram to name in diagnostics on this path. */
    program_name = "(unknown)";
  }

  ex->accumulators[0] = 0;
  ex->accumulators[1] = 0;
  ex->accumulators[2] = 0;
  ex->accumulators[3] = 0;

  ORC_DEBUG("emulating");

  if (code == NULL) {
    ORC_ERROR("attempt to run program that failed to compile");
    ORC_ASSERT(0);
    /* Do not dereference code if the assertion does not abort. */
    return;
  }

  if (code->is_2d) {
    m = ORC_EXECUTOR_M(ex);
  } else {
    m = 1;
  }

  for(i=0;i<ORC_N_COMPILER_VARIABLES;i++){
    OrcCodeVariable *var = code->vars + i;

    if (var->size) {
      tmpspace[i] = malloc(ORC_MAX_VAR_SIZE * CHUNK_SIZE);
      if (tmpspace[i] == NULL) {
        ORC_ERROR("out of memory allocating emulation scratch space");
        goto cleanup;
      }
    }
  }

  /* Zero-initialised so that operand pointers of unused slots are NULL
   * rather than indeterminate; they are read by the debug output below. */
  opcode_ex = calloc(code->n_insns, sizeof(OrcOpcodeExecutor));
  if (opcode_ex == NULL && code->n_insns > 0) {
    ORC_ERROR("out of memory allocating opcode executors");
    goto cleanup;
  }

  /* Resolve the operand pointers of every instruction once up front. */
  for(j=0;j<code->n_insns;j++){
    insn = code->insns + j;
    opcode = insn->opcode;

    opcode_ex[j].emulateN = opcode->emulateN;
    opcode_ex[j].shift = 0;
    if (insn->flags & ORC_INSTRUCTION_FLAG_X2) {
      opcode_ex[j].shift = 1;
    } else if (insn->flags & ORC_INSTRUCTION_FLAG_X4) {
      opcode_ex[j].shift = 2;
    }

    for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++) {
      OrcCodeVariable *var = code->vars + insn->src_args[k];

      if (opcode->src_size[k] == 0) continue;

      if (var->vartype == ORC_VAR_TYPE_CONST) {
        opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
        /* FIXME hack */
        load_constant (tmpspace[insn->src_args[k]], 8,
            var->value.i);
      } else if (var->vartype == ORC_VAR_TYPE_PARAM) {
        opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
        /* FIXME hack */
        /* Low 32 bits come from the parameter's own slot, high 32 bits
         * from the slot (ORC_VAR_T1 - ORC_VAR_P1) entries further on. */
        load_constant (tmpspace[insn->src_args[k]], 8,
            (orc_uint64)(orc_uint32)ex->params[insn->src_args[k]] |
            (((orc_uint64)(orc_uint32)ex->params[insn->src_args[k] +
             (ORC_VAR_T1 - ORC_VAR_P1)])<<32));
      } else if (var->vartype == ORC_VAR_TYPE_TEMP) {
        opcode_ex[j].src_ptrs[k] = tmpspace[insn->src_args[k]];
      } else if (var->vartype == ORC_VAR_TYPE_SRC) {
        if (ORC_PTR_TO_INT(ex->arrays[insn->src_args[k]]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for src%d, program %s",
              (insn->src_args[k]-ORC_VAR_S1), program_name);
        }
        opcode_ex[j].src_ptrs[k] = ex->arrays[insn->src_args[k]];
      } else if (var->vartype == ORC_VAR_TYPE_DEST) {
        if (ORC_PTR_TO_INT(ex->arrays[insn->src_args[k]]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for dest%d, program %s",
              (insn->src_args[k]-ORC_VAR_D1), program_name);
        }
        opcode_ex[j].src_ptrs[k] = ex->arrays[insn->src_args[k]];
      }
    }

    for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++) {
      OrcCodeVariable *var = code->vars + insn->dest_args[k];

      if (opcode->dest_size[k] == 0) continue;

      if (var->vartype == ORC_VAR_TYPE_TEMP) {
        ORC_DEBUG("dest vartype tmp %d", insn->dest_args[k]);
        opcode_ex[j].dest_ptrs[k] = tmpspace[insn->dest_args[k]];
      } else if (var->vartype == ORC_VAR_TYPE_ACCUMULATOR) {
        opcode_ex[j].dest_ptrs[k] =
          &ex->accumulators[insn->dest_args[k] - ORC_VAR_A1];
      } else if (var->vartype == ORC_VAR_TYPE_DEST) {
        if (ORC_PTR_TO_INT(ex->arrays[insn->dest_args[k]]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for dest%d, program %s",
              (insn->dest_args[k]-ORC_VAR_D1), program_name);
        }
        opcode_ex[j].dest_ptrs[k] = ex->arrays[insn->dest_args[k]];
      }
    }

    ORC_DEBUG("opcode %s %p %p %p", opcode->name,
        opcode_ex[j].dest_ptrs[0], opcode_ex[j].src_ptrs[0],
        opcode_ex[j].src_ptrs[1]);
  }

  ORC_DEBUG("src ptr %p stride %d", ex->arrays[ORC_VAR_S1],
      ex->params[ORC_VAR_S1]);

  for(m_index=0;m_index<m;m_index++){
    ORC_DEBUG("m_index %d m %d", m_index, m);

    /* Re-point array operands at the start of row m_index. */
    for(j=0;j<code->n_insns;j++){
      insn = code->insns + j;
      opcode = insn->opcode;

      for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++) {
        OrcCodeVariable *var = code->vars + insn->src_args[k];

        if (opcode->src_size[k] == 0) continue;

        if (var->vartype == ORC_VAR_TYPE_SRC) {
          opcode_ex[j].src_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[insn->src_args[k]],
                ex->params[insn->src_args[k]]*m_index);
        } else if (var->vartype == ORC_VAR_TYPE_DEST) {
          opcode_ex[j].src_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[insn->src_args[k]],
                ex->params[insn->src_args[k]]*m_index);
        }
      }

      for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++) {
        OrcCodeVariable *var = code->vars + insn->dest_args[k];

        if (opcode->dest_size[k] == 0) continue;

        if (var->vartype == ORC_VAR_TYPE_DEST) {
          opcode_ex[j].dest_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[insn->dest_args[k]],
                ex->params[insn->dest_args[k]]*m_index);
        }
      }
    }

    /* Run all instructions over one chunk before moving to the next;
     * the final chunk of a row may be shorter than CHUNK_SIZE. */
    for(i=0;i<ex->n;i+=CHUNK_SIZE){
      for(j=0;j<code->n_insns;j++){
        if (ex->n - i >= CHUNK_SIZE) {
          opcode_ex[j].emulateN (opcode_ex + j, i,
              CHUNK_SIZE << opcode_ex[j].shift);
        } else {
          opcode_ex[j].emulateN (opcode_ex + j, i,
              (ex->n - i) << opcode_ex[j].shift);
        }
      }
    }
  }

cleanup:
  free (opcode_ex);
  for(i=0;i<ORC_N_COMPILER_VARIABLES;i++){
    free (tmpspace[i]);
  }
}