static void orc_multiply_and_acc_biref_Xxn_s16_u8 (int16_t * d1, int d1_stride, const int16_t * s1, int s1_stride, const uint8_t * s2, int s2_stride, const uint8_t * s3, int s3_stride, int p1, int p2, int n, int m) { OrcExecutor _ex, *ex = &_ex; OrcProgram *p; void (*func) (OrcExecutor *); p = motion_funcs[n >> 1].block_accumulate_biref; ex->program = p; ex->n = n; ORC_EXECUTOR_M (ex) = m; ex->arrays[ORC_VAR_D1] = d1; ex->params[ORC_VAR_D1] = d1_stride; ex->arrays[ORC_VAR_S1] = (void *) s1; ex->params[ORC_VAR_S1] = s1_stride; ex->arrays[ORC_VAR_S2] = (void *) s2; ex->params[ORC_VAR_S2] = s2_stride; ex->arrays[ORC_VAR_S3] = (void *) s3; ex->params[ORC_VAR_S3] = s3_stride; ex->params[ORC_VAR_P1] = p1; ex->params[ORC_VAR_P2] = p2; func = p->code_exec; func (ex); }
/*
 * Run the "orc_blend_u8" Orc program over a 2-D region of bytes.
 *
 * On first use the program is built and compiled under the Orc "once"
 * mutex and cached in a function-local static; later calls reuse it.
 *
 * The instruction sequence appended below is, per element:
 *   t1 = convubw (d1)
 *   t2 = convubw (s1)
 *   t2 = subw (t2, t1)
 *   t2 = mullw (t2, p1)
 *   t1 = shlw (t1, 8)
 *   t2 = addw (t1, t2)
 *   t2 = shruw (t2, 8)
 *   d1 = convsuswb (t2)
 * NOTE(review): reads as d = ((d << 8) + (s - d) * p1) >> 8 in 16-bit
 * arithmetic -- confirm against the Orc opcode definitions.
 *
 * d1 / d1_stride: destination array and its stride parameter
 * s1 / s1_stride: source array and its stride parameter
 * p1:             scalar parameter forwarded to the program
 * n:              element count stored in the executor's n field
 * m:              value stored through ORC_EXECUTOR_M
 */
void
orc_blend_u8 (guint8 * d1, int d1_stride, const guint8 * s1, int s1_stride,
    int p1, int n, int m)
{
  static int program_ready = 0;
  static OrcProgram *program = 0;
  OrcExecutor executor;
  OrcExecutor *ex = &executor;
  void (*run) (OrcExecutor *);

  if (!program_ready) {
    orc_once_mutex_lock ();
    /* Re-test under the lock: another caller may have built it already. */
    if (!program_ready) {
      OrcCompileResult result;

      program = orc_program_new ();
      orc_program_set_2d (program);
      orc_program_set_name (program, "orc_blend_u8");
      orc_program_set_backup_function (program, _backup_orc_blend_u8);

      /* Variables: 1-byte dest/source, constant 8, 2-byte param and temps. */
      orc_program_add_destination (program, 1, "d1");
      orc_program_add_source (program, 1, "s1");
      orc_program_add_constant (program, 1, 8, "c1");
      orc_program_add_parameter (program, 2, "p1");
      orc_program_add_temporary (program, 2, "t1");
      orc_program_add_temporary (program, 2, "t2");

      /* Instructions; single-source opcodes pass ORC_VAR_D1 as the
       * trailing argument, exactly as the original call sequence does. */
      orc_program_append (program, "convubw",
          ORC_VAR_T1, ORC_VAR_D1, ORC_VAR_D1);
      orc_program_append (program, "convubw",
          ORC_VAR_T2, ORC_VAR_S1, ORC_VAR_D1);
      orc_program_append (program, "subw",
          ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_T1);
      orc_program_append (program, "mullw",
          ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_P1);
      orc_program_append (program, "shlw",
          ORC_VAR_T1, ORC_VAR_T1, ORC_VAR_C1);
      orc_program_append (program, "addw",
          ORC_VAR_T2, ORC_VAR_T1, ORC_VAR_T2);
      orc_program_append (program, "shruw",
          ORC_VAR_T2, ORC_VAR_T2, ORC_VAR_C1);
      orc_program_append (program, "convsuswb",
          ORC_VAR_D1, ORC_VAR_T2, ORC_VAR_D1);

      /* The compile result is captured but not inspected here. */
      result = orc_program_compile (program);
    }
    program_ready = TRUE;
    orc_once_mutex_unlock ();
  }

  ex->program = program;
  ex->n = n;
  ORC_EXECUTOR_M (ex) = m;

  ex->arrays[ORC_VAR_D1] = d1;
  ex->params[ORC_VAR_D1] = d1_stride;
  ex->arrays[ORC_VAR_S1] = (void *) s1;
  ex->params[ORC_VAR_S1] = s1_stride;
  ex->params[ORC_VAR_P1] = p1;

  run = program->code_exec;
  run (ex);
}
/*
 * orc_executor_emulate:
 * @ex: executor describing the program (or raw code), arrays and parameters
 *
 * Interprets the compiled code attached to @ex one opcode at a time instead
 * of running generated machine code.  The code object is taken from
 * ex->program->orccode when a program is set, otherwise from
 * ex->arrays[ORC_VAR_A2].
 *
 * The work is done in three phases:
 *   1. allocate a scratch buffer for every variable with a non-zero size and
 *      one OrcOpcodeExecutor per instruction;
 *   2. bind each instruction's source/destination pointers (constants and
 *      parameters are materialised into their scratch buffers);
 *   3. for each of the m rows (m is 1 unless the code is 2-D), rebase the
 *      array pointers by stride * row and run all instructions over the row
 *      in slices of at most CHUNK_SIZE elements.
 *
 * The four accumulators in @ex are reset to 0 before emulation starts.
 * If the code is NULL an error is logged and ORC_ASSERT(0) is raised.  If an
 * allocation fails an error is logged and the function returns without
 * emulating anything.
 */
void
orc_executor_emulate (OrcExecutor *ex)
{
  int i;
  int j;
  int k;
  int m, m_index;
  OrcCode *code;
  OrcInstruction *insn;
  OrcStaticOpcode *opcode;
  OrcOpcodeExecutor *opcode_ex = NULL;
  void *tmpspace[ORC_N_COMPILER_VARIABLES] = { 0 };
  const char *program_name;

  if (ex->program) {
    code = ex->program->orccode;
    program_name = ex->program->name;
  } else {
    /* No program object: the code pointer travels in the A2 array slot. */
    code = (OrcCode *)ex->arrays[ORC_VAR_A2];
    program_name = NULL;
  }
  /* The diagnostics below must not dereference a missing program/name. */
  if (program_name == NULL) {
    program_name = "(unknown)";
  }

  ex->accumulators[0] = 0;
  ex->accumulators[1] = 0;
  ex->accumulators[2] = 0;
  ex->accumulators[3] = 0;

  ORC_DEBUG("emulating");

  if (code == NULL) {
    ORC_ERROR("attempt to run program that failed to compile");
    ORC_ASSERT(0);
    /* Never continue with a NULL code object, even if the assert returns. */
    return;
  }

  if (code->is_2d) {
    m = ORC_EXECUTOR_M(ex);
  } else {
    m = 1;
  }

  /* Phase 1a: one scratch buffer per variable that is actually used. */
  for(i=0;i<ORC_N_COMPILER_VARIABLES;i++){
    OrcCodeVariable *var = code->vars + i;

    if (var->size) {
      tmpspace[i] = malloc(ORC_MAX_VAR_SIZE * CHUNK_SIZE);
      if (tmpspace[i] == NULL) {
        ORC_ERROR("out of memory allocating emulation scratch space");
        goto cleanup;
      }
    }
  }

  /* Phase 1b: zero-initialised so that operand slots which are skipped
   * below (size 0) hold NULL rather than indeterminate values. */
  opcode_ex = calloc(code->n_insns, sizeof(*opcode_ex));
  if (opcode_ex == NULL && code->n_insns > 0) {
    ORC_ERROR("out of memory allocating opcode executors");
    goto cleanup;
  }

  /* Phase 2: bind operands for every instruction. */
  for(j=0;j<code->n_insns;j++){
    insn = code->insns + j;
    opcode = insn->opcode;

    opcode_ex[j].emulateN = opcode->emulateN;
    /* x2/x4 instructions process 2 or 4 times as many elements per call. */
    opcode_ex[j].shift = 0;
    if (insn->flags & ORC_INSTRUCTION_FLAG_X2) {
      opcode_ex[j].shift = 1;
    } else if (insn->flags & ORC_INSTRUCTION_FLAG_X4) {
      opcode_ex[j].shift = 2;
    }

    for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++) {
      int arg = insn->src_args[k];
      OrcCodeVariable *var = code->vars + arg;

      if (opcode->src_size[k] == 0) continue;

      if (var->vartype == ORC_VAR_TYPE_CONST) {
        opcode_ex[j].src_ptrs[k] = tmpspace[arg];
        /* FIXME hack */
        load_constant (tmpspace[arg], 8, var->value.i);
      } else if (var->vartype == ORC_VAR_TYPE_PARAM) {
        opcode_ex[j].src_ptrs[k] = tmpspace[arg];
        /* FIXME hack */
        /* Low 32 bits come from the parameter slot itself, high 32 bits
         * from the slot (ORC_VAR_T1 - ORC_VAR_P1) entries further on. */
        load_constant (tmpspace[arg], 8,
            (orc_uint64)(orc_uint32)ex->params[arg] |
            (((orc_uint64)(orc_uint32)ex->params[arg +
              (ORC_VAR_T1 - ORC_VAR_P1)])<<32));
      } else if (var->vartype == ORC_VAR_TYPE_TEMP) {
        opcode_ex[j].src_ptrs[k] = tmpspace[arg];
      } else if (var->vartype == ORC_VAR_TYPE_SRC) {
        if (ORC_PTR_TO_INT(ex->arrays[arg]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for src%d, program %s",
              (arg-ORC_VAR_S1), program_name);
        }
        opcode_ex[j].src_ptrs[k] = ex->arrays[arg];
      } else if (var->vartype == ORC_VAR_TYPE_DEST) {
        if (ORC_PTR_TO_INT(ex->arrays[arg]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for dest%d, program %s",
              (arg-ORC_VAR_D1), program_name);
        }
        opcode_ex[j].src_ptrs[k] = ex->arrays[arg];
      }
    }

    for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++) {
      int arg = insn->dest_args[k];
      OrcCodeVariable *var = code->vars + arg;

      if (opcode->dest_size[k] == 0) continue;

      if (var->vartype == ORC_VAR_TYPE_TEMP) {
        ORC_DEBUG("dest vartype tmp %d", arg);
        opcode_ex[j].dest_ptrs[k] = tmpspace[arg];
      } else if (var->vartype == ORC_VAR_TYPE_ACCUMULATOR) {
        opcode_ex[j].dest_ptrs[k] = &ex->accumulators[arg - ORC_VAR_A1];
      } else if (var->vartype == ORC_VAR_TYPE_DEST) {
        if (ORC_PTR_TO_INT(ex->arrays[arg]) & (var->size - 1)) {
          ORC_ERROR("Unaligned array for dest%d, program %s",
              (arg-ORC_VAR_D1), program_name);
        }
        opcode_ex[j].dest_ptrs[k] = ex->arrays[arg];
      }
    }

    ORC_DEBUG("opcode %s %p %p %p", opcode->name,
        opcode_ex[j].dest_ptrs[0], opcode_ex[j].src_ptrs[0],
        opcode_ex[j].src_ptrs[1]);
  }

  ORC_DEBUG("src ptr %p stride %d", ex->arrays[ORC_VAR_S1],
      ex->params[ORC_VAR_S1]);

  /* Phase 3: run row by row. */
  for(m_index=0;m_index<m;m_index++){
    ORC_DEBUG("m_index %d m %d", m_index, m);

    /* Rebase every array operand to the start of the current row. */
    for(j=0;j<code->n_insns;j++){
      insn = code->insns + j;
      opcode = insn->opcode;

      for(k=0;k<ORC_STATIC_OPCODE_N_SRC;k++) {
        int arg = insn->src_args[k];
        OrcCodeVariable *var = code->vars + arg;

        if (opcode->src_size[k] == 0) continue;

        if (var->vartype == ORC_VAR_TYPE_SRC ||
            var->vartype == ORC_VAR_TYPE_DEST) {
          opcode_ex[j].src_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[arg], ex->params[arg]*m_index);
        }
      }

      for(k=0;k<ORC_STATIC_OPCODE_N_DEST;k++) {
        int arg = insn->dest_args[k];
        OrcCodeVariable *var = code->vars + arg;

        if (opcode->dest_size[k] == 0) continue;

        if (var->vartype == ORC_VAR_TYPE_DEST) {
          opcode_ex[j].dest_ptrs[k] =
            ORC_PTR_OFFSET(ex->arrays[arg], ex->params[arg]*m_index);
        }
      }
    }

    /* Process the row in slices; the last slice may be shorter. */
    for(i=0;i<ex->n;i+=CHUNK_SIZE){
      int count = ex->n - i;

      if (count >= CHUNK_SIZE) {
        count = CHUNK_SIZE;
      }
      for(j=0;j<code->n_insns;j++){
        opcode_ex[j].emulateN (opcode_ex + j, i,
            count << opcode_ex[j].shift);
      }
    }
  }

cleanup:
  free (opcode_ex);
  for(i=0;i<ORC_N_COMPILER_VARIABLES;i++){
    free (tmpspace[i]);
  }
}
/*
 * orc_executor_set_m:
 * @ex: executor to modify
 * @m:  value to store
 *
 * Stores @m in the executor slot addressed by the ORC_EXECUTOR_M accessor.
 * orc_executor_emulate reads that slot back as its outer iteration count
 * when the code is marked 2-D.
 */
void
orc_executor_set_m (OrcExecutor *ex, int m)
{
  ORC_EXECUTOR_M (ex) = m;
}