/*
 * Code generator for the MULTU opcode.
 *
 * With INTERPRET_MULTU defined, the opcode is delegated to gencallinterp()
 * using the MULTU handler.  Otherwise EAX and EDX are claimed (write-only)
 * for `lo` and `hi`, the rs and rt operands are mapped to host registers,
 * rs is copied into EAX and mul_reg32() is issued on rt.
 * NOTE(review): mul_reg32 presumably emits the one-operand x86 MUL whose
 * result lands in EDX:EAX -- confirm against the assembler helpers.
 */
void genmultu()
{
#ifdef INTERPRET_MULTU
    gencallinterp((unsigned long)MULTU, 0);
#else
    int src_reg;
    int factor_reg;

    /* Pin the result registers before the operands are mapped. */
    allocate_register_manually_w(EAX, (unsigned long *)&lo, 0);
    allocate_register_manually_w(EDX, (unsigned long *)&hi, 0);

    src_reg = allocate_register((unsigned long *)dst->f.r.rs);
    factor_reg = allocate_register((unsigned long *)dst->f.r.rt);

    mov_reg32_reg32(EAX, src_reg);
    mul_reg32(factor_reg);
#endif
}
/*
 * Code generator for the MULTU opcode (per-instance `state` variant).
 *
 * With INTERPRET_MULTU defined, the opcode is delegated to gencallinterp()
 * using the MULTU entry of state->current_instruction_table.  Otherwise EAX
 * and EDX are claimed (write-only) for state->lo and state->hi, rs and rt
 * are mapped to host registers, rs is copied into EAX and mul_reg32() is
 * issued on rt.
 */
void genmultu(usf_state_t * state)
{
#ifdef INTERPRET_MULTU
    gencallinterp(state, (unsigned int)state->current_instruction_table.MULTU, 0);
#else
    int src_reg;
    int factor_reg;

    /* Pin the result registers before the operands are mapped. */
    allocate_register_manually_w(state, EAX, (unsigned int *)&state->lo, 0);
    allocate_register_manually_w(state, EDX, (unsigned int *)&state->hi, 0);

    src_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rs);
    factor_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rt);

    mov_reg32_reg32(state, EAX, src_reg);
    mul_reg32(state, factor_reg);
#endif
}
/*
 * Code generator for the MULT opcode.
 *
 * With INTERPRET_MULT defined, the opcode is delegated to gencallinterp()
 * using cached_interpreter_table.MULT.  Otherwise EAX and EDX are claimed
 * (write-only) for `lo` and `hi`, rs and rt are mapped to host registers,
 * rs is copied into EAX and imul_reg32() is issued on rt.
 */
void genmult(void)
{
#ifdef INTERPRET_MULT
    gencallinterp((unsigned int)cached_interpreter_table.MULT, 0);
#else
    int src_reg;
    int factor_reg;

    /* Pin the result registers before the operands are mapped. */
    allocate_register_manually_w(EAX, (unsigned int *)&lo, 0);
    allocate_register_manually_w(EDX, (unsigned int *)&hi, 0);

    src_reg = allocate_register((unsigned int *)dst->f.r.rs);
    factor_reg = allocate_register((unsigned int *)dst->f.r.rt);

    mov_reg32_reg32(EAX, src_reg);
    imul_reg32(factor_reg);
#endif
}
void genmultu(void) { #ifdef INTERPRET_MULTU gencallinterp((unsigned int)cached_interpreter_table.MULTU, 0); #else int rs, rt; allocate_register_manually_w(EAX, (unsigned int *)r4300_mult_lo(), 0); allocate_register_manually_w(EDX, (unsigned int *)r4300_mult_hi(), 0); rs = allocate_register((unsigned int*)g_dev.r4300.recomp.dst->f.r.rs); rt = allocate_register((unsigned int*)g_dev.r4300.recomp.dst->f.r.rt); mov_reg32_reg32(EAX, rs); mul_reg32(rt); #endif }
/*
 * Code generator for the SLLV opcode (per-instance `state` variant).
 *
 * With INTERPRET_SLLV defined, the opcode is delegated to gencallinterp().
 * Otherwise rs is forced into ECX (the shift count), rt is mapped for
 * reading and rd for writing, and "rd = rt << cl" is generated.  When rd
 * itself was mapped to ECX the shift is done in a scratch register obtained
 * from lru_register()/free_register() and copied into rd afterwards, so the
 * count in ECX is not overwritten before the shift.
 */
void gensllv(usf_state_t * state)
{
#ifdef INTERPRET_SLLV
    gencallinterp(state, (unsigned int)state->current_instruction_table.SLLV, 0);
#else
    int value_reg, dest_reg, work_reg;

    allocate_register_manually(state, ECX, (unsigned int *)state->dst->f.r.rs);
    value_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rt);
    dest_reg = allocate_register_w(state, (unsigned int *)state->dst->f.r.rd);

    /* Shift in place unless the destination collides with the count. */
    work_reg = dest_reg;
    if (dest_reg == ECX)
    {
        work_reg = lru_register(state);
        free_register(state, work_reg);
    }

    mov_reg32_reg32(state, work_reg, value_reg);
    shl_reg32_cl(state, work_reg);

    if (dest_reg == ECX)
        mov_reg32_reg32(state, dest_reg, work_reg);
#endif
}
/*
 * Code generator for the SRAV opcode.
 *
 * With INTERPRET_SRAV defined, the opcode is delegated to gencallinterp().
 * Otherwise rs is forced into ECX (the shift count), rt is mapped for
 * reading and rd for writing, and sar_reg32_cl() is applied to a copy of
 * rt.  When rd itself was mapped to ECX the shift is done in a scratch
 * register obtained from lru_register()/free_register() and copied into rd
 * afterwards, so the count in ECX is not overwritten before the shift.
 */
void gensrav(void)
{
#ifdef INTERPRET_SRAV
    gencallinterp((unsigned int)cached_interpreter_table.SRAV, 0);
#else
    int value_reg, dest_reg, work_reg;

    allocate_register_manually(ECX, (unsigned int *)dst->f.r.rs);
    value_reg = allocate_register((unsigned int *)dst->f.r.rt);
    dest_reg = allocate_register_w((unsigned int *)dst->f.r.rd);

    /* Shift in place unless the destination collides with the count. */
    work_reg = dest_reg;
    if (dest_reg == ECX)
    {
        work_reg = lru_register();
        free_register(work_reg);
    }

    mov_reg32_reg32(work_reg, value_reg);
    sar_reg32_cl(work_reg);

    if (dest_reg == ECX)
        mov_reg32_reg32(dest_reg, work_reg);
#endif
}
/*
 * Allocate two temporary data registers and return the addressing mode
 * that describes the pair.  Each register is recorded in the other's
 * associated_regs mask.
 */
ADDRESS *mdata_register P0 (void)
{
    REG     first;
    REG     second;
    ADDRESS *pair;

    first = next_data;
    next_data = allocate_register (first, D_REG);
    second = next_data;
    next_data = allocate_register (second, D_REG);

    pair = mk_mreg (first, second);
    pair->deep = reg_in_use[first];

    /* cross-link the two halves */
    associated_regs[first] |= REGBIT (second);
    associated_regs[second] |= REGBIT (first);
    return pair;
}
/*
 * Code generator for the SLLV opcode.
 *
 * With INTERPRET_SLLV defined, the opcode is delegated to gencallinterp().
 * Otherwise rs is forced into ECX (the shift count), rt is mapped for
 * reading and rd for writing, and "rd = rt << cl" is generated.  When rd
 * itself was mapped to ECX the shift is done in a scratch register obtained
 * from lru_register()/free_register() and copied into rd afterwards, so the
 * count in ECX is not overwritten before the shift.
 */
void gensllv()
{
#ifdef INTERPRET_SLLV
    gencallinterp((unsigned long)SLLV, 0);
#else
    int value_reg, dest_reg, work_reg;

    allocate_register_manually(ECX, (unsigned long *)dst->f.r.rs);
    value_reg = allocate_register((unsigned long *)dst->f.r.rt);
    dest_reg = allocate_register_w((unsigned long *)dst->f.r.rd);

    /* Shift in place unless the destination collides with the count. */
    work_reg = dest_reg;
    if (dest_reg == ECX)
    {
        work_reg = lru_register();
        free_register(work_reg);
    }

    mov_reg32_reg32(work_reg, value_reg);
    shl_reg32_cl(work_reg);

    if (dest_reg == ECX)
        mov_reg32_reg32(dest_reg, work_reg);
#endif
}
/*
 * Generate the condition test for BLTZ: code that stores 1 into
 * branch_taken when rs is negative and 0 otherwise.
 *
 * is64() selects how the sign-carrying word of rs is compared with zero:
 *   0   -> rs is mapped as a 32-bit register,
 *   -1  -> the upper word is compared directly in memory,
 *   else-> the upper half is obtained through allocate_64_register2().
 * The compare is followed by the same four-instruction store sequence in
 * every case.
 */
static void genbltz_test(void)
{
    int width = is64((unsigned int *)dst->f.i.rs);

    /* Step 1: compare the sign-carrying word with zero. */
    if (width == 0)
    {
        int low_reg = allocate_register((unsigned int *)dst->f.i.rs);
        cmp_reg32_imm32(low_reg, 0);
    }
    else if (width == -1)
    {
        cmp_m32_imm32(((unsigned int *)dst->f.i.rs)+1, 0);
    }
    else
    {
        int high_reg = allocate_64_register2((unsigned int *)dst->f.i.rs);
        cmp_reg32_imm32(high_reg, 0);
    }

    /*
     * Step 2: materialise the flag.  The jump distances follow the sizes
     * annotated in the original code (10 for each store, 2 for the short
     * jump).
     */
    jge_rj(12);                                         /* skip 10 + 2 */
    mov_m32_imm32((unsigned int *)(&branch_taken), 1);  /* 10 */
    jmp_imm_short(10);                                  /* 2 */
    mov_m32_imm32((unsigned int *)(&branch_taken), 0);  /* 10 */
}
/*
 * Code generator for the DIVU opcode (per-instance `state` variant).
 *
 * With INTERPRET_DIVU defined, the opcode is delegated to gencallinterp().
 * Otherwise EAX and EDX are claimed (write-only) for state->lo and
 * state->hi, rs and rt are mapped, and the generated code compares rt with
 * zero and jumps over the divide sequence when they are equal.
 */
void gendivu(usf_state_t * state)
{
#ifdef INTERPRET_DIVU
    gencallinterp(state, (unsigned int)state->current_instruction_table.DIVU, 0);
#else
    int dividend_reg, divisor_reg;
    int skip_len;

    allocate_register_manually_w(state, EAX, (unsigned int *)&state->lo, 0);
    allocate_register_manually_w(state, EDX, (unsigned int *)&state->hi, 0);
    dividend_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rs);
    divisor_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rt);

    /*
     * Length of the sequence to skip, per the original size annotations:
     * xor (2) + div (2), plus 2 for the mov unless rs already is EAX.
     */
    skip_len = 2 + 2;
    if (dividend_reg != EAX)
        skip_len += 2;

    cmp_reg32_imm32(state, divisor_reg, 0);
    je_rj(state, skip_len);
    mov_reg32_reg32(state, EAX, dividend_reg);  /* 0 or 2 */
    xor_reg32_reg32(state, EDX, EDX);           /* 2 */
    div_reg32(state, divisor_reg);              /* 2 */
#endif
}
/*
 * Hand out the current next_float register as a temporary and return the
 * addressing mode describing it.  next_float is then advanced through
 * allocate_register (..., F_REG), and the descriptor's `deep' field is
 * taken from reg_in_use for the register that was handed out.
 */
ADDRESS *float_register P0 (void)
{
    ADDRESS *mode;

    mode = mk_reg (next_float);
    next_float = allocate_register (next_float, F_REG);
    mode->deep = reg_in_use[mode->preg];
    return mode;
}
/*
 * Hand out the current next_data register as a temporary and return the
 * addressing mode describing it.  next_data is then advanced through
 * allocate_register (..., D_REG), and the descriptor's `deep' field is
 * taken from reg_in_use for the register that was handed out.
 */
ADDRESS *data_register P0 (void)
{
    ADDRESS *mode;

    mode = mk_reg (next_data);
    next_data = allocate_register (next_data, D_REG);
    mode->deep = reg_in_use[mode->preg];
    return mode;
}
/*
 * Code generator for the DIVU opcode.
 *
 * With INTERPRET_DIVU defined, the opcode is delegated to gencallinterp().
 * Otherwise EAX and EDX are claimed (write-only) for `lo` and `hi`, rs and
 * rt are mapped, and the generated code compares rt with zero and jumps
 * over the divide sequence when they are equal.
 */
void gendivu()
{
#ifdef INTERPRET_DIVU
    gencallinterp((unsigned long)DIVU, 0);
#else
    int dividend_reg, divisor_reg;
    int skip_len;

    allocate_register_manually_w(EAX, (unsigned long *)&lo, 0);
    allocate_register_manually_w(EDX, (unsigned long *)&hi, 0);
    dividend_reg = allocate_register((unsigned long *)dst->f.r.rs);
    divisor_reg = allocate_register((unsigned long *)dst->f.r.rt);

    /*
     * Length of the sequence to skip, per the original size annotations:
     * xor (2) + div (2), plus 2 for the mov unless rs already is EAX.
     */
    skip_len = 2 + 2;
    if (dividend_reg != EAX)
        skip_len += 2;

    cmp_reg32_imm32(divisor_reg, 0);
    je_rj(skip_len);
    mov_reg32_reg32(EAX, dividend_reg);  /* 0 or 2 */
    xor_reg32_reg32(EDX, EDX);           /* 2 */
    div_reg32(divisor_reg);              /* 2 */
#endif
}
void gendivu(void) { #ifdef INTERPRET_DIVU gencallinterp((unsigned int)cached_interpreter_table.DIVU, 0); #else int rs, rt; allocate_register_manually_w(EAX, (unsigned int *)r4300_mult_lo(), 0); allocate_register_manually_w(EDX, (unsigned int *)r4300_mult_hi(), 0); rs = allocate_register((unsigned int*)g_dev.r4300.recomp.dst->f.r.rs); rt = allocate_register((unsigned int*)g_dev.r4300.recomp.dst->f.r.rt); cmp_reg32_imm32(rt, 0); je_rj((rs == EAX ? 0 : 2) + 2 + 2); mov_reg32_reg32(EAX, rs); // 0 or 2 xor_reg32_reg32(EDX, EDX); // 2 div_reg32(rt); // 2 #endif }
/*
 * Code generator for the DIV opcode.
 *
 * With INTERPRET_DIV defined, the opcode is delegated to gencallinterp().
 * Otherwise EAX and EDX are claimed (write-only) for `lo` and `hi`, rs and
 * rt are mapped, and the generated code compares rt with zero and jumps
 * over the cdq/idiv sequence when they are equal.
 */
void gendiv(void)
{
#ifdef INTERPRET_DIV
    gencallinterp((unsigned int)cached_interpreter_table.DIV, 0);
#else
    int dividend_reg, divisor_reg;
    int skip_len;

    allocate_register_manually_w(EAX, (unsigned int *)&lo, 0);
    allocate_register_manually_w(EDX, (unsigned int *)&hi, 0);
    dividend_reg = allocate_register((unsigned int *)dst->f.r.rs);
    divisor_reg = allocate_register((unsigned int *)dst->f.r.rt);

    /*
     * Length of the sequence to skip, per the original size annotations:
     * cdq (1) + idiv (2), plus 2 for the mov unless rs already is EAX.
     */
    skip_len = 1 + 2;
    if (dividend_reg != EAX)
        skip_len += 2;

    cmp_reg32_imm32(divisor_reg, 0);
    je_rj(skip_len);
    mov_reg32_reg32(EAX, dividend_reg);  /* 0 or 2 */
    cdq();                               /* 1 */
    idiv_reg32(divisor_reg);             /* 2 */
#endif
}
/*
 * Hand out the current next_addr register as a temporary and return the
 * addressing mode describing it.  next_addr is then advanced through
 * allocate_register (..., A_REG), and the descriptor's `deep' field is
 * taken from reg_in_use for the register that was handed out.
 */
ADDRESS *address_register P0 (void)
{
    ADDRESS *mode;

    mode = mk_reg (next_addr);
    next_addr = allocate_register (next_addr, A_REG);
    mode->deep = reg_in_use[mode->preg];
    return mode;
}
/*
 * Allocate three temporary data registers and return the addressing mode
 * that describes the triple.  Each register records the other two in its
 * associated_regs mask.
 */
ADDRESS *xdata_register P0 (void)
{
    REG     first;
    REG     second;
    REG     third;
    ADDRESS *triple;

    first = next_data;
    next_data = allocate_register (first, D_REG);
    second = next_data;
    next_data = allocate_register (second, D_REG);
    third = next_data;
    next_data = allocate_register (third, D_REG);

    triple = mk_xreg (first, second, third);
    triple->deep = reg_in_use[first];

    /* cross-link all three members */
    associated_regs[first] |= (REGBIT (second) | REGBIT (third));
    associated_regs[second] |= (REGBIT (first) | REGBIT (third));
    associated_regs[third] |= (REGBIT (first) | REGBIT (second));
    return triple;
}
static void genbltz_test(void) { int rs_64bit = is64((unsigned int *)dst->f.i.rs); if (rs_64bit == 0) { #ifdef __x86_64__ int rs = allocate_register_32((unsigned int *)dst->f.i.rs); #else int rs = allocate_register((unsigned int *)dst->f.i.rs); #endif cmp_reg32_imm32(rs, 0); #ifdef __x86_64__ setl_m8rel((unsigned char *) &branch_taken); #else jge_rj(12); mov_m32_imm32((unsigned int *)(&branch_taken), 1); // 10 jmp_imm_short(10); // 2 mov_m32_imm32((unsigned int *)(&branch_taken), 0); // 10 #endif } else if (rs_64bit == -1) { #ifdef __x86_64__ cmp_m32rel_imm32(((unsigned int *)dst->f.i.rs)+1, 0); setl_m8rel((unsigned char *) &branch_taken); #else cmp_m32_imm32(((unsigned int *)dst->f.i.rs)+1, 0); jge_rj(12); mov_m32_imm32((unsigned int *)(&branch_taken), 1); // 10 jmp_imm_short(10); // 2 mov_m32_imm32((unsigned int *)(&branch_taken), 0); // 10 #endif } else { #ifdef __x86_64__ int rs = allocate_register_64((uint64_t*)dst->f.i.rs); cmp_reg64_imm8(rs, 0); setl_m8rel((unsigned char *) &branch_taken); #else int rs2 = allocate_64_register2((unsigned int *)dst->f.i.rs); cmp_reg32_imm32(rs2, 0); jge_rj(12); mov_m32_imm32((unsigned int *)(&branch_taken), 1); // 10 jmp_imm_short(10); // 2 mov_m32_imm32((unsigned int *)(&branch_taken), 0); // 10 #endif } }
/*
 * Code generator for the SRA opcode.
 *
 * With INTERPRET_SRA defined, the opcode is delegated to gencallinterp().
 * Otherwise rt is mapped for reading and rd for writing, rt is copied into
 * rd and sar_reg32_imm8() is applied to rd with the instruction's `sa`
 * field as the shift amount.
 */
void gensra(void)
{
#ifdef INTERPRET_SRA
    gencallinterp((unsigned int)cached_interpreter_table.SRA, 0);
#else
    int value_reg;
    int dest_reg;

    value_reg = allocate_register((unsigned int *)dst->f.r.rt);
    dest_reg = allocate_register_w((unsigned int *)dst->f.r.rd);

    mov_reg32_reg32(dest_reg, value_reg);
    sar_reg32_imm8(dest_reg, dst->f.r.sa);
#endif
}
/*
 * Code generator for the SRA opcode (per-instance `state` variant).
 *
 * With INTERPRET_SRA defined, the opcode is delegated to gencallinterp().
 * Otherwise rt is mapped for reading and rd for writing, rt is copied into
 * rd and sar_reg32_imm8() is applied to rd with the instruction's `sa`
 * field as the shift amount.
 */
void gensra(usf_state_t * state)
{
#ifdef INTERPRET_SRA
    gencallinterp(state, (unsigned int)state->current_instruction_table.SRA, 0);
#else
    int value_reg;
    int dest_reg;

    value_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rt);
    dest_reg = allocate_register_w(state, (unsigned int *)state->dst->f.r.rd);

    mov_reg32_reg32(state, dest_reg, value_reg);
    sar_reg32_imm8(state, dest_reg, state->dst->f.r.sa);
#endif
}
/*
 * Code generator for the SRL opcode.
 *
 * With INTERPRET_SRL defined, the opcode is delegated to gencallinterp().
 * Otherwise rt is mapped for reading and rd for writing, rt is copied into
 * rd and shr_reg32_imm8() is applied to rd with the instruction's `sa`
 * field as the shift amount.
 */
void gensrl(void)
{
#ifdef INTERPRET_SRL
    gencallinterp((unsigned int)cached_interpreter_table.SRL, 0);
#else
    int value_reg;
    int dest_reg;

    value_reg = allocate_register((unsigned int *)g_dev.r4300.recomp.dst->f.r.rt);
    dest_reg = allocate_register_w((unsigned int *)g_dev.r4300.recomp.dst->f.r.rd);

    mov_reg32_reg32(dest_reg, value_reg);
    shr_reg32_imm8(dest_reg, g_dev.r4300.recomp.dst->f.r.sa);
#endif
}
/*
 * Code generator for the SRA opcode.
 *
 * With INTERPRET_SRA defined, the opcode is delegated to gencallinterp().
 * Otherwise rt is mapped for reading and rd for writing, rt is copied into
 * rd and sar_reg32_imm8() is applied to rd with the instruction's `sa`
 * field as the shift amount.
 */
void gensra()
{
#ifdef INTERPRET_SRA
    gencallinterp((unsigned long)SRA, 0);
#else
    int value_reg;
    int dest_reg;

    value_reg = allocate_register((unsigned long *)dst->f.r.rt);
    dest_reg = allocate_register_w((unsigned long *)dst->f.r.rd);

    mov_reg32_reg32(dest_reg, value_reg);
    sar_reg32_imm8(dest_reg, dst->f.r.sa);
#endif
}
/*
 * Code generator for the SUBU opcode (per-instance `state` variant).
 *
 * With INTERPRET_SUBU defined, the opcode is delegated to gencallinterp().
 * Otherwise rs and rt are mapped for reading, rd for writing, and
 * "rd = rs - rt" is generated.  If rd shares a host register with either
 * source, the subtraction is carried out in a scratch register obtained
 * from lru_register()/free_register() and the result is then copied to rd.
 */
void gensubu(usf_state_t * state)
{
#ifdef INTERPRET_SUBU
    gencallinterp(state, (unsigned int)state->current_instruction_table.SUBU, 0);
#else
    int lhs_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rs);
    int rhs_reg = allocate_register(state, (unsigned int *)state->dst->f.r.rt);
    int dest_reg = allocate_register_w(state, (unsigned int *)state->dst->f.r.rd);

    if (dest_reg == rhs_reg || dest_reg == lhs_reg)
    {
        /* destination aliases a source: compute in a scratch register */
        int scratch = lru_register(state);
        free_register(state, scratch);
        mov_reg32_reg32(state, scratch, lhs_reg);
        sub_reg32_reg32(state, scratch, rhs_reg);
        mov_reg32_reg32(state, dest_reg, scratch);
    }
    else
    {
        mov_reg32_reg32(state, dest_reg, lhs_reg);
        sub_reg32_reg32(state, dest_reg, rhs_reg);
    }
#endif
}
// this function is similar to allocate_register except it loads // a 64 bits value, and return the register number of the MSB part int allocate_64_register2(unsigned int *addr) { int reg1, reg2, i; // is it already cached as a 32 bits value ? for (i=0; i<8; i++) { if (last_access[i] != NULL && reg_content[i] == addr) { if (r64[i] == -1) { allocate_register(addr); reg2 = allocate_register(dirty[i] ? NULL : addr+1); r64[i] = reg2; r64[reg2] = i; if (dirty[i]) { reg_content[reg2] = addr+1; dirty[reg2] = 1; mov_reg32_reg32(reg2, i); sar_reg32_imm8(reg2, 31); } return reg2; } } } reg1 = allocate_register(addr); reg2 = allocate_register(addr+1); r64[reg1] = reg2; r64[reg2] = reg1; return reg2; }
/*
 * Code generator for the ADDU opcode.
 *
 * With INTERPRET_ADDU defined, the opcode is delegated to gencallinterp().
 * Otherwise rs and rt are mapped for reading, rd for writing, and
 * "rd = rs + rt" is generated.  If rd shares a host register with either
 * source, the addition is carried out in a scratch register obtained from
 * lru_register()/free_register() and the result is then copied to rd.
 */
void genaddu(void)
{
#ifdef INTERPRET_ADDU
    gencallinterp((unsigned int)cached_interpreter_table.ADDU, 0);
#else
    int lhs_reg = allocate_register((unsigned int *)g_dev.r4300.recomp.dst->f.r.rs);
    int rhs_reg = allocate_register((unsigned int *)g_dev.r4300.recomp.dst->f.r.rt);
    int dest_reg = allocate_register_w((unsigned int *)g_dev.r4300.recomp.dst->f.r.rd);

    if (dest_reg == rhs_reg || dest_reg == lhs_reg)
    {
        /* destination aliases a source: compute in a scratch register */
        int scratch = lru_register();
        free_register(scratch);
        mov_reg32_reg32(scratch, lhs_reg);
        add_reg32_reg32(scratch, rhs_reg);
        mov_reg32_reg32(dest_reg, scratch);
    }
    else
    {
        mov_reg32_reg32(dest_reg, lhs_reg);
        add_reg32_reg32(dest_reg, rhs_reg);
    }
#endif
}
/*
 * Code generator for the SUBU opcode.
 *
 * With INTERPRET_SUBU defined, the opcode is delegated to gencallinterp().
 * Otherwise rs and rt are mapped for reading, rd for writing, and
 * "rd = rs - rt" is generated.  If rd shares a host register with either
 * source, the subtraction is carried out in a scratch register obtained
 * from lru_register()/free_register() and the result is then copied to rd.
 */
void gensubu()
{
#ifdef INTERPRET_SUBU
    gencallinterp((unsigned long)SUBU, 0);
#else
    int lhs_reg = allocate_register((unsigned long *)dst->f.r.rs);
    int rhs_reg = allocate_register((unsigned long *)dst->f.r.rt);
    int dest_reg = allocate_register_w((unsigned long *)dst->f.r.rd);

    if (dest_reg == rhs_reg || dest_reg == lhs_reg)
    {
        /* destination aliases a source: compute in a scratch register */
        int scratch = lru_register();
        free_register(scratch);
        mov_reg32_reg32(scratch, lhs_reg);
        sub_reg32_reg32(scratch, rhs_reg);
        mov_reg32_reg32(dest_reg, scratch);
    }
    else
    {
        mov_reg32_reg32(dest_reg, lhs_reg);
        sub_reg32_reg32(dest_reg, rhs_reg);
    }
#endif
}
// this function is similar to allocate_register except it loads // a 64 bits value, and return the register number of the MSB part int allocate_64_register2(usf_state_t * state, unsigned int *addr) { int reg1, reg2, i; // is it already cached as a 32 bits value ? for (i=0; i<8; i++) { if (state->last_access[i] != NULL && state->reg_content[i] == addr) { if (state->r64[i] == -1) { allocate_register(state, addr); reg2 = allocate_register(state, state->dirty[i] ? NULL : addr+1); state->r64[i] = reg2; state->r64[reg2] = i; if (state->dirty[i]) { state->reg_content[reg2] = addr+1; state->dirty[reg2] = 1; mov_reg32_reg32(state, reg2, i); sar_reg32_imm8(state, reg2, 31); } return reg2; } } } reg1 = allocate_register(state, addr); reg2 = allocate_register(state, addr+1); state->r64[reg1] = reg2; state->r64[reg2] = reg1; return reg2; }
// this function is similar to allocate_register except it loads // a 64 bits value, and return the register number of the LSB part int allocate_64_register1(unsigned int *addr) { int reg1, reg2, i; // is it already cached as a 32 bits value ? for (i=0; i<8; i++) { if (g_dev.r4300.regcache_state.last_access[i] != NULL && g_dev.r4300.regcache_state.reg_content[i] == addr) { if (g_dev.r4300.regcache_state.r64[i] == -1) { allocate_register(addr); reg2 = allocate_register(g_dev.r4300.regcache_state.dirty[i] ? NULL : addr+1); g_dev.r4300.regcache_state.r64[i] = reg2; g_dev.r4300.regcache_state.r64[reg2] = i; if (g_dev.r4300.regcache_state.dirty[i]) { g_dev.r4300.regcache_state.reg_content[reg2] = addr+1; g_dev.r4300.regcache_state.dirty[reg2] = 1; mov_reg32_reg32(reg2, i); sar_reg32_imm8(reg2, 31); } return i; } } } reg1 = allocate_register(addr); reg2 = allocate_register(addr+1); g_dev.r4300.regcache_state.r64[reg1] = reg2; g_dev.r4300.regcache_state.r64[reg2] = reg1; return reg1; }
/*
 * Code generator for the SUB opcode.
 *
 * With INTERPRET_SUB defined, the opcode is delegated to gencallinterp().
 * Otherwise rs and rt are mapped for reading, rd for writing, and
 * "rd = rs - rt" is generated.  If rd shares a host register with either
 * source, the subtraction is carried out in a scratch register obtained
 * from lru_register()/free_register() and the result is then copied to rd.
 */
void gensub(void)
{
#ifdef INTERPRET_SUB
    gencallinterp((unsigned int)cached_interpreter_table.SUB, 0);
#else
    int lhs_reg = allocate_register((unsigned int *)dst->f.r.rs);
    int rhs_reg = allocate_register((unsigned int *)dst->f.r.rt);
    int dest_reg = allocate_register_w((unsigned int *)dst->f.r.rd);

    if (dest_reg == rhs_reg || dest_reg == lhs_reg)
    {
        /* destination aliases a source: compute in a scratch register */
        int scratch = lru_register();
        free_register(scratch);
        mov_reg32_reg32(scratch, lhs_reg);
        sub_reg32_reg32(scratch, rhs_reg);
        mov_reg32_reg32(dest_reg, scratch);
    }
    else
    {
        mov_reg32_reg32(dest_reg, lhs_reg);
        sub_reg32_reg32(dest_reg, rhs_reg);
    }
#endif
}
/*
 * Compile a packet filter program into native instructions.
 *
 * filter  - the filter program (array of filter words).
 * size    - number of words in `filter'; must be below NET_MAX_FILTER.
 * lenp    - on success, receives the size in bytes of the generated code.
 *
 * Returns a pointer to the generated code, allocated with
 * kmem_alloc_exec(), or NULL if the working state or the code buffer
 * could not be allocated, or if the filter needs more registers than
 * allocate_register() can provide.  *lenp is only written on success.
 *
 * The temporary `struct local' working state is released on every exit
 * path.
 */
filter_fct_t
net_filter_alloc(filter_t *filter, unsigned int size, unsigned int *lenp)
{
    struct local *s;
    int len, oldi, i, j, ncommon, sp;
    int type, value, arg, op, reg, reg1, dst, commoni;
    int *instructions, *instp;
#if USE_EXTRA_REGS
    int oldmaxreg;
#endif
    boolean_t compiling;

#define SCHAR_MAX 127	/* machine/machlimits.h, anyone? */
    assert(NET_MAX_FILTER <= SCHAR_MAX);
    assert(NET_FILTER_STACK_DEPTH <= SCHAR_MAX);
    assert(NREGS <= SCHAR_MAX);

    assert(size < NET_MAX_FILTER);

    s = (struct local *) kalloc(sizeof *s);
    if (s == NULL)
        return NULL;	/* no working state: nothing to release yet */

#if USE_EXTRA_REGS
    s->maxreg = INITIAL_NSCRATCHREGS;
#endif
    len = 0;
    compiling = FALSE;

    /* This loop runs at least twice, once with compiling==FALSE to determine
       the length of the instructions we will compile, and once with
       compiling==TRUE to compile them.  The code generated on the two passes
       must be the same.  In the USE_EXTRA_REGS case, the loop can be re-run
       an extra time while !compiling, if we decide to use the callee-saves
       registers.  This is because we may be able to generate better code with
       the help of these registers than before.  */
    while (1) {

        /* Identify values that we can potentially preserve in a register to
           avoid having to reload them.  All immediate values and references to
           known offsets in the header or data are candidates.  The results of
           this loop are the same on every run, so with a bit of work we
           could run it just once; but this is not a time-critical
           application.  */
        ncommon = 0;
        for (i = 0; i < size; i++) {
            oldi = i;
            arg = NETF_ARG(filter[i]);
            if (arg == NETF_PUSHLIT) {
                type = NF_LITERAL;
                value = filter[++i];
                if (value == 0)
                    continue;
            } else if (arg >= NETF_PUSHSTK) {
                continue;
            } else if (arg >= NETF_PUSHHDR) {
                type = NF_HEADER;
                value = arg - NETF_PUSHHDR;
            } else if (arg >= NETF_PUSHWORD) {
                type = NF_DATA;
                value = arg - NETF_PUSHWORD;
            } else {
                continue;
            }
            for (j = 0; j < ncommon; j++) {
                if (s->common[j].type == type && s->common[j].value == value) {
                    s->common[j].nuses++;
                    break;
                }
            }
            if (j == ncommon) {
                s->common[j].type = type;
                s->common[j].value = value;
                s->common[j].nuses = 1;
                ncommon++;
            }
            s->commonpos[oldi] = j;
        }

#if USE_EXTRA_REGS
        oldmaxreg = s->maxreg;
#endif

        /* Initially, no registers hold common values or are on the stack.  */
        for (i = 0; i < ncommon; i++)
            s->common[i].reg = NO_REG;
        for (i = 0; i < NSCRATCHREGS; i++) {
            s->regs[scratchregs[i]].commoni = NOT_COMMON_VALUE;
            s->regs[scratchregs[i]].stacktimes = 0;
        }

        /* Now read through the filter and generate code. */
        sp = -1;	/* sp points to top element */
        for (i = 0; i < size; i++) {
            /* While only measuring, emit into the scratch buffer. */
            if (!compiling)
                instp = junk_filter;

            assert(sp >= -1);
            assert(sp < NET_FILTER_STACK_DEPTH - 1);
            commoni = s->commonpos[i];
            arg = NETF_ARG(filter[i]);
            op = NETF_OP(filter[i]);

            /* Generate code to get the required value into a register and
               set `reg' to the number of this register. */
            switch (arg) {
            case NETF_PUSHLIT:
                value = filter[++i];
                reg = s->common[commoni].reg;
                if (reg == 0) {
                    if ((reg = allocate_register(s, commoni)) == 0)
                        goto fail;
                    assert(value >= 0);	/* Comes from unsigned short. */
                    if (value > MAX_LDO) {
                        *instp++ = LDIL(value & ~MAX_LDO, reg);
                        value &= MAX_LDO;
                        if (value != 0)
                            *instp++ = LDO(value, reg, reg);
                    } else
                        *instp++ = LDO(value, 0, reg);
                }
                s->common[commoni].nuses--;
                break;
            case NETF_NOPUSH:
                reg = s->stackregs[sp--];
                s->regs[reg].stacktimes--;
                break;
            case NETF_PUSHZERO:
                reg = 0;
                break;
            case NETF_PUSHIND:
            case NETF_PUSHHDRIND:
                reg1 = s->stackregs[sp--];
                s->regs[reg1].stacktimes--;
                if (arg == NETF_PUSHIND)
                    *instp++ = ARITH_OP(OP_COMCLR, ARITH_ULT, reg1, REG_ARG1,
                                        REG_RET0);
                                /* comclr,< <reg1>,arg1,ret0 */
                else
                    *instp++ = COMICLR(ARITH_UGT,
                                       NET_HDW_HDR_MAX/sizeof (unsigned short),
                                       reg1, REG_RET0);
                                /* comiclr,> N,<reg1>,ret0 */
                assert((NET_HDW_HDR_MAX / sizeof(unsigned short)) <=
                       MAX_COMICLR);
                *instp++ = BV_N(0, REG_RTN);
                                /* bv,n (rp) */
                if ((reg = allocate_register(s, -1)) == 0)
                    goto fail;
                *instp++ = LDHX_S(reg1,
                                  (arg == NETF_PUSHIND) ? REG_ARG0 : REG_ARG2,
                                  reg);
                                /* ldhx,s reg1(arg0/2),reg */
                break;
            default:
                if (arg >= NETF_PUSHSTK)
                    reg = s->stackregs[sp - (arg - NETF_PUSHSTK)];
                else if (arg >= NETF_PUSHWORD) {
                    assert(2 * (NETF_PUSHHDR - NETF_PUSHWORD) <= MAX_LDO);
                    assert(NETF_PUSHHDR - NETF_PUSHWORD <= MAX_COMICLR);
                    assert(NETF_PUSHSTK - NETF_PUSHHDR <= MAX_LDO);
                    reg = s->common[commoni].reg;
                    if (reg == 0) {
                        if ((reg = allocate_register(s, commoni)) == 0)
                            goto fail;
                        if (arg < NETF_PUSHHDR) {
                            value = arg - NETF_PUSHWORD;
                            *instp++ = COMICLR(ARITH_ULT, value, REG_ARG1,
                                               REG_RET0);
                                /* comiclr,< value,arg1,ret0 */
                            *instp++ = BV_N(0, REG_RTN);
                                /* bv,n (rp) */
                            reg1 = REG_ARG0;
                        } else {
                            value = arg - NETF_PUSHHDR;
                            reg1 = REG_ARG2;
                        }
                        *instp++ = LDH(2 * value, reg1, reg);
                    }
                    s->common[commoni].nuses--;
                }
            }

            /* Now generate code to do `op' on `reg1' (lhs) and `reg' (rhs).  */
            if (op != NETF_NOP) {
                reg1 = s->stackregs[sp--];
                s->regs[reg1].stacktimes--;
            }
            switch (op) {
            case NETF_OP(NETF_CAND):
            case NETF_OP(NETF_COR):
            case NETF_OP(NETF_CNAND):
            case NETF_OP(NETF_CNOR):
                dst = -1;
                /* fallthrough */
            case NETF_OP(NETF_NOP):
                break;
            default:
                /* Allocate a register to put the result in.  */
                if ((dst = allocate_register(s, -1)) == 0)
                    goto fail;
            }
            switch (op) {
            case NETF_OP(NETF_NOP):
                dst = reg;
                break;
            case NETF_OP(NETF_EQ):
            case NETF_OP(NETF_LT):
            case NETF_OP(NETF_LE):
            case NETF_OP(NETF_GT):
            case NETF_OP(NETF_GE):
            case NETF_OP(NETF_NEQ):
                switch (op) {
                case NETF_OP(NETF_EQ): j = ARITH_NE; break;
                case NETF_OP(NETF_LT): j = ARITH_UGE; break;
                case NETF_OP(NETF_LE): j = ARITH_UGT; break;
                case NETF_OP(NETF_GT): j = ARITH_ULE; break;
                case NETF_OP(NETF_GE): j = ARITH_ULT; break;
                case NETF_OP(NETF_NEQ): j = ARITH_EQ; break;
                }
                *instp++ = ARITH_OP(OP_COMCLR, j, reg1, reg, dst);
                *instp++ = LDI(1, dst);
                break;
            case NETF_OP(NETF_AND):
            case NETF_OP(NETF_OR):
            case NETF_OP(NETF_XOR):
            case NETF_OP(NETF_ADD):
            case NETF_OP(NETF_SUB):
                switch (op) {
                case NETF_OP(NETF_AND): j = OP_AND; break;
                case NETF_OP(NETF_OR): j = OP_OR; break;
                case NETF_OP(NETF_XOR): j = OP_XOR; break;
                case NETF_OP(NETF_ADD): j = OP_ADD; break;
                case NETF_OP(NETF_SUB): j = OP_SUB; break;
                }
                *instp++ = ARITH_OP(j, ARITH_NEVER, reg1, reg, dst);
                if (op == NETF_OP(NETF_ADD) || op == NETF_OP(NETF_SUB))
                    *instp++ = EXTRU(dst, 31, 16, dst);
                /* Adds and subtracts can produce results that don't fit in
                   16 bits so they have to be masked.  The logical operations
                   can't so they don't.  */
                break;
            case NETF_OP(NETF_LSH):
            case NETF_OP(NETF_RSH):
                *instp++ = SUBI(31, reg, REG_RET0);
                *instp++ = MTSAR(REG_RET0);
                if (op == NETF_OP(NETF_LSH)) {
                    *instp++ = ZVDEP(reg1, 32, dst);
                    *instp++ = EXTRU(dst, 31, 16, dst);
                } else
                    /* NOTE(review): this extracts from `reg' (the rhs, also
                       the source of the shift count) while the LSH path
                       shifts `reg1' (the lhs) -- confirm that `reg' rather
                       than `reg1' is intended here.  Left unchanged.  */
                    *instp++ = VEXTRU(reg, 32, dst);
                /* For some reason, all arithmetic is done in 16 bits,
                   so the result of LSH has to be masked with 0xFFFF.  The
                   result of RSH doesn't since it can't be any bigger than
                   the 16-bit value that was shifted.
                   We use ret0 to compute the shift amount because we can't use
                   reg or reg1 (which might have values we subsequently use),
                   nor dst (which might be the same as reg1).  Alternatively, we
                   could allocate another register, but we would need to
                   temporarily do s->regs[dst].stacktimes++ to avoid just
                   getting dst again.  */
                break;
            case NETF_OP(NETF_COR):
                /* comb,<>,n reg1,reg,$x | bv (rp) | ldi 1,ret0 | $x:
                   I have found no way to do this in less than three
                   instructions (as for the other NETF_C* operations), unless
                   it be to branch to a "bv (rp) | ldi 1,ret0" postamble,
                   and what would be the point in that?  */
                *instp++ = COMB_SKIP_1(COND_EQ, 1, 1, reg1, reg);
                *instp++ = BV(0, REG_RTN);
                *instp++ = LDI(1, REG_RET0);
                break;
            case NETF_OP(NETF_CNAND):
                /* xor,= reg1,reg,ret0 | bv,n (rp)
                   This leaves a non-zero (true) value in ret0 if the values
                   are different.  */
                *instp++ = ARITH_OP(OP_XOR, ARITH_EQ, reg1, reg, REG_RET0);
                *instp++ = BV_N(0, REG_RTN);
                break;
            case NETF_OP(NETF_CAND):
            case NETF_OP(NETF_CNOR):
                /* comclr,{=|<>} reg1,reg,ret0 | bv,n (rp) */
                j = (op == NETF_OP(NETF_CAND)) ? ARITH_EQ : ARITH_NE;
                *instp++ = ARITH_OP(OP_COMCLR, j, reg1, reg, REG_RET0);
                *instp++ = BV_N(0, REG_RTN);
                break;
            default:
                printf("op == 0x%x\n", op);
                panic("net_filter_alloc: bad op");
                /* Should have been caught by parse_net_filter().  */
            }
            /* If the op generated a result, push it on the stack.  */
            if (dst >= 0) {
                s->stackregs[++sp] = dst;
                s->regs[dst].stacktimes++;
            }
            if (!compiling) {
                assert(instp - junk_filter <= MAX_INSTR_PER_ITEM);
                len += instp - junk_filter;
            }
        }
        if (compiling) {
            /* If the stack contains any values, we are supposed to return 0 or
               1 according as the top-of-stack is zero or not.  Since the only
               place we are called requires just zero-false/nonzero-true, we
               simply copy the value into ret0.  If the stack is empty, we
               return TRUE.  */
            *instp++ = BV(0, REG_RTN);		/* bv (rp) */
            if (sp >= 0)
                *instp++ = COPY(s->stackregs[sp], REG_RET0);
            else
                *instp++ = LDI(1, REG_RET0);
            break;
        } else {
            len += 2;
#if USE_EXTRA_REGS
            if (s->maxreg > oldmaxreg) {
                len = 0;
                continue;
            }
            len += compile_preamble(NULL, s);
#endif
        }
        if ((instructions = kmem_alloc_exec(len * sizeof (int))) == NULL) {
            /* Release the working state; it used to leak on this path. */
            kfree((vm_offset_t) s, sizeof *s);
            return NULL;
        }
        instp = instructions;
#if USE_EXTRA_REGS
        instp += compile_preamble(instp, s);
#endif
        compiling = TRUE;
    }

    assert(instp - instructions == len);
    *lenp = len * sizeof (int);
    fdcache(HP700_SID_KERNEL, (vm_offset_t)instructions, len * sizeof (int));
    kfree((vm_offset_t) s, sizeof *s);
    return (filter_fct_t) instructions;

fail:
    /* Register allocation can only fail during the measuring pass. */
    assert(!compiling);
    kfree((vm_offset_t) s, sizeof *s);
    printf("net_filter_alloc: failed to compile (filter too complex)\n");
    printf("-- will work, but more slowly; consider enabling USE_EXTRA_REGS\n");
    return NULL;
}