/*
 * emit REX byte
 */
static void
emit_rex(struct bpf_jit_state *st, uint32_t op, uint32_t reg, uint32_t rm)
{
    uint8_t rex;

    /* mark operand registers as used */
    USED(st->reguse, reg);
    USED(st->reguse, rm);

    rex = 0;
    if (BPF_CLASS(op) == EBPF_ALU64 ||
            op == (BPF_ST | BPF_MEM | EBPF_DW) ||
            op == (BPF_STX | BPF_MEM | EBPF_DW) ||
            op == (BPF_STX | EBPF_XADD | EBPF_DW) ||
            op == (BPF_LD | BPF_IMM | EBPF_DW) ||
            (BPF_CLASS(op) == BPF_LDX &&
            BPF_MODE(op) == BPF_MEM &&
            BPF_SIZE(op) != BPF_W))
        rex |= REX_W;

    if (IS_EXT_REG(reg))
        rex |= REX_R;

    if (IS_EXT_REG(rm))
        rex |= REX_B;

    /* store using SIL, DIL */
    if (op == (BPF_STX | BPF_MEM | BPF_B) && (reg == RDI || reg == RSI))
        rex |= REX_PREFIX;

    if (rex != 0) {
        rex |= REX_PREFIX;
        emit_bytes(st, &rex, sizeof(rex));
    }
}
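/*
 * For reference, a standalone sketch (not part of the JIT above) of how
 * the four REX bits combine: the prefix byte is 0100WRXB, REX.W selects
 * 64-bit operand size, and REX.R/REX.B extend the ModRM reg/rm fields so
 * registers r8-r15 become encodable.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* e.g. "add r11, r9": 64-bit op, both operands in the extended set */
    uint8_t rex = 0x40 /* fixed prefix */ | 0x08 /* W */ |
                  0x04 /* R */ | 0x01 /* B */;
    printf("rex = %#x\n", rex);    /* prints 0x4d */
    return 0;
}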
/*
 * Because we really don't have an IR, this stuff is a little messy.
 */
static int
F(int code, int v0, int v1)
{
    u_int hash;
    int val;
    struct valnode *p;

    hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
    hash %= MODULUS;

    for (p = hashtbl[hash]; p; p = p->next)
        if (p->code == code && p->v0 == v0 && p->v1 == v1)
            return p->val;

    val = ++curval;
    if (BPF_MODE(code) == BPF_IMM &&
        (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
        vmap[val].const_val = v0;
        vmap[val].is_const = 1;
    }
    p = next_vnode++;
    p->val = val;
    p->code = code;
    p->v0 = v0;
    p->v1 = v1;
    p->next = hashtbl[hash];
    hashtbl[hash] = p;
    return val;
}
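/*
 * Hypothetical illustration (driver code invented, not from the source)
 * of what the value-numbering table buys the optimizer: structurally
 * identical expressions get the same value number, so a redundant
 * computation can be recognized without a real IR.
 */
int va = F(BPF_LD|BPF_W|BPF_ABS, 12, 0);
int vb = F(BPF_LD|BPF_W|BPF_ABS, 12, 0);    /* hash hit: vb == va */
int vc = F(BPF_LD|BPF_W|BPF_ABS, 16, 0);    /* different operand: vc != va */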
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
    register int c = s->code;

    if (c == NOP)
        return -1;

    switch (BPF_CLASS(c)) {

    case BPF_RET:
        return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
            (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

    case BPF_LD:
    case BPF_LDX:
        return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
            (BPF_MODE(c) == BPF_MEM) ? s->k : -1;

    case BPF_ST:
        return A_ATOM;

    case BPF_STX:
        return X_ATOM;

    case BPF_JMP:
    case BPF_ALU:
        if (BPF_SRC(c) == BPF_X)
            return AX_ATOM;
        return A_ATOM;

    case BPF_MISC:
        return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
    }
    abort();
    /* NOTREACHED */
}
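/*
 * Sketch of the atom numbering this relies on (values as in the usual
 * libpcap optimize.c layout; confirm against the real header): the
 * scratch memory slots occupy atoms 0..BPF_MEMWORDS-1, which is why a
 * BPF_MEM load can return s->k directly as its atom.
 */
#define A_ATOM      BPF_MEMWORDS        /* the accumulator */
#define X_ATOM      (BPF_MEMWORDS+1)    /* the index register */
#define N_ATOMS     (BPF_MEMWORDS+2)
#define AX_ATOM     N_ATOMS             /* pseudo-atom: both A and X */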
static void
decode_bpf_code(uint16_t code)
{
    uint16_t i = code & ~BPF_CLASS(code);

    printxval(bpf_class, BPF_CLASS(code), "BPF_???");
    switch (BPF_CLASS(code)) {
    case BPF_LD:
    case BPF_LDX:
        tprints(" | ");
        printxval(bpf_size, BPF_SIZE(code), "BPF_???");
        tprints(" | ");
        printxval(bpf_mode, BPF_MODE(code), "BPF_???");
        break;
    case BPF_ST:
    case BPF_STX:
        if (i)
            tprintf(" | %#x /* %s */", i, "BPF_???");
        break;
    case BPF_ALU:
        tprints(" | ");
        printxval(bpf_src, BPF_SRC(code), "BPF_???");
        tprints(" | ");
        printxval(bpf_op_alu, BPF_OP(code), "BPF_???");
        break;
    case BPF_JMP:
        tprints(" | ");
        printxval(bpf_src, BPF_SRC(code), "BPF_???");
        tprints(" | ");
        printxval(bpf_op_jmp, BPF_OP(code), "BPF_???");
        break;
    case BPF_RET:
        tprints(" | ");
        printxval(bpf_rval, BPF_RVAL(code), "BPF_???");
        i &= ~BPF_RVAL(code);
        if (i)
            tprintf(" | %#x /* %s */", i, "BPF_???");
        break;
    case BPF_MISC:
        tprints(" | ");
        printxval(bpf_miscop, BPF_MISCOP(code), "BPF_???");
        i &= ~BPF_MISCOP(code);
        if (i)
            tprintf(" | %#x /* %s */", i, "BPF_???");
        break;
    }
}
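/*
 * A worked decode (hypothetical call; constants are the standard classic
 * BPF encoding): 0x28 splits into class 0x00 (BPF_LD), size 0x08 (BPF_H)
 * and mode 0x20 (BPF_ABS), i.e. "load half-word at absolute packet offset".
 */
decode_bpf_code(0x28);    /* prints: BPF_LD | BPF_H | BPF_ABS */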
int
bpf_validate(const struct sock_fprog *bpf)
{
    uint32_t i, from;
    const struct sock_filter *p;

    if (!bpf)
        return 0;

    if (bpf->len < 1)
        return 0;

    for (i = 0; i < bpf->len; ++i) {
        p = &bpf->filter[i];
        switch (BPF_CLASS(p->code)) {
        /*
         * Check that memory operations use valid addresses.
         */
        case BPF_LD:
        case BPF_LDX:
            switch (BPF_MODE(p->code)) {
            case BPF_IMM:
                break;
            case BPF_ABS:
            case BPF_IND:
            case BPF_MSH:
                /*
                 * There's no maximum packet data size
                 * in userland.  The runtime packet length
                 * check suffices.
                 */
                break;
            case BPF_MEM:
                if (p->k >= BPF_MEMWORDS)
                    return 0;
                break;
            case BPF_LEN:
                break;
            default:
                return 0;
            }
            break;
        case BPF_ST:
        case BPF_STX:
            if (p->k >= BPF_MEMWORDS)
                return 0;
            break;
        case BPF_ALU:
            switch (BPF_OP(p->code)) {
            case BPF_ADD:
            case BPF_SUB:
            case BPF_MUL:
            case BPF_OR:
            case BPF_AND:
            case BPF_LSH:
            case BPF_RSH:
            case BPF_NEG:
                break;
            case BPF_DIV:
                /*
                 * Check for constant division by 0.
                 */
                if (BPF_RVAL(p->code) == BPF_K && p->k == 0)
                    return 0;
                break;
            default:
                return 0;
            }
            break;
        case BPF_JMP:
            /*
             * Check that jumps are within the code block,
             * and that unconditional branches don't go
             * backwards as a result of an overflow.
             * Unconditional branches have a 32-bit offset,
             * so they could overflow; we check to make
             * sure they don't.  Conditional branches have
             * an 8-bit offset, and the from address is <=
             * BPF_MAXINSNS, and we assume that BPF_MAXINSNS
             * is sufficiently small that adding 255 to it
             * won't overflow.
             *
             * We know that len is <= BPF_MAXINSNS, and we
             * assume that BPF_MAXINSNS is < the maximum size
             * of a u_int, so that i + 1 doesn't overflow.
             *
             * For userland, we don't know that the from
             * or len are <= BPF_MAXINSNS, but we know that
             * from <= len, and, except on a 64-bit system,
             * it's unlikely that len, if it truly reflects
             * the size of the program we've been handed,
             * will be anywhere near the maximum size of
             * a u_int.  We also don't check for backward
             * branches, as we currently support them in
             * userland for the protochain operation.
             */
            from = i + 1;
            switch (BPF_OP(p->code)) {
            case BPF_JA:
                if (from + p->k >= bpf->len)
                    return 0;
                break;
            case BPF_JEQ:
            case BPF_JGT:
            case BPF_JGE:
            case BPF_JSET:
                if (from + p->jt >= bpf->len ||
                    from + p->jf >= bpf->len)
                    return 0;
                break;
            default:
                return 0;
            }
            break;
        case BPF_RET:
            break;
        case BPF_MISC:
            break;
        default:
            return 0;
        }
    }
    return BPF_CLASS(bpf->filter[bpf->len - 1].code) == BPF_RET;
}
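/*
 * Minimal usage sketch (not from the original source): the shortest
 * program that passes the checks above is a single "return 65535"
 * instruction -- it ends in BPF_RET, performs no memory access, and
 * contains no jumps.
 */
static struct sock_filter accept_all[] = {
    { BPF_RET | BPF_K, 0, 0, 65535 },   /* accept up to 65535 bytes */
};
static struct sock_fprog accept_prog = {
    .len = 1,
    .filter = accept_all,
};
/* bpf_validate(&accept_prog) returns 1 */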
static void
opt_peep(struct block *b)
{
    struct slist *s;
    struct slist *next, *last;
    int val;

    s = b->stmts;
    if (s == 0)
        return;

    last = s;
    for (/*empty*/; /*empty*/; s = next) {
        /*
         * Skip over nops.
         */
        s = this_op(s);
        if (s == 0)
            break;    /* nothing left in the block */

        /*
         * Find the next real instruction after that one
         * (skipping nops).
         */
        next = this_op(s->next);
        if (next == 0)
            break;    /* no next instruction */
        last = next;

        /*
         * st  M[k]  -->  st  M[k]
         * ldx M[k]       tax
         */
        if (s->s.code == BPF_ST &&
            next->s.code == (BPF_LDX|BPF_MEM) &&
            s->s.k == next->s.k) {
            done = 0;
            next->s.code = BPF_MISC|BPF_TAX;
        }
        /*
         * ld  #k  -->  ldx  #k
         * tax         txa
         */
        if (s->s.code == (BPF_LD|BPF_IMM) &&
            next->s.code == (BPF_MISC|BPF_TAX)) {
            s->s.code = BPF_LDX|BPF_IMM;
            next->s.code = BPF_MISC|BPF_TXA;
            done = 0;
        }
        /*
         * This is an ugly special case, but it happens
         * when you say tcp[k] or udp[k] where k is a constant.
         */
        if (s->s.code == (BPF_LD|BPF_IMM)) {
            struct slist *add, *tax, *ild;

            /*
             * Check that X isn't used on exit from this
             * block (which the optimizer might cause).
             * We know the code generator won't generate
             * any local dependencies.
             */
            if (ATOMELEM(b->out_use, X_ATOM))
                continue;

            /*
             * Check that the instruction following the ldi
             * is an addx, or it's an ldxms with an addx
             * following it (with 0 or more nops between the
             * ldxms and addx).
             */
            if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
                add = next;
            else
                add = this_op(next->next);
            if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
                continue;

            /*
             * Check that a tax follows that (with 0 or more
             * nops between them).
             */
            tax = this_op(add->next);
            if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
                continue;

            /*
             * Check that an ild follows that (with 0 or more
             * nops between them).
             */
            ild = this_op(tax->next);
            if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
                BPF_MODE(ild->s.code) != BPF_IND)
                continue;
            /*
             * We want to turn this sequence:
             *
             * (004) ldi     #0x2           {s}
             * (005) ldxms   [14]           {next}  -- optional
             * (006) addx                   {add}
             * (007) tax                    {tax}
             * (008) ild     [x+0]          {ild}
             *
             * into this sequence:
             *
             * (004) nop
             * (005) ldxms   [14]
             * (006) nop
             * (007) nop
             * (008) ild     [x+2]
             *
             * XXX We need to check that X is not
             * subsequently used, because we want to change
             * what'll be in it after this sequence.
             *
             * We know we can eliminate the accumulator
             * modifications earlier in the sequence since
             * it is defined by the last stmt of this sequence
             * (i.e., the last statement of the sequence loads
             * a value into the accumulator, so we can eliminate
             * earlier operations on the accumulator).
             */
            ild->s.k += s->s.k;
            s->s.code = NOP;
            add->s.code = NOP;
            tax->s.code = NOP;
            done = 0;
        }
    }
    /*
     * If the comparison at the end of a block is an equality
     * comparison against a constant, and nobody uses the value
     * we leave in the A register at the end of a block, and
     * the operation preceding the comparison is an arithmetic
     * operation, we can sometime optimize it away.
     */
    if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
        !ATOMELEM(b->out_use, A_ATOM)) {
        /*
         * We can optimize away certain subtractions of the
         * X register.
         */
        if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
            val = b->val[X_ATOM];
            if (vmap[val].is_const) {
                /*
                 * If we have a subtract to do a comparison,
                 * and the X register is a known constant,
                 * we can merge this value into the
                 * comparison:
                 *
                 * sub x  ->    nop
                 * jeq #y       jeq #(x+y)
                 */
                b->s.k += vmap[val].const_val;
                last->s.code = NOP;
                done = 0;
            } else if (b->s.k == 0) {
                /*
                 * If the X register isn't a constant,
                 * and the comparison in the test is
                 * against 0, we can compare with the
                 * X register, instead:
                 *
                 * sub x  ->    nop
                 * jeq #0       jeq x
                 */
                last->s.code = NOP;
                b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
                done = 0;
            }
        }
        /*
         * Likewise, a constant subtract can be simplified:
         *
         * sub #x ->    nop
         * jeq #y ->    jeq #(x+y)
         */
        else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
            last->s.code = NOP;
            b->s.k += last->s.k;
            done = 0;
        }
        /*
         * And, similarly, a constant AND can be simplified
         * if we're testing against 0, i.e.:
         *
         * and #k       nop
         * jeq #0  ->   jset #k
         */
        else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
            b->s.k == 0) {
            b->s.k = last->s.k;
            b->s.code = BPF_JMP|BPF_K|BPF_JSET;
            last->s.code = NOP;
            done = 0;
            opt_not(b);
        }
    }
    /*
     * jset #0        ->   never
     * jset #ffffffff ->   always
     */
    if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
        if (b->s.k == 0)
            JT(b) = JF(b);
        if (b->s.k == (int)0xffffffff)
            JF(b) = JT(b);
    }
    /*
     * If we're comparing against the index register, and the index
     * register is a known constant, we can just compare against that
     * constant.
     */
    val = b->val[X_ATOM];
    if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
        bpf_int32 v = vmap[val].const_val;
        b->s.code &= ~BPF_X;
        b->s.k = v;
    }
    /*
     * If the accumulator is a known constant, we can compute the
     * comparison result.
     */
    val = b->val[A_ATOM];
    if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
        bpf_int32 v = vmap[val].const_val;
        switch (BPF_OP(b->s.code)) {

        case BPF_JEQ:
            v = v == b->s.k;
            break;

        case BPF_JGT:
            v = (unsigned)v > (unsigned)b->s.k;
            break;

        case BPF_JGE:
            v = (unsigned)v >= (unsigned)b->s.k;
            break;

        case BPF_JSET:
            v &= b->s.k;
            break;

        default:
            abort();
        }
        if (JF(b) != JT(b))
            done = 0;
        if (v)
            JF(b) = JT(b);
        else
            JT(b) = JF(b);
    }
}
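/*
 * Worked example of the constant-subtract fold above (illustrative
 * numbers): since A - 10 == 20 exactly when A == 30, the sequence
 *
 *    sub #10                 nop
 *    jeq #20, L1, L2    ->   jeq #30, L1, L2
 *
 * tests the same condition, and the now-dead ALU op becomes a nop that
 * later passes delete.
 */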
int
bpf_validate(struct bpf_insn *f, int len)
{
    int i;
    int from;
    struct bpf_insn *p;

    if (len < 1 || len > BPF_MAXINSNS)
        return 0;

    i = 0;
    p = &f[i];
    switch (BPF_CLASS(p->code)) {
    /*
     * Check that memory operations use valid addresses.
     */
    case BPF_LD:
    case BPF_LDX:
        switch (BPF_MODE(p->code)) {
        case BPF_IMM:
            break;
        case BPF_ABS:
        case BPF_IND:
        case BPF_MSH:
            /*
             * More strict check with actual packet length
             * is done runtime.
             */
            if (p->k >= bpf_maxbufsize)
                return 0;
            break;
        case BPF_MEM:
            if (p->k >= BPF_MEMWORDS)
                return 0;
            break;
        case BPF_LEN:
            break;
        default:
            return 0;
        }
        break;
    case BPF_ST:
    case BPF_STX:
        if (p->k >= BPF_MEMWORDS)
            return 0;
        break;
    case BPF_ALU:
        switch (BPF_OP(p->code)) {
        case BPF_ADD:
        case BPF_SUB:
        case BPF_OR:
        case BPF_AND:
        case BPF_LSH:
        case BPF_RSH:
        case BPF_NEG:
            break;
        case BPF_DIV:
            /*
             * Check for constant division by 0.
             */
            if (BPF_RVAL(p->code) == BPF_K && p->k == 0)
                return 0;
            break;
        default:
            return 0;
        }
        break;
    case BPF_JMP:
        /*
         * Check that jumps are forward, and within
         * the code block.
         */
        from = i + 1;
        switch (BPF_OP(p->code)) {
        case BPF_JA:
            if (from + p->k < from || from + p->k >= len)
                return 0;
            break;
        case BPF_JEQ:
        case BPF_JGT:
        case BPF_JGE:
        case BPF_JSET:
            if (from + p->jt >= len || from + p->jf >= len)
                return 0;
            break;
        default:
            return 0;
        }
        break;
    case BPF_RET:
        break;
    case BPF_MISC:
        break;
    default:
        return 0;
    }
    klee_merge();

    i = 1;
    p = &f[i];
    switch (BPF_CLASS(p->code)) {
    /*
     * Check that memory operations use valid addresses.
     */
    case BPF_LD:
    case BPF_LDX:
        switch (BPF_MODE(p->code)) {
        case BPF_IMM:
            break;
        case BPF_ABS:
        case BPF_IND:
        case BPF_MSH:
            /*
             * More strict check with actual packet length
             * is done runtime.
             */
            if (p->k >= bpf_maxbufsize)
                return 0;
            break;
        case BPF_MEM:
            if (p->k >= BPF_MEMWORDS)
                return 0;
            break;
        case BPF_LEN:
            break;
        default:
            return 0;
        }
        break;
    case BPF_ST:
    case BPF_STX:
        if (p->k >= BPF_MEMWORDS)
            return 0;
        break;
    case BPF_ALU:
        switch (BPF_OP(p->code)) {
        case BPF_ADD:
        case BPF_SUB:
        case BPF_OR:
        case BPF_AND:
        case BPF_LSH:
        case BPF_RSH:
        case BPF_NEG:
            break;
        case BPF_DIV:
            /*
             * Check for constant division by 0.
             */
            if (BPF_RVAL(p->code) == BPF_K && p->k == 0)
                return 0;
            break;
        default:
            return 0;
        }
        break;
    case BPF_JMP:
        /*
         * Check that jumps are forward, and within
         * the code block.
         */
        from = i + 1;
        switch (BPF_OP(p->code)) {
        case BPF_JA:
            if (from + p->k < from || from + p->k >= len)
                return 0;
            break;
        case BPF_JEQ:
        case BPF_JGT:
        case BPF_JGE:
        case BPF_JSET:
            if (from + p->jt >= len || from + p->jf >= len)
                return 0;
            break;
        default:
            return 0;
        }
        break;
    case BPF_RET:
        break;
    case BPF_MISC:
        break;
    default:
        return 0;
    }
    klee_merge();

    i = 2;
    p = &f[i];
    switch (BPF_CLASS(p->code)) {
    /*
     * Check that memory operations use valid addresses.
     */
    case BPF_LD:
    case BPF_LDX:
        switch (BPF_MODE(p->code)) {
        case BPF_IMM:
            break;
        case BPF_ABS:
        case BPF_IND:
        case BPF_MSH:
            /*
             * More strict check with actual packet length
             * is done runtime.
             */
            if (p->k >= bpf_maxbufsize)
                return 0;
            break;
        case BPF_MEM:
            if (p->k >= BPF_MEMWORDS)
                return 0;
            break;
        case BPF_LEN:
            break;
        default:
            return 0;
        }
        break;
    case BPF_ST:
    case BPF_STX:
        if (p->k >= BPF_MEMWORDS)
            return 0;
        break;
    case BPF_ALU:
        switch (BPF_OP(p->code)) {
        case BPF_ADD:
        case BPF_SUB:
        case BPF_OR:
        case BPF_AND:
        case BPF_LSH:
        case BPF_RSH:
        case BPF_NEG:
            break;
        case BPF_DIV:
            /*
             * Check for constant division by 0.
             */
            if (BPF_RVAL(p->code) == BPF_K && p->k == 0)
                return 0;
            break;
        default:
            return 0;
        }
        break;
    case BPF_JMP:
        /*
         * Check that jumps are forward, and within
         * the code block.
         */
        from = i + 1;
        switch (BPF_OP(p->code)) {
        case BPF_JA:
            if (from + p->k < from || from + p->k >= len)
                return 0;
            break;
        case BPF_JEQ:
        case BPF_JGT:
        case BPF_JGE:
        case BPF_JSET:
            if (from + p->jt >= len || from + p->jf >= len)
                return 0;
            break;
        default:
            return 0;
        }
        break;
    case BPF_RET:
        break;
    case BPF_MISC:
        break;
    default:
        return 0;
    }
    klee_merge();

    return BPF_CLASS(f[len - 1].code) == BPF_RET;
}
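/*
 * The three copies above look like a manually unrolled variant of the
 * usual validation loop, with klee_merge() (a KLEE symbolic-execution
 * intrinsic) inserted so the checker can merge states after each
 * iteration instead of forking paths. A sketch of the rolled form this
 * presumably came from:
 *
 *    for (i = 0; i < len; ++i) {
 *        p = &f[i];
 *        ... per-instruction checks as above ...
 *        klee_merge();
 *    }
 */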
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
{
    struct bpf_insn *insn = bpf_prog->insnsi;
    int insn_cnt = bpf_prog->len;
    bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
    bool seen_exit = false;
    u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
    int i, cnt = 0;
    int proglen = 0;
    u8 *prog = temp;

    emit_prologue(&prog);

    if (seen_ld_abs)
        emit_load_skb_data_hlen(&prog);

    for (i = 0; i < insn_cnt; i++, insn++) {
        const s32 imm32 = insn->imm;
        u32 dst_reg = insn->dst_reg;
        u32 src_reg = insn->src_reg;
        u8 b1 = 0, b2 = 0, b3 = 0;
        s64 jmp_offset;
        u8 jmp_cond;
        bool reload_skb_data;
        int ilen;
        u8 *func;

        switch (insn->code) {
            /* ALU */
        case BPF_ALU | BPF_ADD | BPF_X:
        case BPF_ALU | BPF_SUB | BPF_X:
        case BPF_ALU | BPF_AND | BPF_X:
        case BPF_ALU | BPF_OR | BPF_X:
        case BPF_ALU | BPF_XOR | BPF_X:
        case BPF_ALU64 | BPF_ADD | BPF_X:
        case BPF_ALU64 | BPF_SUB | BPF_X:
        case BPF_ALU64 | BPF_AND | BPF_X:
        case BPF_ALU64 | BPF_OR | BPF_X:
        case BPF_ALU64 | BPF_XOR | BPF_X:
            switch (BPF_OP(insn->code)) {
            case BPF_ADD: b2 = 0x01; break;
            case BPF_SUB: b2 = 0x29; break;
            case BPF_AND: b2 = 0x21; break;
            case BPF_OR: b2 = 0x09; break;
            case BPF_XOR: b2 = 0x31; break;
            }
            if (BPF_CLASS(insn->code) == BPF_ALU64)
                EMIT1(add_2mod(0x48, dst_reg, src_reg));
            else if (is_ereg(dst_reg) || is_ereg(src_reg))
                EMIT1(add_2mod(0x40, dst_reg, src_reg));
            EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg));
            break;

            /* mov dst, src */
        case BPF_ALU64 | BPF_MOV | BPF_X:
            EMIT_mov(dst_reg, src_reg);
            break;

            /* mov32 dst, src */
        case BPF_ALU | BPF_MOV | BPF_X:
            if (is_ereg(dst_reg) || is_ereg(src_reg))
                EMIT1(add_2mod(0x40, dst_reg, src_reg));
            EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg));
            break;

            /* neg dst */
        case BPF_ALU | BPF_NEG:
        case BPF_ALU64 | BPF_NEG:
            if (BPF_CLASS(insn->code) == BPF_ALU64)
                EMIT1(add_1mod(0x48, dst_reg));
            else if (is_ereg(dst_reg))
                EMIT1(add_1mod(0x40, dst_reg));
            EMIT2(0xF7, add_1reg(0xD8, dst_reg));
            break;

        case BPF_ALU | BPF_ADD | BPF_K:
        case BPF_ALU | BPF_SUB | BPF_K:
        case BPF_ALU | BPF_AND | BPF_K:
        case BPF_ALU | BPF_OR | BPF_K:
        case BPF_ALU | BPF_XOR | BPF_K:
        case BPF_ALU64 | BPF_ADD | BPF_K:
        case BPF_ALU64 | BPF_SUB | BPF_K:
        case BPF_ALU64 | BPF_AND | BPF_K:
        case BPF_ALU64 | BPF_OR | BPF_K:
        case BPF_ALU64 | BPF_XOR | BPF_K:
            if (BPF_CLASS(insn->code) == BPF_ALU64)
                EMIT1(add_1mod(0x48, dst_reg));
            else if (is_ereg(dst_reg))
                EMIT1(add_1mod(0x40, dst_reg));

            switch (BPF_OP(insn->code)) {
            case BPF_ADD: b3 = 0xC0; break;
            case BPF_SUB: b3 = 0xE8; break;
            case BPF_AND: b3 = 0xE0; break;
            case BPF_OR: b3 = 0xC8; break;
            case BPF_XOR: b3 = 0xF0; break;
            }

            if (is_imm8(imm32))
                EMIT3(0x83, add_1reg(b3, dst_reg), imm32);
            else
                EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32);
            break;

        case BPF_ALU64 | BPF_MOV | BPF_K:
            /* optimization: if imm32 is positive,
             * use 'mov eax, imm32' (which zero-extends imm32)
             * to save 2 bytes
             */
            if (imm32 < 0) {
                /* 'mov rax, imm32' sign extends imm32 */
                b1 = add_1mod(0x48, dst_reg);
                b2 = 0xC7;
                b3 = 0xC0;
                EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32);
                break;
            }

        case BPF_ALU | BPF_MOV | BPF_K:
            /* optimization: if imm32 is zero, use 'xor <dst>,<dst>'
             * to save 3 bytes.
             */
            if (imm32 == 0) {
                if (is_ereg(dst_reg))
                    EMIT1(add_2mod(0x40, dst_reg, dst_reg));
                b2 = 0x31; /* xor */
                b3 = 0xC0;
                EMIT2(b2, add_2reg(b3, dst_reg, dst_reg));
                break;
            }

            /* mov %eax, imm32 */
            if (is_ereg(dst_reg))
                EMIT1(add_1mod(0x40, dst_reg));
            EMIT1_off32(add_1reg(0xB8, dst_reg), imm32);
            break;

        case BPF_LD | BPF_IMM | BPF_DW:
            if (insn[1].code != 0 || insn[1].src_reg != 0 ||
                insn[1].dst_reg != 0 || insn[1].off != 0) {
                /* verifier must catch invalid insns */
                pr_err("invalid BPF_LD_IMM64 insn\n");
                return -EINVAL;
            }

            /* optimization: if imm64 is zero, use 'xor <dst>,<dst>'
             * to save 7 bytes.
             */
            if (insn[0].imm == 0 && insn[1].imm == 0) {
                b1 = add_2mod(0x48, dst_reg, dst_reg);
                b2 = 0x31; /* xor */
                b3 = 0xC0;
                EMIT3(b1, b2, add_2reg(b3, dst_reg, dst_reg));

                insn++;
                i++;
                break;
            }

            /* movabsq %rax, imm64 */
            EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg));
            EMIT(insn[0].imm, 4);
            EMIT(insn[1].imm, 4);

            insn++;
            i++;
            break;

            /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */
        case BPF_ALU | BPF_MOD | BPF_X:
        case BPF_ALU | BPF_DIV | BPF_X:
        case BPF_ALU | BPF_MOD | BPF_K:
        case BPF_ALU | BPF_DIV | BPF_K:
        case BPF_ALU64 | BPF_MOD | BPF_X:
        case BPF_ALU64 | BPF_DIV | BPF_X:
        case BPF_ALU64 | BPF_MOD | BPF_K:
        case BPF_ALU64 | BPF_DIV | BPF_K:
            EMIT1(0x50); /* push rax */
            EMIT1(0x52); /* push rdx */

            if (BPF_SRC(insn->code) == BPF_X)
                /* mov r11, src_reg */
                EMIT_mov(AUX_REG, src_reg);
            else
                /* mov r11, imm32 */
                EMIT3_off32(0x49, 0xC7, 0xC3, imm32);

            /* mov rax, dst_reg */
            EMIT_mov(BPF_REG_0, dst_reg);

            /* xor edx, edx
             * equivalent to 'xor rdx, rdx', but one byte less
             */
            EMIT2(0x31, 0xd2);

            if (BPF_SRC(insn->code) == BPF_X) {
                /* if (src_reg == 0) return 0 */

                /* cmp r11, 0 */
                EMIT4(0x49, 0x83, 0xFB, 0x00);

                /* jne .+9 (skip over pop, pop, xor and jmp) */
                EMIT2(X86_JNE, 1 + 1 + 2 + 5);
                EMIT1(0x5A); /* pop rdx */
                EMIT1(0x58); /* pop rax */
                EMIT2(0x31, 0xc0); /* xor eax, eax */

                /* jmp cleanup_addr
                 * addrs[i] - 11, because there are 11 bytes
                 * after this insn: div, mov, pop, pop, mov
                 */
                jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
                EMIT1_off32(0xE9, jmp_offset);
            }

            if (BPF_CLASS(insn->code) == BPF_ALU64)
                /* div r11 */
                EMIT3(0x49, 0xF7, 0xF3);
            else
                /* div r11d */
                EMIT3(0x41, 0xF7, 0xF3);

            if (BPF_OP(insn->code) == BPF_MOD)
                /* mov r11, rdx */
                EMIT3(0x49, 0x89, 0xD3);
            else
                /* mov r11, rax */
                EMIT3(0x49, 0x89, 0xC3);

            EMIT1(0x5A); /* pop rdx */
            EMIT1(0x58); /* pop rax */

            /* mov dst_reg, r11 */
            EMIT_mov(dst_reg, AUX_REG);
            break;

        case BPF_ALU | BPF_MUL | BPF_K:
        case BPF_ALU | BPF_MUL | BPF_X:
        case BPF_ALU64 | BPF_MUL | BPF_K:
        case BPF_ALU64 | BPF_MUL | BPF_X:
            EMIT1(0x50); /* push rax */
            EMIT1(0x52); /* push rdx */

            /* mov r11, dst_reg */
            EMIT_mov(AUX_REG, dst_reg);

            if (BPF_SRC(insn->code) == BPF_X)
                /* mov rax, src_reg */
                EMIT_mov(BPF_REG_0, src_reg);
            else
                /* mov rax, imm32 */
                EMIT3_off32(0x48, 0xC7, 0xC0, imm32);

            if (BPF_CLASS(insn->code) == BPF_ALU64)
                EMIT1(add_1mod(0x48, AUX_REG));
            else if (is_ereg(AUX_REG))
                EMIT1(add_1mod(0x40, AUX_REG));
            /* mul(q) r11 */
            EMIT2(0xF7, add_1reg(0xE0, AUX_REG));

            /* mov r11, rax */
            EMIT_mov(AUX_REG, BPF_REG_0);

            EMIT1(0x5A); /* pop rdx */
            EMIT1(0x58); /* pop rax */

            /* mov dst_reg, r11 */
            EMIT_mov(dst_reg, AUX_REG);
            break;

            /* shifts */
        case BPF_ALU | BPF_LSH | BPF_K:
        case BPF_ALU | BPF_RSH | BPF_K:
        case BPF_ALU | BPF_ARSH | BPF_K:
        case BPF_ALU64 | BPF_LSH | BPF_K:
        case BPF_ALU64 | BPF_RSH | BPF_K:
        case BPF_ALU64 | BPF_ARSH | BPF_K:
            if (BPF_CLASS(insn->code) == BPF_ALU64)
                EMIT1(add_1mod(0x48, dst_reg));
            else if (is_ereg(dst_reg))
                EMIT1(add_1mod(0x40, dst_reg));

            switch (BPF_OP(insn->code)) {
            case BPF_LSH: b3 = 0xE0; break;
            case BPF_RSH: b3 = 0xE8; break;
            case BPF_ARSH: b3 = 0xF8; break;
            }
            EMIT3(0xC1, add_1reg(b3, dst_reg), imm32);
            break;

        case BPF_ALU | BPF_LSH | BPF_X:
        case BPF_ALU | BPF_RSH | BPF_X:
        case BPF_ALU | BPF_ARSH | BPF_X:
        case BPF_ALU64 | BPF_LSH | BPF_X:
        case BPF_ALU64 | BPF_RSH | BPF_X:
        case BPF_ALU64 | BPF_ARSH | BPF_X:
            /* check for bad case when dst_reg == rcx */
            if (dst_reg == BPF_REG_4) {
                /* mov r11, dst_reg */
                EMIT_mov(AUX_REG, dst_reg);
                dst_reg = AUX_REG;
            }

            if (src_reg != BPF_REG_4) { /* common case */
                EMIT1(0x51); /* push rcx */

                /* mov rcx, src_reg */
                EMIT_mov(BPF_REG_4, src_reg);
            }

            /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */
            if (BPF_CLASS(insn->code) == BPF_ALU64)
                EMIT1(add_1mod(0x48, dst_reg));
            else if (is_ereg(dst_reg))
                EMIT1(add_1mod(0x40, dst_reg));

            switch (BPF_OP(insn->code)) {
            case BPF_LSH: b3 = 0xE0; break;
            case BPF_RSH: b3 = 0xE8; break;
            case BPF_ARSH: b3 = 0xF8; break;
            }
            EMIT2(0xD3, add_1reg(b3, dst_reg));

            if (src_reg != BPF_REG_4)
                EMIT1(0x59); /* pop rcx */

            if (insn->dst_reg == BPF_REG_4)
                /* mov dst_reg, r11 */
                EMIT_mov(insn->dst_reg, AUX_REG);
            break;

        case BPF_ALU | BPF_END | BPF_FROM_BE:
            switch (imm32) {
            case 16:
                /* emit 'ror %ax, 8' to swap lower 2 bytes */
                EMIT1(0x66);
                if (is_ereg(dst_reg))
                    EMIT1(0x41);
                EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8);

                /* emit 'movzwl eax, ax' */
                if (is_ereg(dst_reg))
                    EMIT3(0x45, 0x0F, 0xB7);
                else
                    EMIT2(0x0F, 0xB7);
                EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                break;
            case 32:
                /* emit 'bswap eax' to swap lower 4 bytes */
                if (is_ereg(dst_reg))
                    EMIT2(0x41, 0x0F);
                else
                    EMIT1(0x0F);
                EMIT1(add_1reg(0xC8, dst_reg));
                break;
            case 64:
                /* emit 'bswap rax' to swap 8 bytes */
                EMIT3(add_1mod(0x48, dst_reg), 0x0F,
                      add_1reg(0xC8, dst_reg));
                break;
            }
            break;

        case BPF_ALU | BPF_END | BPF_FROM_LE:
            switch (imm32) {
            case 16:
                /* emit 'movzwl eax, ax' to zero extend 16-bit
                 * into 64 bit
                 */
                if (is_ereg(dst_reg))
                    EMIT3(0x45, 0x0F, 0xB7);
                else
                    EMIT2(0x0F, 0xB7);
                EMIT1(add_2reg(0xC0, dst_reg, dst_reg));
                break;
            case 32:
                /* emit 'mov eax, eax' to clear upper 32-bits */
                if (is_ereg(dst_reg))
                    EMIT1(0x45);
                EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg));
                break;
            case 64:
                /* nop */
                break;
            }
            break;

            /* ST: *(u8*)(dst_reg + off) = imm */
        case BPF_ST | BPF_MEM | BPF_B:
            if (is_ereg(dst_reg))
                EMIT2(0x41, 0xC6);
            else
                EMIT1(0xC6);
            goto st;
        case BPF_ST | BPF_MEM | BPF_H:
            if (is_ereg(dst_reg))
                EMIT3(0x66, 0x41, 0xC7);
            else
                EMIT2(0x66, 0xC7);
            goto st;
        case BPF_ST | BPF_MEM | BPF_W:
            if (is_ereg(dst_reg))
                EMIT2(0x41, 0xC7);
            else
                EMIT1(0xC7);
            goto st;
        case BPF_ST | BPF_MEM | BPF_DW:
            EMIT2(add_1mod(0x48, dst_reg), 0xC7);

st:
            if (is_imm8(insn->off))
                EMIT2(add_1reg(0x40, dst_reg), insn->off);
            else
                EMIT1_off32(add_1reg(0x80, dst_reg), insn->off);

            EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code)));
            break;

            /* STX: *(u8*)(dst_reg + off) = src_reg */
        case BPF_STX | BPF_MEM | BPF_B:
            /* emit 'mov byte ptr [rax + off], al' */
            if (is_ereg(dst_reg) || is_ereg(src_reg) ||
                /* have to add extra byte for x86 SIL, DIL regs */
                src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
            else
                EMIT1(0x88);
            goto stx;
        case BPF_STX | BPF_MEM | BPF_H:
            if (is_ereg(dst_reg) || is_ereg(src_reg))
                EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
            else
                EMIT2(0x66, 0x89);
            goto stx;
        case BPF_STX | BPF_MEM | BPF_W:
            if (is_ereg(dst_reg) || is_ereg(src_reg))
                EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
            else
                EMIT1(0x89);
            goto stx;
        case BPF_STX | BPF_MEM | BPF_DW:
            EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
stx:
            if (is_imm8(insn->off))
                EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
            else
                EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
                            insn->off);
            break;

            /* LDX: dst_reg = *(u8*)(src_reg + off) */
        case BPF_LDX | BPF_MEM | BPF_B:
            /* emit 'movzx rax, byte ptr [rax + off]' */
            EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
            goto ldx;
        case BPF_LDX | BPF_MEM | BPF_H:
            /* emit 'movzx rax, word ptr [rax + off]' */
            EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
            goto ldx;
        case BPF_LDX | BPF_MEM | BPF_W:
            /* emit 'mov eax, dword ptr [rax+0x14]' */
            if (is_ereg(dst_reg) || is_ereg(src_reg))
                EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
            else
                EMIT1(0x8B);
            goto ldx;
        case BPF_LDX | BPF_MEM | BPF_DW:
            /* emit 'mov rax, qword ptr [rax+0x14]' */
            EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
ldx:
            /* if insn->off == 0 we can save one extra byte, but
             * special case of x86 r13 which always needs an offset
             * is not worth the hassle
             */
            if (is_imm8(insn->off))
                EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
            else
                EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
                            insn->off);
            break;

            /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
        case BPF_STX | BPF_XADD | BPF_W:
            /* emit 'lock add dword ptr [rax + off], eax' */
            if (is_ereg(dst_reg) || is_ereg(src_reg))
                EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01);
            else
                EMIT2(0xF0, 0x01);
            goto xadd;
        case BPF_STX | BPF_XADD | BPF_DW:
            EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01);
xadd:
            if (is_imm8(insn->off))
                EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
            else
                EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
                            insn->off);
            break;

            /* call */
        case BPF_JMP | BPF_CALL:
            func = (u8 *) __bpf_call_base + imm32;
            jmp_offset = func - (image + addrs[i]);
            if (seen_ld_abs) {
                reload_skb_data = bpf_helper_changes_skb_data(func);
                if (reload_skb_data) {
                    EMIT1(0x57); /* push %rdi */
                    jmp_offset += 22; /* pop, mov, sub, mov */
                } else {
                    EMIT2(0x41, 0x52); /* push %r10 */
                    EMIT2(0x41, 0x51); /* push %r9 */
                    /* need to adjust jmp offset, since
                     * pop %r9, pop %r10 take 4 bytes after call insn
                     */
                    jmp_offset += 4;
                }
            }
            if (!imm32 || !is_simm32(jmp_offset)) {
                pr_err("unsupported bpf func %d addr %p image %p\n",
                       imm32, func, image);
                return -EINVAL;
            }
            EMIT1_off32(0xE8, jmp_offset);
            if (seen_ld_abs) {
                if (reload_skb_data) {
                    EMIT1(0x5F); /* pop %rdi */
                    emit_load_skb_data_hlen(&prog);
                } else {
                    EMIT2(0x41, 0x59); /* pop %r9 */
                    EMIT2(0x41, 0x5A); /* pop %r10 */
                }
            }
            break;

        case BPF_JMP | BPF_CALL | BPF_X:
            emit_bpf_tail_call(&prog);
            break;

            /* cond jump */
        case BPF_JMP | BPF_JEQ | BPF_X:
        case BPF_JMP | BPF_JNE | BPF_X:
        case BPF_JMP | BPF_JGT | BPF_X:
        case BPF_JMP | BPF_JGE | BPF_X:
        case BPF_JMP | BPF_JSGT | BPF_X:
        case BPF_JMP | BPF_JSGE | BPF_X:
            /* cmp dst_reg, src_reg */
            EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39,
                  add_2reg(0xC0, dst_reg, src_reg));
            goto emit_cond_jmp;

        case BPF_JMP | BPF_JSET | BPF_X:
            /* test dst_reg, src_reg */
            EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85,
                  add_2reg(0xC0, dst_reg, src_reg));
            goto emit_cond_jmp;

        case BPF_JMP | BPF_JSET | BPF_K:
            /* test dst_reg, imm32 */
            EMIT1(add_1mod(0x48, dst_reg));
            EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32);
            goto emit_cond_jmp;

        case BPF_JMP | BPF_JEQ | BPF_K:
        case BPF_JMP | BPF_JNE | BPF_K:
        case BPF_JMP | BPF_JGT | BPF_K:
        case BPF_JMP | BPF_JGE | BPF_K:
        case BPF_JMP | BPF_JSGT | BPF_K:
        case BPF_JMP | BPF_JSGE | BPF_K:
            /* cmp dst_reg, imm8/32 */
            EMIT1(add_1mod(0x48, dst_reg));

            if (is_imm8(imm32))
                EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32);
            else
                EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32);

emit_cond_jmp:
            /* convert BPF opcode to x86 */
            switch (BPF_OP(insn->code)) {
            case BPF_JEQ:
                jmp_cond = X86_JE;
                break;
            case BPF_JSET:
            case BPF_JNE:
                jmp_cond = X86_JNE;
                break;
            case BPF_JGT:
                /* GT is unsigned '>', JA in x86 */
                jmp_cond = X86_JA;
                break;
            case BPF_JGE:
                /* GE is unsigned '>=', JAE in x86 */
                jmp_cond = X86_JAE;
                break;
            case BPF_JSGT:
                /* signed '>', GT in x86 */
                jmp_cond = X86_JG;
                break;
            case BPF_JSGE:
                /* signed '>=', GE in x86 */
                jmp_cond = X86_JGE;
                break;
            default: /* to silence gcc warning */
                return -EFAULT;
            }
            jmp_offset = addrs[i + insn->off] - addrs[i];
            if (is_imm8(jmp_offset)) {
                EMIT2(jmp_cond, jmp_offset);
            } else if (is_simm32(jmp_offset)) {
                EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
            } else {
                pr_err("cond_jmp gen bug %llx\n", jmp_offset);
                return -EFAULT;
            }
            break;

        case BPF_JMP | BPF_JA:
            jmp_offset = addrs[i + insn->off] - addrs[i];
            if (!jmp_offset)
                /* optimize out nop jumps */
                break;
emit_jmp:
            if (is_imm8(jmp_offset)) {
                EMIT2(0xEB, jmp_offset);
            } else if (is_simm32(jmp_offset)) {
                EMIT1_off32(0xE9, jmp_offset);
            } else {
                pr_err("jmp gen bug %llx\n", jmp_offset);
                return -EFAULT;
            }
            break;

        case BPF_LD | BPF_IND | BPF_W:
            func = sk_load_word;
            goto common_load;
        case BPF_LD | BPF_ABS | BPF_W:
            func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
common_load:
            ctx->seen_ld_abs = seen_ld_abs = true;
            jmp_offset = func - (image + addrs[i]);
            if (!func || !is_simm32(jmp_offset)) {
                pr_err("unsupported bpf func %d addr %p image %p\n",
                       imm32, func, image);
                return -EINVAL;
            }
            if (BPF_MODE(insn->code) == BPF_ABS) {
                /* mov %esi, imm32 */
                EMIT1_off32(0xBE, imm32);
            } else {
                /* mov %rsi, src_reg */
                EMIT_mov(BPF_REG_2, src_reg);
                if (imm32) {
                    if (is_imm8(imm32))
                        /* add %esi, imm8 */
                        EMIT3(0x83, 0xC6, imm32);
                    else
                        /* add %esi, imm32 */
                        EMIT2_off32(0x81, 0xC6, imm32);
                }
            }
            /* skb pointer is in R6 (%rbx), it will be copied into
             * %rdi if skb_copy_bits() call is necessary.
             * sk_load_* helpers also use %r10 and %r9d.
             * See bpf_jit.S
             */
            EMIT1_off32(0xE8, jmp_offset); /* call */
            break;

        case BPF_LD | BPF_IND | BPF_H:
            func = sk_load_half;
            goto common_load;
        case BPF_LD | BPF_ABS | BPF_H:
            func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
            goto common_load;
        case BPF_LD | BPF_IND | BPF_B:
            func = sk_load_byte;
            goto common_load;
        case BPF_LD | BPF_ABS | BPF_B:
            func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
            goto common_load;

        case BPF_JMP | BPF_EXIT:
            if (seen_exit) {
                jmp_offset = ctx->cleanup_addr - addrs[i];
                goto emit_jmp;
            }
            seen_exit = true;
            /* update cleanup_addr */
            ctx->cleanup_addr = proglen;
            /* mov rbx, qword ptr [rbp-X] */
            EMIT3_off32(0x48, 0x8B, 0x9D, -STACKSIZE);
            /* mov r13, qword ptr [rbp-X] */
            EMIT3_off32(0x4C, 0x8B, 0xAD, -STACKSIZE + 8);
            /* mov r14, qword ptr [rbp-X] */
            EMIT3_off32(0x4C, 0x8B, 0xB5, -STACKSIZE + 16);
            /* mov r15, qword ptr [rbp-X] */
            EMIT3_off32(0x4C, 0x8B, 0xBD, -STACKSIZE + 24);
            EMIT1(0xC9); /* leave */
            EMIT1(0xC3); /* ret */
            break;

        default:
            /* By design x64 JIT should support all BPF instructions
             * This error will be seen if new instruction was added
             * to interpreter, but not to JIT
             * or if there is junk in bpf_prog
             */
            pr_err("bpf_jit: unknown opcode %02x\n", insn->code);
            return -EINVAL;
        }

        ilen = prog - temp;
        if (ilen > BPF_MAX_INSN_SIZE) {
            pr_err("bpf_jit_compile fatal insn size error\n");
            return -EFAULT;
        }

        if (image) {
            if (unlikely(proglen + ilen > oldproglen)) {
                pr_err("bpf_jit_compile fatal error\n");
                return -EFAULT;
            }
            memcpy(image + proglen, temp, ilen);
        }
        proglen += ilen;
        addrs[i] = proglen;
        prog = temp;
    }
    return proglen;
}
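/*
 * Context for do_jit() (assumed from the surrounding kernel JIT driver,
 * not part of this excerpt): it is run for several passes until the
 * emitted length stops changing, because addrs[i] holds the previous
 * pass's end offset of instruction i, and jump offsets computed from it
 * only settle once instruction sizes do. A simplified sketch:
 */
for (pass = 0; pass < 10; pass++) {
    proglen = do_jit(prog, addrs, NULL, oldproglen, &ctx);
    if (proglen == oldproglen)
        break;    /* instruction sizes stable: offsets are final */
    oldproglen = proglen;
}
/* then allocate 'image' of size proglen and call do_jit() once more */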
static int
fix_program(pcap_t *handle, struct sock_fprog *fcode, int is_mmapped)
{
    size_t prog_size;
    register int i;
    register struct bpf_insn *p;
    struct bpf_insn *f;
    int len;

    /*
     * Make a copy of the filter, and modify that copy if
     * necessary.
     */
    prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
    len = handle->fcode.bf_len;
    f = (struct bpf_insn *)malloc(prog_size);
    if (f == NULL) {
        snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
                 "malloc: %s", pcap_strerror(errno));
        return -1;
    }
    memcpy(f, handle->fcode.bf_insns, prog_size);
    fcode->len = len;
    fcode->filter = (struct sock_filter *) f;

    for (i = 0; i < len; ++i) {
        p = &f[i];
        /*
         * What type of instruction is this?
         */
        switch (BPF_CLASS(p->code)) {
        case BPF_RET:
            /*
             * It's a return instruction; are we capturing
             * in memory-mapped mode?
             */
            if (!is_mmapped) {
                /*
                 * No; is the snapshot length a constant,
                 * rather than the contents of the
                 * accumulator?
                 */
                if (BPF_MODE(p->code) == BPF_K) {
                    /*
                     * Yes - if the value to be returned,
                     * i.e. the snapshot length, is
                     * anything other than 0, make it
                     * 65535, so that the packet is
                     * truncated by "recvfrom()",
                     * not by the filter.
                     *
                     * XXX - there's nothing we can
                     * easily do if it's getting the
                     * value from the accumulator; we'd
                     * have to insert code to force
                     * non-zero values to be 65535.
                     */
                    if (p->k != 0)
                        p->k = 65535;
                }
            }
            break;

        case BPF_LD:
        case BPF_LDX:
            /*
             * It's a load instruction; is it loading
             * from the packet?
             */
            switch (BPF_MODE(p->code)) {
            case BPF_ABS:
            case BPF_IND:
            case BPF_MSH:
                /*
                 * Yes; are we in cooked mode?
                 */
                if (handle->md.cooked) {
                    /*
                     * Yes, so we need to fix this
                     * instruction.
                     */
                    if (fix_offset(p) < 0) {
                        /*
                         * We failed to do so.
                         * Return 0, so our caller
                         * knows to punt to userland.
                         */
                        return 0;
                    }
                }
                break;
            }
            break;
        }
    }
    return 1;    /* we succeeded */
}
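/*
 * Effect of the BPF_RET rewrite above, shown on a compiled-in snapshot
 * length of 96 bytes (illustrative values):
 *
 *    { BPF_RET | BPF_K, 0, 0, 96 }   ->   { BPF_RET | BPF_K, 0, 0, 65535 }
 *
 * With a plain socket the kernel truncates each packet to the filter's
 * return value before userland sees it, so leaving the original snapshot
 * length in place would truncate inside the kernel; forcing 65535 defers
 * the truncation to libpcap's own "recvfrom()" handling, as the comment
 * in the function explains.
 */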