/*
 * npf_normalise_ip4: routine to normalise IPv4 header (randomise ID,
 * clear "don't fragment" and/or enforce minimum TTL).
 *
 * => npc: packet cache; the cached IPv4 header (npc_ip.v4) is consulted
 *    and updated in place.
 * => nbuf: network buffer holding the actual packet data to rewrite.
 * => np: normalisation parameters (n_random_id, n_no_df, n_minttl).
 * => Returns false if storing into the buffer fails.
 *
 * NOTE(review): this view of the function is truncated — only the
 * ID-randomisation step is visible here; the DF-clearing and minimum-TTL
 * enforcement implied by the header comment (and by the ip_off/ttl/minttl
 * locals) are presumably handled in the continuation not shown.
 */
static inline bool
npf_normalise_ip4(npf_cache_t *npc, nbuf_t *nbuf, npf_normalise_t *np)
{
	void *n_ptr = nbuf_dataptr(nbuf);
	struct ip *ip = &npc->npc_ip.v4;
	/* Snapshot checksum and fields; cksum is incrementally fixed up below. */
	uint16_t cksum = ip->ip_sum;
	uint16_t ip_off = ip->ip_off;
	uint8_t ttl = ip->ip_ttl;
	u_int minttl = np->n_minttl;
	u_int offby = 0;

	/* Caller must request at least one normalisation action. */
	KASSERT(np->n_random_id || np->n_no_df || minttl);

	/* Randomise IPv4 ID. */
	if (np->n_random_id) {
		uint16_t oid = ip->ip_id, nid;

		nid = htons(ip_randomid(ip_ids, 0));
		/* Store the new ID at its offset within the IP header. */
		offby = offsetof(struct ip, ip_id);
		if (nbuf_advstore(&nbuf, &n_ptr, offby, sizeof(nid), &nid)) {
			return false;
		}
		/* Incrementally adjust the checksum for the ID change. */
		cksum = npf_fixup16_cksum(cksum, oid, nid);
		ip->ip_id = nid;
	}
/*
 * npf_reassembly: feed an IP fragment to the IPv4/IPv6 reassembly
 * machinery and, once the packet is complete, re-cache it so that the
 * layer 4 data becomes visible.
 *
 * => Returns 0 on success (including "waiting for more fragments",
 *    in which case *mp is set to NULL); an error code otherwise.
 */
static int
npf_reassembly(npf_t *npf, npf_cache_t *npc, struct mbuf **mp)
{
	nbuf_t *nbuf = npc->npc_nbuf;
	int err = EINVAL;

	/* The underlying mbuf may have changed — re-fetch and reset. */
	*mp = nbuf_head_mbuf(nbuf);
	nbuf_reset(nbuf);

	if (npf_iscached(npc, NPC_IP4)) {
		struct ip *ip4 = nbuf_dataptr(nbuf);
		err = ip_reass_packet(mp, ip4);
	} else if (npf_iscached(npc, NPC_IP6)) {
		/*
		 * Note: ip6_reass_packet() takes the offset of the
		 * fragment header.
		 */
		err = ip6_reass_packet(mp, npc->npc_hlen);
		if (err && *mp == NULL) {
			/* The mbuf is gone — invalidate the nbuf wrapper. */
			memset(nbuf, 0, sizeof(nbuf_t));
		}
	}

	if (err) {
		npf_stats_inc(npf, NPF_STAT_REASSFAIL);
		return err;
	}
	if (*mp == NULL) {
		/* Not complete yet — more fragments expected. */
		npf_stats_inc(npf, NPF_STAT_FRAGMENTS);
		return 0;
	}

	/*
	 * The final packet has been reassembled.  Re-initialise the nbuf
	 * and rebuild the cache, since layer 4 data is reachable now.
	 */
	nbuf_init(npf, nbuf, *mp, nbuf->nb_ifp);
	npc->npc_info = 0;
	if (npf_cache_all(npc) & NPC_IPFRAG) {
		return EINVAL;
	}
	npf_stats_inc(npf, NPF_STAT_REASSEMBLY);
	return 0;
}
/*
 * npfa_icmp_session: ALG session inspector, returns unique identifiers.
 *
 * => npc/nbuf: cache and buffer of the packet under inspection.
 * => keyptr: a separate npf_cache_t ("key" cache) to be filled with the
 *    unique identifiers extracted from the ICMP packet; must be cleared
 *    by the caller (asserted below).
 *
 * NOTE(review): this view of the function is truncated — the trailing
 * `else` branch and the return path are not visible here.
 */
static bool
npfa_icmp_session(npf_cache_t *npc, nbuf_t *nbuf, void *keyptr)
{
	npf_cache_t *key = keyptr;
	bool ret;

	KASSERT(key->npc_info == 0);

	/* IP + ICMP?  Get unique identifiers from ICMP packet. */
	if (!npf_iscached(npc, NPC_IP4)) {
		return false;
	}
	if (npf_cache_ipproto(npc) != IPPROTO_ICMP) {
		return false;
	}
	KASSERT(npf_iscached(npc, NPC_ICMP));

	/* Advance to ICMP header (skip the cached IP header length). */
	void *n_ptr = nbuf_dataptr(nbuf);
	const u_int hlen = npf_cache_hlen(npc);
	if ((n_ptr = nbuf_advance(&nbuf, n_ptr, hlen)) == NULL) {
		return false;
	}

	/*
	 * Fetch relevant data into the separate ("key") cache.
	 * NOTE(review): the NPC_IP6 branch below appears unreachable given
	 * the NPC_IP4 early-return above — confirm whether the first check
	 * was meant to admit IPv6 as well.
	 */
	struct icmp *ic = &npc->npc_l4.icmp;
	if (npf_iscached(npc, NPC_IP4)) {
		ret = npf_icmp4_uniqid(ic->icmp_type, key, nbuf, n_ptr);
	} else if (npf_iscached(npc, NPC_IP6)) {
		/* The id fields of ICMPv4/ICMPv6 must overlap for reuse. */
		KASSERT(offsetof(struct icmp, icmp_id) ==
		    offsetof(struct icmp6_hdr, icmp6_id));
		ret = npf_icmp6_uniqid(ic->icmp_type, key, nbuf, n_ptr);
	} else {
/*
 * npf_packet_handler: main packet handling routine for layer 3.
 *
 * Note: packet flow and inspection logic is in strict order.
 *
 * => arg: unused hook argument.
 * => mp: pointer to the mbuf; may be replaced by reassembly or consumed
 *    (set to NULL) on block.
 * => ifp: interface the packet is traversing; di: direction.
 * => Returns 0 to pass the packet; ENETUNREACH (or another error) to
 *    indicate that the packet was blocked and freed.
 */
int
npf_packet_handler(void *arg, struct mbuf **mp, ifnet_t *ifp, int di)
{
	nbuf_t *nbuf = *mp;
	npf_cache_t npc;
	npf_session_t *se;
	npf_ruleset_t *rlset;
	npf_rule_t *rl;
	npf_rproc_t *rp;
	int error, retfl;
	int decision;

	/*
	 * Initialise packet information cache.
	 * Note: it is enough to clear the info bits.
	 */
	npc.npc_info = 0;
	decision = NPF_DECISION_BLOCK;
	error = 0;
	retfl = 0;
	rp = NULL;

	/* Cache everything.  Determine whether it is an IP fragment. */
	if (npf_cache_all(&npc, nbuf) & NPC_IPFRAG) {
		/*
		 * Pass to IPv4 or IPv6 reassembly mechanism.
		 */
		error = EINVAL;
		if (npf_iscached(&npc, NPC_IP4)) {
			struct ip *ip = nbuf_dataptr(*mp);
			error = ip_reass_packet(mp, ip);
		} else if (npf_iscached(&npc, NPC_IP6)) {
#ifdef INET6
			/*
			 * Note: ip6_reass_packet() offset is the start of
			 * the fragment header.
			 */
			const u_int hlen = npf_cache_hlen(&npc);
			error = ip6_reass_packet(mp, hlen);
#endif
		}
		if (error) {
			npf_stats_inc(NPF_STAT_REASSFAIL);
			se = NULL;
			goto out;
		}
		if (*mp == NULL) {
			/* More fragments should come; return. */
			npf_stats_inc(NPF_STAT_FRAGMENTS);
			return 0;
		}

		/*
		 * Reassembly is complete, we have the final packet.
		 * Cache again, since layer 4 data is accessible now.
		 */
		nbuf = (nbuf_t *)*mp;
		npc.npc_info = 0;
		if (npf_cache_all(&npc, nbuf) & NPC_IPFRAG) {
			se = NULL;
			goto out;
		}
		npf_stats_inc(NPF_STAT_REASSEMBLY);
	}

	/* Inspect the list of sessions. */
	se = npf_session_inspect(&npc, nbuf, ifp, di, &error);

	/* If "passing" session found - skip the ruleset inspection. */
	if (se && npf_session_pass(se, &rp)) {
		npf_stats_inc(NPF_STAT_PASS_SESSION);
		KASSERT(error == 0);
		goto pass;
	}
	if (error) {
		goto block;
	}

	/* Acquire the lock, inspect the ruleset using this packet. */
	npf_core_enter();
	rlset = npf_core_ruleset();
	rl = npf_ruleset_inspect(&npc, nbuf, rlset, ifp, di, NPF_LAYER_3);
	if (rl == NULL) {
		/* No rule matched: fall back to the default policy. */
		bool default_pass = npf_default_pass();
		npf_core_exit();
		if (default_pass) {
			npf_stats_inc(NPF_STAT_PASS_DEFAULT);
			goto pass;
		}
		npf_stats_inc(NPF_STAT_BLOCK_DEFAULT);
		goto block;
	}

	/*
	 * Get the rule procedure (acquires a reference) for association
	 * with a session (if any) and execution.
	 */
	KASSERT(rp == NULL);
	rp = npf_rule_getrproc(rl);

	/* Apply the rule, release the lock. */
	error = npf_rule_apply(&npc, nbuf, rl, &retfl);
	if (error) {
		npf_stats_inc(NPF_STAT_BLOCK_RULESET);
		goto block;
	}
	npf_stats_inc(NPF_STAT_PASS_RULESET);

	/*
	 * Establish a "pass" session, if required.  Just proceed, if session
	 * creation fails (e.g. due to unsupported protocol).
	 *
	 * Note: the reference on the rule procedure is transfered to the
	 * session.  It will be released on session destruction.
	 */
	if ((retfl & NPF_RULE_STATEFUL) != 0 && !se) {
		se = npf_session_establish(&npc, nbuf, ifp, di);
		if (se) {
			npf_session_setpass(se, rp);
		}
	}
pass:
	decision = NPF_DECISION_PASS;
	KASSERT(error == 0);
	/*
	 * Perform NAT.
	 */
	error = npf_do_nat(&npc, se, nbuf, ifp, di);
block:
	/*
	 * Execute the rule procedure, if any is associated.
	 * It may reverse the decision from pass to block.
	 */
	if (rp) {
		npf_rproc_run(&npc, nbuf, rp, &decision);
	}
out:
	/*
	 * Release the reference on a session.  Release the reference on a
	 * rule procedure only if there was no association.
	 */
	if (se) {
		npf_session_release(se);
	} else if (rp) {
		npf_rproc_release(rp);
	}

	/* Pass the packet if decided and there is no error. */
	if (decision == NPF_DECISION_PASS && !error) {
		/*
		 * XXX: Disable for now, it will be set accordingly later,
		 * for optimisations (to reduce inspection).
		 */
		(*mp)->m_flags &= ~M_CANFASTFWD;
		return 0;
	}

	/*
	 * Block the packet.  ENETUNREACH is used to indicate blocking.
	 * Depending on the flags and protocol, return TCP reset (RST) or
	 * ICMP destination unreachable.
	 */
	if (retfl && npf_return_block(&npc, nbuf, retfl)) {
		/* npf_return_block() consumed the packet. */
		*mp = NULL;
	}
	if (!error) {
		error = ENETUNREACH;
	}
	if (*mp) {
		/* Free the packet if it was not consumed above. */
		m_freem(*mp);
		*mp = NULL;
	}
	return error;
}
/*
 * npf_ncode_process: process n-code using data of the specified packet.
 *
 * => Argument nbuf (network buffer) is opaque to this function.
 * => Chain of nbufs (and their data) should be protected from any change.
 * => N-code memory address and thus instructions should be aligned.
 * => N-code should be protected from any change.
 * => Routine prevents from infinite loop.
 *
 * Returns the value of the RET instruction, or -1 on failure
 * (buffer exhausted, tag store failure or jump limit exceeded).
 *
 * FIX(review): the NPF_OPCODE_ETHER case passed the corrupted token
 * "(R)s[NPF_NREGS - 1]" (a mangled "&regs" — the "&reg" sequence was
 * converted to the U+00AE sign, apparently by an HTML-entity pass);
 * restored to &regs[NPF_NREGS - 1], the address of the last virtual
 * register.
 */
int
npf_ncode_process(npf_cache_t *npc, const void *ncode,
    nbuf_t *nbuf0, const int layer)
{
	/* N-code instruction pointer. */
	const void *i_ptr;
	/* Pointer of current nbuf in the chain. */
	nbuf_t *nbuf;
	/* Data pointer in the current nbuf. */
	void *n_ptr;
	/* Virtual registers. */
	uint32_t regs[NPF_NREGS];
	/* Local, state variables. */
	uint32_t d, i, n;
	npf_addr_t addr;
	u_int lcount;
	int cmpval;

	i_ptr = ncode;
	regs[0] = layer;

	/* Jump budget: guards against infinite loops in the n-code. */
	lcount = NPF_LOOP_LIMIT;
	cmpval = 0;

	/* Note: offset = n_ptr - nbuf_dataptr(nbuf); */
	nbuf = nbuf0;
	n_ptr = nbuf_dataptr(nbuf);

process_next:
	/*
	 * Loop must always start on instruction, therefore first word
	 * should be an opcode.  Most used instructions are checked first.
	 */
	i_ptr = nc_fetch_word(i_ptr, &d);
	if (__predict_true(NPF_CISC_OPCODE(d))) {
		/* It is a CISC-like instruction. */
		goto cisc_like;
	}

	/*
	 * RISC-like instructions.
	 *
	 * - ADVR, LW, CMP, CMPR
	 * - BEQ, BNE, BGT, BLT
	 * - RET, TAG, MOVE
	 * - AND, J, INVL
	 */
	switch (d) {
	case NPF_OPCODE_ADVR:
		/* Advance the data pointer by the value of a register. */
		i_ptr = nc_fetch_word(i_ptr, &i);	/* Register */
		KASSERT(i < NPF_NREGS);
		n_ptr = nbuf_advance(&nbuf, n_ptr, regs[i]);
		if (__predict_false(n_ptr == NULL)) {
			goto fail;
		}
		break;
	case NPF_OPCODE_LW:
		/* Load 1..4 bytes from the packet into a register. */
		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Size, register */
		KASSERT(i < NPF_NREGS);
		KASSERT(n >= sizeof(uint8_t) && n <= sizeof(uint32_t));
		if (nbuf_fetch_datum(nbuf, n_ptr, n, (uint32_t *)regs + i)) {
			goto fail;
		}
		break;
	case NPF_OPCODE_CMP:
		/* Compare an immediate value against a register. */
		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
		KASSERT(i < NPF_NREGS);
		if (n != regs[i]) {
			cmpval = (n > regs[i]) ? 1 : -1;
		} else {
			cmpval = 0;
		}
		break;
	case NPF_OPCODE_CMPR:
		/* Compare two registers. */
		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
		KASSERT(i < NPF_NREGS);
		if (regs[n] != regs[i]) {
			cmpval = (regs[n] > regs[i]) ? 1 : -1;
		} else {
			cmpval = 0;
		}
		break;
	case NPF_OPCODE_BEQ:
		i_ptr = nc_fetch_word(i_ptr, &n);	/* N-code line */
		if (cmpval == 0)
			goto make_jump;
		break;
	case NPF_OPCODE_BNE:
		i_ptr = nc_fetch_word(i_ptr, &n);
		if (cmpval != 0)
			goto make_jump;
		break;
	case NPF_OPCODE_BGT:
		i_ptr = nc_fetch_word(i_ptr, &n);
		if (cmpval > 0)
			goto make_jump;
		break;
	case NPF_OPCODE_BLT:
		i_ptr = nc_fetch_word(i_ptr, &n);
		if (cmpval < 0)
			goto make_jump;
		break;
	case NPF_OPCODE_RET:
		(void)nc_fetch_word(i_ptr, &n);		/* Return value */
		return n;
	case NPF_OPCODE_TAG:
		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Key, value */
		if (nbuf_add_tag(n_ptr, n, i)) {
			goto fail;
		}
		break;
	case NPF_OPCODE_MOVE:
		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
		KASSERT(i < NPF_NREGS);
		regs[i] = n;
		break;
	case NPF_OPCODE_AND:
		i_ptr = nc_fetch_double(i_ptr, &n, &i);	/* Value, register */
		KASSERT(i < NPF_NREGS);
		regs[i] = n & regs[i];
		break;
	case NPF_OPCODE_J:
		i_ptr = nc_fetch_word(i_ptr, &n);	/* N-code line */
make_jump:
		/* n - 2 skips the opcode and operand words just consumed. */
		i_ptr = nc_jump(i_ptr, n - 2, &lcount);
		if (__predict_false(i_ptr == NULL)) {
			goto fail;
		}
		break;
	case NPF_OPCODE_INVL:
		/* Invalidate all cached data. */
		npc->npc_info = 0;
		break;
	default:
		/* Invalid instruction. */
		KASSERT(false);
	}
	goto process_next;

cisc_like:
	/*
	 * CISC-like instructions.
	 */
	switch (d) {
	case NPF_OPCODE_IP4MASK:
		/* Source/destination, network address, subnet. */
		i_ptr = nc_fetch_word(i_ptr, &d);
		i_ptr = nc_fetch_double(i_ptr, &addr.s6_addr32[0], &n);
		cmpval = npf_match_ipmask(npc, nbuf, n_ptr,
		    (sizeof(struct in_addr) << 1) | (d & 0x1),
		    &addr, (npf_netmask_t)n);
		break;
	case NPF_OPCODE_IP6MASK:
		/* Source/destination, network address, subnet. */
		i_ptr = nc_fetch_word(i_ptr, &d);
		i_ptr = nc_fetch_double(i_ptr,
		    &addr.s6_addr32[0], &addr.s6_addr32[1]);
		i_ptr = nc_fetch_double(i_ptr,
		    &addr.s6_addr32[2], &addr.s6_addr32[3]);
		i_ptr = nc_fetch_word(i_ptr, &n);
		cmpval = npf_match_ipmask(npc, nbuf, n_ptr,
		    (sizeof(struct in6_addr) << 1) | (d & 0x1),
		    &addr, (npf_netmask_t)n);
		break;
	case NPF_OPCODE_TABLE:
		/* Source/destination, NPF table ID. */
		i_ptr = nc_fetch_double(i_ptr, &n, &i);
		cmpval = npf_match_table(npc, nbuf, n_ptr, n, i);
		break;
	case NPF_OPCODE_TCP_PORTS:
		/* Source/destination, port range. */
		i_ptr = nc_fetch_double(i_ptr, &n, &i);
		cmpval = npf_match_tcp_ports(npc, nbuf, n_ptr, n, i);
		break;
	case NPF_OPCODE_UDP_PORTS:
		/* Source/destination, port range. */
		i_ptr = nc_fetch_double(i_ptr, &n, &i);
		cmpval = npf_match_udp_ports(npc, nbuf, n_ptr, n, i);
		break;
	case NPF_OPCODE_TCP_FLAGS:
		/* TCP flags/mask. */
		i_ptr = nc_fetch_word(i_ptr, &n);
		cmpval = npf_match_tcpfl(npc, nbuf, n_ptr, n);
		break;
	case NPF_OPCODE_ICMP4:
		/* ICMP type/code. */
		i_ptr = nc_fetch_word(i_ptr, &n);
		cmpval = npf_match_icmp4(npc, nbuf, n_ptr, n);
		break;
	case NPF_OPCODE_ICMP6:
		/* ICMP type/code. */
		i_ptr = nc_fetch_word(i_ptr, &n);
		cmpval = npf_match_icmp6(npc, nbuf, n_ptr, n);
		break;
	case NPF_OPCODE_PROTO:
		i_ptr = nc_fetch_word(i_ptr, &n);
		cmpval = npf_match_proto(npc, nbuf, n_ptr, n);
		break;
	case NPF_OPCODE_ETHER:
		/* Source/destination, reserved, ethernet type. */
		i_ptr = nc_fetch_word(i_ptr, &d);
		i_ptr = nc_fetch_double(i_ptr, &n, &i);
		cmpval = npf_match_ether(nbuf, d, n, i,
		    &regs[NPF_NREGS - 1]);
		break;
	default:
		/* Invalid instruction. */
		KASSERT(false);
	}
	goto process_next;

fail:
	/* Failure case. */
	return -1;
}
static int test_bpf_code(void *code, size_t size) { ifnet_t *dummy_ifp = npf_test_addif(IFNAME_TEST, false, false); npf_cache_t npc = { .npc_info = 0, .npc_ctx = npf_getkernctx() }; uint32_t memstore[BPF_MEMWORDS]; bpf_args_t bc_args; struct mbuf *m; nbuf_t nbuf; int ret, jret; void *jcode; /* Layer 3 (IP + TCP). */ m = fill_packet(IPPROTO_TCP); nbuf_init(npf_getkernctx(), &nbuf, m, dummy_ifp); npc.npc_nbuf = &nbuf; npf_cache_all(&npc); #ifdef _NPF_STANDALONE bc_args.pkt = (const uint8_t *)nbuf_dataptr(&nbuf); #else bc_args.pkt = (const uint8_t *)m; #endif bc_args.buflen = m_length(m); bc_args.wirelen = bc_args.buflen; bc_args.mem = memstore; bc_args.arg = &npc; ret = npf_bpf_filter(&bc_args, code, NULL); /* JIT-compiled code. */ jcode = npf_bpf_compile(code, size); if (jcode) { jret = npf_bpf_filter(&bc_args, NULL, jcode); assert(ret == jret); bpf_jit_freecode(jcode); } else if (lverbose) { printf("JIT-compilation failed\n"); } m_freem(m); return ret; } static uint32_t npf_bpfcop_run(u_int reg) { struct bpf_insn insns_npf_bpfcop[] = { BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_L3), BPF_STMT(BPF_LD+BPF_W+BPF_MEM, reg), BPF_STMT(BPF_RET+BPF_A, 0), }; return test_bpf_code(&insns_npf_bpfcop, sizeof(insns_npf_bpfcop)); } static bool npf_bpfcop_test(void) { bool fail = false; /* A <- IP version (4 or 6) */ struct bpf_insn insns_ipver[] = { BPF_STMT(BPF_MISC+BPF_COP, NPF_COP_L3), BPF_STMT(BPF_RET+BPF_A, 0), }; fail |= (test_bpf_code(&insns_ipver, sizeof(insns_ipver)) != IPVERSION); /* BPF_MW_IPVERI <- IP version */ fail |= (npf_bpfcop_run(BPF_MW_IPVER) != IPVERSION); /* BPF_MW_L4OFF <- L4 header offset */ fail |= (npf_bpfcop_run(BPF_MW_L4OFF) != sizeof(struct ip)); /* BPF_MW_L4PROTO <- L4 protocol */ fail |= (npf_bpfcop_run(BPF_MW_L4PROTO) != IPPROTO_TCP); return fail; }