/** * This is just a wrapper. Its sole intent is to minimize mess below. */ static int find_mask_domain(struct xlation *state, struct ipv4_transport_addr *dst, struct mask_domain **masks) { struct ipv6hdr *hdr6 = pkt_ip6_hdr(&state->in); struct route4_args args = { .ns = state->jool.ns, .daddr = dst->l3, .tos = ttp64_xlat_tos(&state->jool.global->cfg, hdr6), .proto = ttp64_xlat_proto(hdr6), .mark = state->in.skb->mark, }; *masks = mask_domain_find(state->jool.nat64.pool4, &state->in.tuple, state->jool.global->cfg.nat64.f_args, &args); if (*masks) return 0; log_debug("There is no mask domain mapped to mark %u.", state->in.skb->mark); return -EINVAL; } /** * Assumes that "tuple" represents a IPv6-UDP or ICMP packet, and filters and * updates based on it. * * This is RFC 6146, first halves of both sections 3.5.1 and 3.5.3. * * @pkt: tuple's packet. This is actually only used for error reporting. * @tuple: summary of the packet Jool is currently translating. */ static verdict ipv6_simple(struct xlation *state) { struct ipv4_transport_addr dst4; struct mask_domain *masks; int error; if (xlat_dst_6to4(state, &dst4)) return drop(state, JSTAT_UNTRANSLATABLE_DST6); if (find_mask_domain(state, &dst4, &masks)) return drop(state, JSTAT_MASK_DOMAIN_NOT_FOUND); error = bib_add6(state, masks, &state->in.tuple, &dst4); mask_domain_put(masks); switch (error) { case 0: return succeed(state); default: /* * Error msg already printed, but since bib_add6() sprawls * messily, let's leave this here just in case. */ log_debug("bib_add6() threw error code %d.", error); return drop(state, JSTAT_BIB6_NOT_FOUND); } } /** * Assumes that "tuple" represents a IPv4-UDP or ICMP packet, and filters and * updates based on it. * * This is RFC 6146, second halves of both sections 3.5.1 and 3.5.3. * * @pkt skb tuple's packet. This is actually only used for error reporting. * @tuple4 tuple summary of the packet Jool is currently translating. 
 * @return VERDICT_CONTINUE if everything went OK, a drop verdict otherwise.
 */
static verdict ipv4_simple(struct xlation *state)
{
	/*
	 * Because this is the IPv4->IPv6 direction, what the tuple labels
	 * "source" is what the BIB entry labels "destination."
	 * We're inheriting this naming quirk from the RFC.
	 */
	struct ipv4_transport_addr *dst4 = &state->in.tuple.src.addr4;
	struct ipv6_transport_addr dst6;
	int error;

	/* Synthesize the IPv6 "destination" from the IPv4 one (RFC 6052). */
	if (RFC6052_4TO6(state, &dst4->l3, &dst6.l3))
		return drop(state, JSTAT_UNTRANSLATABLE_DST4);
	dst6.l4 = dst4->l4; /* Port/ICMP id is carried over untouched. */

	error = bib_add4(state, &dst6, &state->in.tuple);
	switch (error) {
	case 0:
		return succeed(state);
	case -ESRCH:
		/* No binding: unsolicited inbound packet; answer with ICMP. */
		log_debug("There is no BIB entry for the IPv4 packet.");
		return untranslatable_icmp(state, JSTAT_BIB4_NOT_FOUND, ICMPERR_ADDR_UNREACHABLE, 0);
	case -EPERM:
		log_debug("Packet was blocked by Address-Dependent Filtering.");
		return drop_icmp(state, JSTAT_ADF, ICMPERR_FILTER, 0);
	default:
		log_debug("Errcode %d while finding a BIB entry.", error);
		return drop(state, JSTAT_UNKNOWN);
	}
}

/**
 * Filtering and updating during the V4 INIT state of the TCP state machine.
 * Part of RFC 6146 section 3.5.2.2.
 *
 * Returns the fate the session should suffer (state transition and/or timer
 * change); see enum session_fate.
 */
static enum session_fate tcp_v4_init_state(struct session_entry *session, struct xlation *state)
{
	struct packet *pkt = &state->in;

	switch (pkt_l3_proto(pkt)) {
	case L3PROTO_IPV6:
		/* The awaited v6 SYN arrived; the session graduates. */
		if (pkt_tcp_hdr(pkt)->syn) {
			if (session->has_stored)
				log_debug("Simultaneous Open!");
			session->state = ESTABLISHED;
			session->has_stored = false;
			return FATE_TIMER_EST;
		}
		break;
	/**
	 * "OMG WHAT IS THIS?!!!!1!1oneone"
	 *
	 * Well, basically, they don't seem to have tested the packet storage
	 * thing all that well while writing the RFC.
	 *
	 * This is a patch that helps type 2 packets work. This is the problem:
	 *
	 * - IPv4 node n4 writes a TCP SYN. Let's call this packet "A".
	 *   A arrives to the NAT64.
	 * - Let's say there is a BIB entry but no session that matches A, and
	 *   also, ADF is active, so the NAT64 decides to store A.
	 *   To this end, it creates and stores session entry [src6=a, dst6=b,
	 *   src4=c, dst4=d, proto=TCP, state=V4 INIT, stored=A].
	 *   A is not translated.
	 *
	 * The intent is that the NAT64 is now waiting for an IPv6 packet "B"
	 * that is the Simultaneous Open counterpart to A. If B arrives within 6
	 * seconds, A is allowed, and if it doesn't, then A is not allowed and
	 * will be ICMP errored.
	 * So far so good, right?
	 *
	 * Wrong.
	 *
	 * The problem is that A created a fully valid session that corresponds
	 * to itself. Because n4 doesn't receive an answer, it retries A. It
	 * does so before the 6-second timeout because sockets are impatient
	 * like that. So A2 arrives at the NAT64 and is translated successfully
	 * because there's now a valid session that matches it. In other words,
	 * A authorized itself despite ADF.
	 *
	 * One might argue that this would be a reason to not treat type 1 and 2
	 * packets differently: Simply store these bogus sessions away from the
	 * main database and the A2 session lookup will fail. This doesn't work
	 * either, because the whole thing is that this session needs to be
	 * lookupable in the 6-to-4 direction, otherwise B cannot cancel the
	 * ICMP error.
	 *
	 * Also, these sessions are mapped to a valid BIB entry, and as such
	 * need to prevent this entry from dying. This is hard to enforce when
	 * storing these sessions in another database.
	 *
	 * So the core of the issue is that the V4 INIT state lets v4 packets
	 * through even when ADF is active. Hence this switch case.
	 * (Because this only handles type 2 packets, ADF active = packet stored
	 * in this case.)
	 *
	 * Type 1 packets don't suffer from this problem because they aren't
	 * associated with a valid BIB entry.
	 *
	 * Similar to type 1 packets, we will assume that this retry is not
	 * entitled to a session timeout update. Or any session updates, for
	 * that matter. (See pktqueue_add())
	 */
	case L3PROTO_IPV4:
		if (session->has_stored) {
			log_debug("Simultaneous Open already exists.");
			return FATE_DROP;
		}
		break;
	}

	return FATE_PRESERVE;
}

/**
 * Filtering and updating during the V6 INIT state of the TCP state machine.
 * Part of RFC 6146 section 3.5.2.2.
 */
static enum session_fate tcp_v6_init_state(struct session_entry *session, struct xlation *state)
{
	struct packet *pkt = &state->in;

	if (pkt_tcp_hdr(pkt)->syn) {
		switch (pkt_l3_proto(pkt)) {
		case L3PROTO_IPV4:
			/* v4 SYN answered the v6 SYN: connection established. */
			session->state = ESTABLISHED;
			return FATE_TIMER_EST;
		case L3PROTO_IPV6:
			/* v6 SYN retransmission: refresh transitory timer only. */
			return FATE_TIMER_TRANS;
		}
	}

	return FATE_PRESERVE;
}

/**
 * Filtering and updating during the ESTABLISHED state of the TCP state machine.
 * Part of RFC 6146 section 3.5.2.2.
 */
static enum session_fate tcp_established_state(struct session_entry *session, struct xlation *state)
{
	struct packet *pkt = &state->in;

	if (pkt_tcp_hdr(pkt)->fin) {
		/* One side started closing; remember which. */
		switch (pkt_l3_proto(pkt)) {
		case L3PROTO_IPV4:
			session->state = V4_FIN_RCV;
			break;
		case L3PROTO_IPV6:
			session->state = V6_FIN_RCV;
			break;
		}
		return FATE_PRESERVE;
	} else if (pkt_tcp_hdr(pkt)->rst) {
		/* Reset: downgrade to the short (transitory) lifetime. */
		session->state = TRANS;
		return FATE_TIMER_TRANS;
	}

	return FATE_TIMER_EST;
}

/* Config knob controlling the RST quirk below; see issue #212. */
static bool handle_rst_during_fin_rcv(struct xlation *state)
{
	return state->jool.global->cfg.nat64.handle_rst_during_fin_rcv;
}

/**
 * Filtering and updating during the V4 FIN RCV state of the TCP state machine.
 * Part of RFC 6146 section 3.5.2.2.
 */
static enum session_fate tcp_v4_fin_rcv_state(struct session_entry *session, struct xlation *state)
{
	struct packet *pkt = &state->in;
	struct tcphdr *hdr;

	if (pkt_l3_proto(pkt) == L3PROTO_IPV6) {
		hdr = pkt_tcp_hdr(pkt);
		if (hdr->fin) {
			/* Both sides have FINned now. */
			session->state = V4_FIN_V6_FIN_RCV;
			return FATE_TIMER_TRANS;
		}
		if (hdr->rst && handle_rst_during_fin_rcv(state)) {
			/* https://github.com/NICMx/Jool/issues/212 */
			return FATE_TIMER_TRANS;
		}
	}

	return FATE_TIMER_EST;
}

/**
 * Filtering and updating during the V6 FIN RCV state of the TCP state machine.
* Part of RFC 6146 section 3.5.2.2. */ static enum session_fate tcp_v6_fin_rcv_state(struct session_entry *session, struct xlation *state) { struct packet *pkt = &state->in; struct tcphdr *hdr; if (pkt_l3_proto(pkt) == L3PROTO_IPV4) { hdr = pkt_tcp_hdr(pkt); if (hdr->fin) { session->state = V4_FIN_V6_FIN_RCV; return FATE_TIMER_TRANS; } if (hdr->rst && handle_rst_during_fin_rcv(state)) { /* https://github.com/NICMx/Jool/issues/212 */ return FATE_TIMER_TRANS; } } return FATE_TIMER_EST; } /** * Filtering and updating during the V6 FIN + V4 FIN RCV state of the TCP state * machine. * Part of RFC 6146 section 3.5.2.2. */ static enum session_fate tcp_v4_fin_v6_fin_rcv_state(void) { return FATE_PRESERVE; /* Only the timeout can change this state. */ } /** * Filtering and updating done during the TRANS state of the TCP state machine. * Part of RFC 6146 section 3.5.2.2. */ static enum session_fate tcp_trans_state(struct session_entry *session, struct xlation *state) { struct packet *pkt = &state->in; if (!pkt_tcp_hdr(pkt)->rst) { session->state = ESTABLISHED; return FATE_TIMER_EST; } return FATE_PRESERVE; } static enum session_fate tcp_state_machine(struct session_entry *session, void *arg) { switch (session->state) { case ESTABLISHED: return tcp_established_state(session, arg); case V4_INIT: return tcp_v4_init_state(session, arg); case V6_INIT: return tcp_v6_init_state(session, arg); case V4_FIN_RCV: return tcp_v4_fin_rcv_state(session, arg); case V6_FIN_RCV: return tcp_v6_fin_rcv_state(session, arg); case V4_FIN_V6_FIN_RCV: return tcp_v4_fin_v6_fin_rcv_state(); case TRANS: return tcp_trans_state(session, arg); } WARN(true, "Invalid state found: %u.", session->state); return FATE_RM; } /** * IPv6 half of RFC 6146 section 3.5.2. 
 */
static verdict ipv6_tcp(struct xlation *state)
{
	struct ipv4_transport_addr dst4;
	struct collision_cb cb;
	struct mask_domain *masks;
	verdict result;

	if (xlat_dst_6to4(state, &dst4))
		return drop(state, JSTAT_UNTRANSLATABLE_DST6);
	if (find_mask_domain(state, &dst4, &masks))
		return drop(state, JSTAT_MASK_DOMAIN_NOT_FOUND);

	/* The state machine runs inside bib_add_tcp6()'s critical section. */
	cb.cb = tcp_state_machine;
	cb.arg = state;
	result = bib_add_tcp6(state, masks, &dst4, &cb);
	mask_domain_put(masks);

	return (result == VERDICT_CONTINUE) ? succeed(state) : result;
}

/**
 * IPv4 half of RFC 6146 section 3.5.2.
 */
static verdict ipv4_tcp(struct xlation *state)
{
	/* "src" is the BIB's "destination"; see ipv4_simple(). */
	struct ipv4_transport_addr *dst4 = &state->in.tuple.src.addr4;
	struct ipv6_transport_addr dst6;
	struct collision_cb cb;
	verdict result;

	if (RFC6052_4TO6(state, &dst4->l3, &dst6.l3))
		return drop(state, JSTAT_UNTRANSLATABLE_DST4);
	dst6.l4 = dst4->l4;

	/* The state machine runs inside bib_add_tcp4()'s critical section. */
	cb.cb = tcp_state_machine;
	cb.arg = state;
	result = bib_add_tcp4(state, &dst6, &cb);

	return (result == VERDICT_CONTINUE) ? succeed(state) : result;
}

/**
 * filtering_and_updating - Main F&U routine. Decides if "skb" should be
 * processed, updating binding and session information.
 *
 * Returns VERDICT_CONTINUE to keep translating, or a terminating verdict
 * (drop/untranslatable, possibly with an ICMP error) otherwise.
 */
verdict filtering_and_updating(struct xlation *state)
{
	struct packet *in = &state->in;
	struct ipv6hdr *hdr_ip6;
	verdict result = VERDICT_CONTINUE;

	log_debug("Step 2: Filtering and Updating");

	switch (pkt_l3_proto(in)) {
	case L3PROTO_IPV6:
		/* Get rid of hairpinning loops and unwanted packets. */
		hdr_ip6 = pkt_ip6_hdr(in);
		if (pool6_contains(state, &hdr_ip6->saddr)) {
			log_debug("Hairpinning loop. Dropping...");
			return drop(state, JSTAT_HAIRPIN_LOOP);
		}
		if (!pool6_contains(state, &hdr_ip6->daddr)) {
			log_debug("Packet does not belong to pool6.");
			return untranslatable(state, JSTAT_POOL6_MISMATCH);
		}

		/* ICMP errors should not be filtered or affect the tables. */
		if (pkt_is_icmp6_error(in)) {
			log_debug("Packet is ICMPv6 error; skipping step...");
			return VERDICT_CONTINUE;
		}
		break;
	case L3PROTO_IPV4:
		/* Get rid of unexpected packets */
		if (!pool4db_contains(state->jool.nat64.pool4, state->jool.ns, in->tuple.l4_proto, &in->tuple.dst.addr4)) {
			log_debug("Packet does not belong to pool4.");
			return untranslatable(state, JSTAT_POOL4_MISMATCH);
		}

		/* ICMP errors should not be filtered or affect the tables. */
		if (pkt_is_icmp4_error(in)) {
			log_debug("Packet is ICMPv4 error; skipping step...");
			return VERDICT_CONTINUE;
		}
		break;
	}

	/*
	 * Note: I'm sorry, but the remainder of the Filtering and Updating step
	 * is not going to be done in the order in which the RFC explains it.
	 * This is because the BIB has a critical spinlock, and we need to
	 * take out as much work from it as possible.
	 */
	switch (pkt_l4_proto(in)) {
	case L4PROTO_UDP:
		switch (pkt_l3_proto(in)) {
		case L3PROTO_IPV6:
			result = ipv6_simple(state);
			break;
		case L3PROTO_IPV4:
			result = ipv4_simple(state);
			break;
		}
		break;
	case L4PROTO_TCP:
		switch (pkt_l3_proto(in)) {
		case L3PROTO_IPV6:
			result = ipv6_tcp(state);
			break;
		case L3PROTO_IPV4:
			result = ipv4_tcp(state);
			break;
		}
		break;
	case L4PROTO_ICMP:
		switch (pkt_l3_proto(in)) {
		case L3PROTO_IPV6:
			/* Policy knob: optionally refuse to translate pings. */
			if (state->jool.global->cfg.nat64.drop_icmp6_info) {
				log_debug("Packet is ICMPv6 info (ping); dropping due to policy.");
				return drop(state, JSTAT_ICMP6_FILTER);
			}
			result = ipv6_simple(state);
			break;
		case L3PROTO_IPV4:
			result = ipv4_simple(state);
			break;
		}
		break;
	case L4PROTO_OTHER:
		/* Should be unreachable; earlier steps only admit UDP/TCP/ICMP. */
		WARN(true, "Unknown layer 4 protocol: %d", pkt_l4_proto(in));
		return drop(state, JSTAT_UNKNOWN_L4_PROTO);
	}

	log_debug("Done: Step 2.");
	return result;
}
/* Tests min_mtu(), the ICMPv6 Packet Too Big MTU computation. */
static bool test_function_icmp6_minimum_mtu(void)
{
	struct xlation state = { .jool.global = config };
	int i;
	bool success = true;

	/*
	 * I'm assuming the default plateaus list has 3 elements or more.
	 * (so I don't have to reallocate mtu_plateaus)
	 */
	config->cfg.mtu_plateaus[0] = 5000;
	config->cfg.mtu_plateaus[1] = 4000;
	config->cfg.mtu_plateaus[2] = 500;
	/*
	 * NOTE(review): three plateau values are written but count is set to 2,
	 * so plateau 500 is excluded. The later "min(500, ...)" labels suggest
	 * count was meant to be 3 — TODO confirm. (The 1280 clamp makes the
	 * assertions below pass either way.)
	 */
	config->cfg.mtu_plateau_count = 2;

	/* Simple tests */
	success &= ASSERT_UINT(1320, min_mtu(1300, 3000, 3000, 2000), "min(1300, 3000, 3000)");
	success &= ASSERT_UINT(1321, min_mtu(3001, 1301, 3001, 2001), "min(3001, 1301, 3001)");
	success &= ASSERT_UINT(1302, min_mtu(3002, 3002, 1302, 2002), "min(3002, 3002, 1302)");
	if (!success)
		return false;

	/* Lowest MTU is illegal on IPv6. */
	success &= ASSERT_UINT(1280, min_mtu(100, 200, 200, 150), "min(100, 200, 200)");
	success &= ASSERT_UINT(1280, min_mtu(200, 100, 200, 150), "min(200, 100, 200)");
	success &= ASSERT_UINT(1280, min_mtu(200, 200, 100, 150), "min(200, 200, 100)");

	/* Test plateaus (pkt is min). */
	for (i = 5500; i > 5000 && success; --i)
		success &= ASSERT_UINT(5020, min_mtu(0, 6000, 6000, i), "min(%d, 6000, 6000)", i);
	for (i = 5000; i > 4000 && success; --i)
		success &= ASSERT_UINT(4020, min_mtu(0, 6000, 6000, i), "min(%d, 6000, 6000)", i);
	for (i = 4000; i >= 0 && success; --i)
		success &= ASSERT_UINT(1280, min_mtu(0, 6000, 6000, i), "min(%d, 6000, 6000)", i);

	/* Test plateaus (in/out is min). */
	success &= ASSERT_UINT(1420, min_mtu(0, 1400, 5500, 4500), "min(4000,1400,5500)");
	success &= ASSERT_UINT(1400, min_mtu(0, 5500, 1400, 4500), "min(4000,5500,1400)");

	/* Plateaus and illegal MTU at the same time. */
	success &= ASSERT_UINT(1280, min_mtu(0, 700, 700, 1000), "min(500, 700, 700)");
	success &= ASSERT_UINT(1280, min_mtu(0, 1, 700, 1000), "min(500, 1, 700)");
	success &= ASSERT_UINT(1280, min_mtu(0, 700, 1, 1000), "min(500, 700, 1)");

	return success;
}

#undef min_mtu

/* Tests the ICMPv4 -> ICMPv6 Parameter Problem pointer translation. */
static bool test_function_icmp4_to_icmp6_param_prob(void)
{
	struct icmphdr hdr4;
	struct icmp6hdr hdr6;
	bool success = true;

	/* Pointer 8 (v4) should map to pointer 7 (v6). */
	hdr4.type = ICMP_PARAMETERPROB;
	hdr4.code = ICMP_PTR_INDICATES_ERROR;
	hdr4.icmp4_unused = cpu_to_be32(0x08000000U);
	success &= ASSERT_INT(0, icmp4_to_icmp6_param_prob(&hdr4, &hdr6), "func result 1");
	success &= ASSERT_UINT(ICMPV6_HDR_FIELD, hdr6.icmp6_code, "code");
	success &= ASSERT_UINT(7, be32_to_cpu(hdr6.icmp6_pointer), "pointer");

	/* Pointer 5 has no v6 counterpart; translation should be refused. */
	hdr4.icmp4_unused = cpu_to_be32(0x05000000U);
	success &= ASSERT_INT(-EINVAL, icmp4_to_icmp6_param_prob(&hdr4, &hdr6), "func result 2");

	return success;
}

/* Tests the IPv4 identification field generator. */
static bool test_function_generate_ipv4_id(void)
{
	struct frag_hdr hdr;
	__be16 attempt_1, attempt_2, attempt_3;
	bool success = true;

	/* NULL fragment header -> random id. */
	attempt_1 = generate_ipv4_id(NULL);
	attempt_2 = generate_ipv4_id(NULL);
	attempt_3 = generate_ipv4_id(NULL);
	/*
	 * At least one of the attempts should be nonzero,
	 * otherwise the random would be sucking major ****.
	 */
	success &= ASSERT_BOOL(true, (attempt_1 | attempt_2 | attempt_3) != 0, "No frag");

	/* With a fragment header, the low 16 bits of the v6 id are kept. */
	hdr.identification = 0;
	success &= ASSERT_BE16(0, generate_ipv4_id(&hdr), "Simplest id");
	hdr.identification = cpu_to_be32(0x0000abcdU);
	success &= ASSERT_BE16(0xabcd, generate_ipv4_id(&hdr), "No overflow");
	hdr.identification = cpu_to_be32(0x12345678U);
	success &= ASSERT_BE16(0x5678, generate_ipv4_id(&hdr), "Overflow");

	return success;
}

/* Tests the DF flag generator: DF is set only when total length > 1260. */
static bool test_function_generate_df_flag(void)
{
	struct packet pkt;
	struct sk_buff *skb;
	bool success = true;

	skb = alloc_skb(1500, GFP_ATOMIC);
	if (!skb)
		return false;
	pkt.skb = skb;

	skb_put(skb, 1000);
	success &= ASSERT_UINT(0, generate_df_flag(&pkt), "Len < 1260");
	skb_put(skb, 260);
	success &= ASSERT_UINT(0, generate_df_flag(&pkt), "Len = 1260");
	skb_put(skb, 200);
	success &= ASSERT_UINT(1, generate_df_flag(&pkt), "Len > 1260");

	kfree_skb(skb);
	return success;
}

/**
 * By the way. This test kind of looks like it should test more combinations of headers.
 * But that'd be testing the header iterator, not the build_protocol_field() function.
 * Please look elsewhere for that.
 */
static bool test_function_build_protocol_field(void)
{
	struct ipv6hdr *ip6_hdr;
	struct ipv6_opt_hdr *hop_by_hop_hdr;
	struct ipv6_opt_hdr *routing_hdr;
	struct ipv6_opt_hdr *dest_options_hdr;
	struct icmp6hdr *icmp6_hdr;

	/* Room for the v6 header, three extension headers (8+16+24), and TCP. */
	ip6_hdr = kmalloc(sizeof(*ip6_hdr) + 8 + 16 + 24 + sizeof(struct tcphdr), GFP_ATOMIC);
	if (!ip6_hdr) {
		log_err("Could not allocate a test packet.");
		goto failure;
	}

	/* Just ICMP. */
	ip6_hdr->nexthdr = NEXTHDR_ICMP;
	ip6_hdr->payload_len = cpu_to_be16(sizeof(*icmp6_hdr));
	if (!ASSERT_UINT(IPPROTO_ICMP, ttp64_xlat_proto(ip6_hdr), "Just ICMP"))
		goto failure;

	/* Skippable headers then ICMP.
	 */
	ip6_hdr->nexthdr = NEXTHDR_HOP;
	ip6_hdr->payload_len = cpu_to_be16(8 + 16 + 24 + sizeof(*icmp6_hdr));
	hop_by_hop_hdr = (struct ipv6_opt_hdr *) (ip6_hdr + 1);
	hop_by_hop_hdr->nexthdr = NEXTHDR_ROUTING;
	hop_by_hop_hdr->hdrlen = 0; /* the hdrlen field does not include the first 8 octets. */
	routing_hdr = (struct ipv6_opt_hdr *) (((unsigned char *) hop_by_hop_hdr) + 8);
	routing_hdr->nexthdr = NEXTHDR_DEST;
	routing_hdr->hdrlen = 1; /* 8 + 8 = 16 octets. */
	dest_options_hdr = (struct ipv6_opt_hdr *) (((unsigned char *) routing_hdr) + 16);
	dest_options_hdr->nexthdr = NEXTHDR_ICMP;
	dest_options_hdr->hdrlen = 2; /* 8 + 16 = 24 octets. */
	if (!ASSERT_UINT(IPPROTO_ICMP, ttp64_xlat_proto(ip6_hdr), "Skippable then ICMP"))
		goto failure;

	/* Skippable headers then something else */
	dest_options_hdr->nexthdr = NEXTHDR_TCP;
	ip6_hdr->payload_len = cpu_to_be16(8 + 16 + 24 + sizeof(struct tcphdr));
	if (!ASSERT_UINT(IPPROTO_TCP, ttp64_xlat_proto(ip6_hdr), "Skippable then TCP"))
		goto failure;

	kfree(ip6_hdr);
	return true;

failure:
	kfree(ip6_hdr);
	return false;
}

/*
 * Tests has_nonzero_segments_left(): detection of routing headers whose
 * segments_left field is nonzero, and the byte offset reported for them.
 */
static bool test_function_has_nonzero_segments_left(void)
{
	struct ipv6hdr *ip6_hdr;
	struct ipv6_rt_hdr *routing_hdr;
	struct frag_hdr *fragment_hdr;
	__u32 offset;
	bool success = true;

	ip6_hdr = kmalloc(sizeof(*ip6_hdr) + sizeof(*fragment_hdr) + sizeof(*routing_hdr), GFP_ATOMIC);
	if (!ip6_hdr) {
		log_err("Could not allocate a test packet.");
		return false;
	}
	ip6_hdr->payload_len = cpu_to_be16(sizeof(*fragment_hdr) + sizeof(*routing_hdr));

	/* No extension headers. */
	ip6_hdr->nexthdr = NEXTHDR_TCP;
	success &= ASSERT_BOOL(false, has_nonzero_segments_left(ip6_hdr, &offset), "No extension headers");
	if (!success)
		goto end;

	/* Routing header with nonzero segments left. */
	ip6_hdr->nexthdr = NEXTHDR_ROUTING;
	routing_hdr = (struct ipv6_rt_hdr *) (ip6_hdr + 1);
	routing_hdr->segments_left = 12;
	success &= ASSERT_BOOL(true, has_nonzero_segments_left(ip6_hdr, &offset), "Nonzero left - result");
	/* 40-byte IPv6 header + segments_left at byte 3 of the routing header. */
	success &= ASSERT_UINT(40 + 3, offset, "Nonzero left - offset");
	if (!success)
		goto end;

	/* Routing header with zero segments left. */
	routing_hdr->segments_left = 0;
	success &= ASSERT_BOOL(false, has_nonzero_segments_left(ip6_hdr, &offset), "Zero left");
	if (!success)
		goto end;

	/*
	 * Fragment header, then routing header with nonzero segments left
	 * (further test the out parameter).
	 */
	ip6_hdr->nexthdr = NEXTHDR_FRAGMENT;
	fragment_hdr = (struct frag_hdr *) (ip6_hdr + 1);
	fragment_hdr->nexthdr = NEXTHDR_ROUTING;
	routing_hdr = (struct ipv6_rt_hdr *) (fragment_hdr + 1);
	routing_hdr->segments_left = 24;
	success &= ASSERT_BOOL(true, has_nonzero_segments_left(ip6_hdr, &offset), "Two headers - result");
	/* 40-byte IPv6 header + 8-byte fragment header + offset 3. */
	success &= ASSERT_UINT(40 + 8 + 3, offset, "Two headers - offset");
	/* Fall through. */

end:
	kfree(ip6_hdr);
	return success;
}

/* Tests the minimum() helper used for the ICMPv4 "packet too big" MTU. */
static bool test_function_icmp4_minimum_mtu(void)
{
	bool success = true;

	success &= ASSERT_UINT(2, be16_to_cpu(minimum(2, 4, 6)), "First is min");
	success &= ASSERT_UINT(8, be16_to_cpu(minimum(10, 8, 12)), "Second is min");
	success &= ASSERT_UINT(14, be16_to_cpu(minimum(16, 18, 14)), "Third is min");

	return success;
}