/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; __be32 saddr = iph->saddr; __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; unsigned int hh_len; if (addr_type == RTN_UNSPEC) addr_type = inet_addr_type(net, saddr); if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST) flags |= FLOWI_FLAG_ANYSRC; else saddr = 0; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ fl4.daddr = iph->daddr; fl4.saddr = saddr; fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl4.flowi4_mark = skb->mark; fl4.flowi4_flags = flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; /* Drop old route. */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); if (skb_dst(skb)->error) return -1; #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); } #endif /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 0, GFP_ATOMIC)) return -1; return 0; }
static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; const struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .flowi4_oif = inet_iif(skb), .daddr = iph->saddr, .saddr = iph->daddr, .flowi4_tos = RT_CONN_FLAGS(sk), .flowi4_proto = sk->sk_protocol, .fl4_sport = dccp_hdr(skb)->dccph_dport, .fl4_dport = dccp_hdr(skb)->dccph_sport, }; security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } return &rt->dst; } static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req) { int err = -1; struct sk_buff *skb; struct dst_entry *dst; struct flowi4 fl4; dst = inet_csk_route_req(sk, &fl4, req); if (dst == NULL) goto out; skb = dccp_make_response(sk, dst, req); if (skb != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt)); rcu_read_unlock(); err = net_xmit_eval(err); } out: dst_release(dst); return err; }
int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, .flags = FIB_LOOKUP_NOREF, }; int err; err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); res->r = arg.rule; return err; }
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; struct flowi4 fl4; int free = 0; __be32 daddr; __be32 saddr; u8 tos; int err; struct ip_options_data opt_copy; err = -EMSGSIZE; if (len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { pr_info_once("%s: %s forgot to set AF_INET. Fix it!\n", __func__, current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->inet_daddr; } ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) goto out; if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) { struct ip_options_rcu *inet_opt; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { memcpy(&opt_copy, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen); ipc.opt = &opt_copy.opt; } rcu_read_unlock(); } if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (inet->hdrincl) goto done; if (ipc.opt->opt.srr) { if (!daddr) goto done; daddr = ipc.opt->opt.faddr; } } tos = RT_CONN_FLAGS(sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } else if (!ipc.oif) ipc.oif = inet->uc_index; flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0, sock_i_uid(sk)); if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto done; } err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, &fl4, msg->msg_iov, len, &rt, msg->msg_flags); else { if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); err = ip_append_data(sk, &fl4, ip_generic_getfrag, msg->msg_iov, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) { err = ip_push_pending_frames(sk, &fl4); if (err == -ENOBUFS && !inet->recverr) err = 0; } release_sock(sk); } done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: if (err < 0) return err; return len; do_confirm: dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; }
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; __be32 daddr; __be32 saddr; u8 tos; int err; err = -EMSGSIZE; if (len > 0xFFFF) goto out; /* * Check the flags. */ err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message */ goto out; /* compatibility */ /* * Get and verify the address. */ if (msg->msg_namelen) { struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; err = -EINVAL; if (msg->msg_namelen < sizeof(*usin)) goto out; if (usin->sin_family != AF_INET) { static int complained; if (!complained++) printk(KERN_INFO "%s forgot to set AF_INET in " "raw sendmsg. Fix it!\n", current->comm); err = -EAFNOSUPPORT; if (usin->sin_family) goto out; } daddr = usin->sin_addr.s_addr; /* ANK: I did not forget to get protocol from port field. * I just do not know, who uses this weirdness. * IP_HDRINCL is much more convenient. */ } else { err = -EDESTADDRREQ; if (sk->sk_state != TCP_ESTABLISHED) goto out; daddr = inet->inet_daddr; } ipc.addr = inet->inet_saddr; ipc.opt = NULL; ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) goto out; if (ipc.opt) free = 1; } saddr = ipc.addr; ipc.addr = daddr; if (!ipc.opt) ipc.opt = inet->opt; if (ipc.opt) { err = -EINVAL; /* Linux does not mangle headers on raw sockets, * so that IP options + IP_HDRINCL is non-sense. */ if (inet->hdrincl) goto done; if (ipc.opt->srr) { if (!daddr) goto done; daddr = ipc.opt->faddr; } } tos = RT_CONN_FLAGS(sk); if (msg->msg_flags & MSG_DONTROUTE) tos |= RTO_ONLINK; if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) saddr = inet->mc_addr; } { struct flowi4 fl4 = { .flowi4_oif = ipc.oif, .flowi4_mark = sk->sk_mark, .daddr = daddr, .saddr = saddr, .flowi4_tos = tos, .flowi4_proto = (inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol), .flowi4_flags = FLOWI_FLAG_CAN_SLEEP, }; if (!inet->hdrincl) { err = raw_probe_proto_opt(&fl4, msg); if (err) goto done; } security_sk_classify_flow(sk, flowi4_to_flowi(&fl4)); rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; goto done; } } err = -EACCES; if (rt->rt_flags & RTCF_BROADCAST && !sock_flag(sk, SOCK_BROADCAST)) goto done; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (inet->hdrincl) err = raw_send_hdrinc(sk, msg->msg_iov, len, &rt, msg->msg_flags); else { if (!ipc.addr) ipc.addr = rt->rt_dst; lock_sock(sk); err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) { err = ip_push_pending_frames(sk); if (err == -ENOBUFS && !inet->recverr) err = 0; } release_sock(sk); } done: if (free) kfree(ipc.opt); ip_rt_put(rt); out: if (err < 0) return err; return len; do_confirm: dst_confirm(&rt->dst); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; } static void raw_close(struct sock *sk, long timeout) { /* * Raw sockets may have direct kernel refereneces. Kill them. */ ip_ra_control(sk, 0, NULL); sk_common_release(sk); }
static int callforward_do_filter(const union nf_inet_addr *src, const union nf_inet_addr *dst, u_int8_t family) { const struct nf_afinfo *afinfo; int ret = 0; afinfo = nf_get_afinfo(family); if (!afinfo) return 0; switch (family) { case AF_INET: { struct flowi4 fl1, fl2; struct rtable *rt1, *rt2; memset(&fl1, 0, sizeof(fl1)); fl1.daddr = src->ip; memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->ip; if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi4_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi4_to_flowi(&fl2), false)) { if (rt1->rt_gateway == rt2->rt_gateway && rt1->dst.dev == rt2->dst.dev) ret = 1; dst_release(&rt2->dst); } dst_release(&rt1->dst); } break; } #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6) case AF_INET6: { struct flowi6 fl1, fl2; struct rt6_info *rt1, *rt2; memset(&fl1, 0, sizeof(fl1)); fl1.daddr = src->in6; memset(&fl2, 0, sizeof(fl2)); fl2.daddr = dst->in6; if (!afinfo->route(&init_net, (struct dst_entry **)&rt1, flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, sizeof(rt1->rt6i_gateway)) && rt1->dst.dev == rt2->dst.dev) ret = 1; dst_release(&rt2->dst); } dst_release(&rt1->dst); } break; } #endif } return ret; }
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { struct net *net = dev_net(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi4 fl4 = {}; unsigned long orefdst; unsigned int hh_len; unsigned int type; type = inet_addr_type(net, iph->saddr); if (skb->sk && inet_sk(skb->sk)->transparent) type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { fl4.daddr = iph->daddr; if (type == RTN_LOCAL) fl4.saddr = iph->saddr; fl4.flowi4_tos = RT_TOS(iph->tos); fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl4.flowi4_mark = skb->mark; fl4.flowi4_flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; /* Drop old route. */ skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ fl4.daddr = iph->saddr; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return -1; orefdst = skb->_skb_refdst; if (ip_route_input(skb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->dst.dev) != 0) { dst_release(&rt->dst); return -1; } dst_release(&rt->dst); refdst_drop(orefdst); } if (skb_dst(skb)->error) return -1; #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); if (IS_ERR(dst)) return -1; skb_dst_set(skb, dst); } #endif /* Change in oif may mean change in hh_len. */ hh_len = skb_dst(skb)->dev->hard_header_len; if (skb_headroom(skb) < hh_len && pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) return -1; return 0; }