Example #1
static bool
socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
	struct sock *sk = skb->sk;

	if (!sk)
		sk = xt_socket_lookup_slow_v6(skb, par->in);
	if (sk) {
		bool wildcard;
		bool transparent = true;

		/* Ignore sockets listening on INADDR_ANY
		 * unless XT_SOCKET_NOWILDCARD is set
		 */
		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
			    sk_fullsock(sk) &&
			    ipv6_addr_any(&sk->sk_v6_rcv_saddr));

		/* Ignore non-transparent sockets,
		 * if XT_SOCKET_TRANSPARENT is used
		 */
		if (info->flags & XT_SOCKET_TRANSPARENT)
			transparent = xt_socket_sk_is_transparent(sk);

		if (sk != skb->sk)
			sock_gen_put(sk);

		if (wildcard || !transparent)
			sk = NULL;
	}

	return sk != NULL;
}
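Every example in this collection leans on the same guard: sk_fullsock() must hold before full-socket fields such as sk_v6_rcv_saddr or sk_mark are read, because time-wait and request (SYN_RECV) minisockets carry only the small common header. A minimal sketch of that predicate, roughly mirroring how sk_fullsock() is defined in include/net/sock.h in the kernels these snippets come from (paraphrased here, not copied verbatim):

/* Sketch only: a "full" socket is anything that is not a time-wait or
 * request minisock; only full sockets have the complete struct sock
 * (and struct inet_sock) layout behind the pointer.
 */
static inline bool sk_fullsock_sketch(const struct sock *sk)
{
	return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
}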
Example #2
static unsigned int ipv4_conntrack_defrag(void *priv,
					  struct sk_buff *skb,
					  const struct nf_hook_state *state)
{
	struct sock *sk = skb->sk;

	if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) &&
	    inet_sk(sk)->nodefrag)
		return NF_ACCEPT;

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#if !IS_ENABLED(CONFIG_NF_NAT)
	/* Previously seen (loopback)?  Ignore.  Do this before
	   fragment check. */
	if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
		return NF_ACCEPT;
#endif
#endif
	/* Gather fragments. */
	if (ip_is_fragment(ip_hdr(skb))) {
		enum ip_defrag_users user =
			nf_ct_defrag_user(state->hook, skb);

		if (nf_ct_ipv4_gather_frags(state->net, skb, user))
			return NF_STOLEN;
	}
	return NF_ACCEPT;
}
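The defrag hook above only gathers packets for which ip_is_fragment() is true. That helper (from include/net/ip.h) boils down to a check of the More-Fragments bit and the fragment offset; a hedged restatement follows, with the name changed to mark it as an illustration rather than the kernel's definition:

/* Illustration only: a packet belongs to a fragment train when the
 * More-Fragments flag is set or the fragment offset is non-zero.
 */
static inline bool ip_is_fragment_sketch(const struct iphdr *iph)
{
	return (iph->frag_off & htons(IP_MF | IP_OFFSET)) != 0;
}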
Example #3
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
	struct sock *sk = skb->sk;
	int q_idx = sk_tx_queue_get(sk);

	if (q_idx < 0 || skb->ooo_okay ||
	    q_idx >= ndev->real_num_tx_queues) {
		u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
		int new_idx;

		new_idx = nvsc_dev->send_table[hash]
			% nvsc_dev->num_chn;

		if (q_idx != new_idx && sk &&
		    sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_idx);

		q_idx = new_idx;
	}

	if (unlikely(!nvsc_dev->chan_table[q_idx].channel))
		q_idx = 0;
	
	return q_idx;
}
Example #4
static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
{
	struct sock *sk = skb->sk;
	struct rtable *ort = skb_rtable(skb);

	if (!skb->dev && sk && sk_fullsock(sk))
		ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
Example #5
/* "socket" match based redirection (no specific rule)
 * ===================================================
 *
 * There are connections with dynamic endpoints (e.g. FTP data
 * connection) that the user is unable to add explicit rules
 * for. These are taken care of by a generic "socket" rule. It is
 * assumed that the proxy application is trusted to open such
 * connections without explicit iptables rule (except of course the
 * generic 'socket' rule). In this case the following sockets are
 * matched in preference order:
 *
 *   - match: if there's a fully established connection matching the
 *     _packet_ tuple
 *
 *   - match: if there's a non-zero bound listener (possibly with a
 *     non-local address). We don't accept zero-bound listeners, since
 *     then local services could intercept traffic going through the
 *     box.
 */
static bool
socket_match(const struct sk_buff *skb, struct xt_action_param *par,
	     const struct xt_socket_mtinfo1 *info)
{
	struct sk_buff *pskb = (struct sk_buff *)skb;
	struct sock *sk = skb->sk;

	if (sk && !net_eq(xt_net(par), sock_net(sk)))
		sk = NULL;

	if (!sk)
		sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par));

	if (sk) {
		bool wildcard;
		bool transparent = true;

		/* Ignore sockets listening on INADDR_ANY,
		 * unless XT_SOCKET_NOWILDCARD is set
		 */
		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
			    sk_fullsock(sk) &&
			    inet_sk(sk)->inet_rcv_saddr == 0);

		/* Ignore non-transparent sockets,
		 * if XT_SOCKET_TRANSPARENT is used
		 */
		if (info->flags & XT_SOCKET_TRANSPARENT)
			transparent = inet_sk_transparent(sk);

		if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
		    transparent && sk_fullsock(sk))
			pskb->mark = sk->sk_mark;

		if (sk != skb->sk)
			sock_gen_put(sk);

		if (wildcard || !transparent)
			sk = NULL;
	}

	return sk != NULL;
}
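The block comment above states the policy; socket_match() then encodes it in two booleans plus the final NULL-out. Condensed into a hypothetical helper (socket_verdict_sketch is not part of the module and omits the RESTORESKMARK mark restore), the verdict for an already looked-up IPv4 socket reads:

/* Hypothetical condensation of the verdict logic in socket_match():
 * a socket matches unless it is an unwanted INADDR_ANY (wildcard)
 * listener, or transparency was requested but the socket is not
 * transparent.
 */
static bool socket_verdict_sketch(const struct sock *sk, __u8 flags)
{
	bool wildcard = !(flags & XT_SOCKET_NOWILDCARD) &&
			sk_fullsock(sk) &&
			inet_sk(sk)->inet_rcv_saddr == 0;
	bool transparent = !(flags & XT_SOCKET_TRANSPARENT) ||
			   inet_sk_transparent(sk);

	return !wildcard && transparent;
}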
Example #6
static void nft_socket_eval(const struct nft_expr *expr,
			    struct nft_regs *regs,
			    const struct nft_pktinfo *pkt)
{
	const struct nft_socket *priv = nft_expr_priv(expr);
	struct sk_buff *skb = pkt->skb;
	struct sock *sk = skb->sk;
	u32 *dest = &regs->data[priv->dreg];

	if (sk && !net_eq(nft_net(pkt), sock_net(sk)))
		sk = NULL;

	if (!sk)
		switch(nft_pf(pkt)) {
		case NFPROTO_IPV4:
			sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
			break;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
		case NFPROTO_IPV6:
			sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
			break;
#endif
		default:
			WARN_ON_ONCE(1);
			regs->verdict.code = NFT_BREAK;
			return;
		}

	if (!sk) {
		regs->verdict.code = NFT_BREAK;
		return;
	}

	/* So that subsequent socket matching does not require another lookup. */
	skb->sk = sk;

	switch(priv->key) {
	case NFT_SOCKET_TRANSPARENT:
		nft_reg_store8(dest, inet_sk_transparent(sk));
		break;
	case NFT_SOCKET_MARK:
		if (sk_fullsock(sk)) {
			*dest = sk->sk_mark;
		} else {
			regs->verdict.code = NFT_BREAK;
			return;
		}
		break;
	default:
		WARN_ON(1);
		regs->verdict.code = NFT_BREAK;
	}
}
Example #7
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_entry entry;

	if (!bc)
		return 1;

	entry.family = sk->sk_family;
	entry_fill_addrs(&entry, sk);
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
	entry.ifindex = sk->sk_bound_dev_if;
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
	if (sk_fullsock(sk))
		entry.mark = sk->sk_mark;
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
	else
		entry.mark = 0;

	return inet_diag_bc_run(bc, &entry);
}
Example #8
void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk)
{
	if (!sk || !sk_fullsock(sk))
		return;

	read_lock_bh(&sk->sk_callback_lock);
	if (sk->sk_socket && sk->sk_socket->file) {
		const struct cred *cred = sk->sk_socket->file->f_cred;
		nf_log_buf_add(m, "UID=%u GID=%u ",
			from_kuid_munged(&init_user_ns, cred->fsuid),
			from_kgid_munged(&init_user_ns, cred->fsgid));
	}
	read_unlock_bh(&sk->sk_callback_lock);
}
Example #9
int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct inet_diag_entry entry;

	if (!bc)
		return 1;

	entry.family = sk->sk_family;
	entry_fill_addrs(&entry, sk);
	entry.sport = inet->inet_num;
	entry.dport = ntohs(inet->inet_dport);
	entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;

	return inet_diag_bc_run(bc, &entry);
}
Example #10
static inline int netvsc_get_tx_queue(struct net_device *ndev,
				      struct sk_buff *skb, int old_idx)
{
	const struct net_device_context *ndc = netdev_priv(ndev);
	struct sock *sk = skb->sk;
	int q_idx;

	q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
				   (VRSS_SEND_TAB_SIZE - 1)];

	/* If queue index changed record the new value */
	if (q_idx != old_idx &&
	    sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
		sk_tx_queue_set(sk, q_idx);

	return q_idx;
}
Example #11
/*
 * Select queue for transmit.
 *
 * If a valid queue has already been assigned, then use that.
 * Otherwise compute tx queue based on hash and the send table.
 *
 * This is basically similar to default (__netdev_pick_tx) with the added step
 * of using the host send_table when no other queue has been assigned.
 *
 * TODO support XPS - but get_xps_queue not exported
 */
static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
			void *accel_priv, select_queue_fallback_t fallback)
{
	struct net_device_context *net_device_ctx = netdev_priv(ndev);
	unsigned int num_tx_queues = ndev->real_num_tx_queues;
	struct sock *sk = skb->sk;
	int q_idx = sk_tx_queue_get(sk);

	if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) {
		u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
		int new_idx;

		new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues;

		if (q_idx != new_idx && sk &&
		    sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_idx);

		q_idx = new_idx;
	}

	return q_idx;
}
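The comment block spells out the two-step policy: reuse a queue index already cached on the socket if it is still valid, otherwise hash the flow into the host-supplied send table and cache the result on sockets that hold a dst (i.e. flows expected to keep sending). A hedged condensation of just that policy, with the table and channel count passed in explicitly (netvsc_pick_tx_sketch is a made-up name, not a driver function):

/* Sketch of the selection policy described above. */
static u16 netvsc_pick_tx_sketch(struct net_device *ndev, struct sk_buff *skb,
				 const u32 *send_table, unsigned int num_chn)
{
	struct sock *sk = skb->sk;
	int q_idx = sk_tx_queue_get(sk);	/* -1 when sk is NULL or unset */

	if (q_idx < 0 || skb->ooo_okay || q_idx >= ndev->real_num_tx_queues) {
		u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE);
		int new_idx = send_table[hash] % num_chn;

		/* Cache the choice only on full sockets that hold a dst. */
		if (q_idx != new_idx && sk && sk_fullsock(sk) &&
		    rcu_access_pointer(sk->sk_dst_cache))
			sk_tx_queue_set(sk, new_idx);

		q_idx = new_idx;
	}
	return q_idx;
}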
Example #12
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
		      struct user_namespace *user_ns,
		      u32 portid, u32 seq, u16 nlmsg_flags,
		      const struct nlmsghdr *unlh)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr  *nlh;
	struct nlattr *attr;
	void *info = NULL;

	handler = inet_diag_table[req->sdiag_protocol];
	BUG_ON(!handler);

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;

	if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
		goto errout;

	/* IPv6 dual-stack sockets use inet->tos for IPv4 connections,
	 * hence this needs to be included regardless of socket family.
	 */
	if (ext & (1 << (INET_DIAG_TOS - 1)))
		if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
			goto errout;

#if IS_ENABLED(CONFIG_IPV6)
	if (r->idiag_family == AF_INET6) {
		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
			if (nla_put_u8(skb, INET_DIAG_TCLASS,
				       inet6_sk(sk)->tclass) < 0)
				goto errout;

		if (((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) &&
		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, ipv6_only_sock(sk)))
			goto errout;
	}
#endif

	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
	r->idiag_inode = sock_i_ino(sk);

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = sk->sk_wmem_queued,
			.idiag_fmem = sk->sk_forward_alloc,
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

#define EXPIRES_IN_MS(tmo)  DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires);
	} else {
		r->idiag_timer = 0;
		r->idiag_expires = 0;
	}
#undef EXPIRES_IN_MS

	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve(skb, INET_DIAG_INFO,
				   handler->idiag_info_size);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (sk->sk_state < TCP_TIME_WAIT) {
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &attr, &info);
		rcu_read_unlock();
		if (sz && nla_put(skb, attr, sz, &info) < 0)
			goto errout;
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

static int inet_csk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      const struct inet_diag_req_v2 *req,
			      struct user_namespace *user_ns,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh)
{
	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req,
				 user_ns, portid, seq, nlmsg_flags, unlh);
}

static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       u32 portid, u32 seq, u16 nlmsg_flags,
			       const struct nlmsghdr *unlh)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	tmo = tw->tw_timer.expires - jiffies;
	if (tmo < 0)
		tmo = 0;

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans      = 0;

	r->idiag_state	      = tw->tw_substate;
	r->idiag_timer	      = 3;
	r->idiag_expires      = jiffies_to_msecs(tmo);
	r->idiag_rqueue	      = 0;
	r->idiag_wqueue	      = 0;
	r->idiag_uid	      = 0;
	r->idiag_inode	      = 0;

	nlmsg_end(skb, nlh);
	return 0;
}

static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh)
{
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = inet_reqsk(sk)->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
	r->idiag_rqueue	= 0;
	r->idiag_wqueue	= 0;
	r->idiag_uid	= 0;
	r->idiag_inode	= 0;

	nlmsg_end(skb, nlh);
	return 0;
}

static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			const struct inet_diag_req_v2 *r,
			struct user_namespace *user_ns,
			u32 portid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, portid, seq,
					   nlmsg_flags, unlh);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, portid, seq,
					  nlmsg_flags, unlh);

	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
				  nlmsg_flags, unlh);
}

struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	if (req->sdiag_family == AF_INET)
		sk = inet_lookup(net, hashinfo, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, hashinfo, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, hashinfo,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
#endif
	else
		return ERR_PTR(-EINVAL);

	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct sk_buff *in_skb,
			    const struct nlmsghdr *nlh,
			    const struct inet_diag_req_v2 *req)
{
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, req,
			   sk_user_ns(NETLINK_CB(in_skb).sk),
			   NETLINK_CB(in_skb).portid,
			   nlh->nlmsg_seq, 0, nlh);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}
EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);

static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb,
			       const struct nlmsghdr *nlh,
			       const struct inet_diag_req_v2 *req)
{
	const struct inet_diag_handler *handler;
	int err;

	handler = inet_diag_lock_handler(req->sdiag_protocol);
	if (IS_ERR(handler))
		err = PTR_ERR(handler);
	else if (cmd == SOCK_DIAG_BY_FAMILY)
		err = handler->dump_one(in_skb, nlh, req);
	else if (cmd == SOCK_DESTROY_BACKPORT && handler->destroy)
		err = handler->destroy(in_skb, req);
	else
		err = -EOPNOTSUPP;
	inet_diag_unlock_handler(handler);

	return err;
}

static int bitstring_match(const __be32 *a1, const __be32 *a2, int bits)
{
	int words = bits >> 5;

	bits &= 0x1f;

	if (words) {
		if (memcmp(a1, a2, words << 2))
			return 0;
	}
	if (bits) {
		__be32 w1, w2;
		__be32 mask;

		w1 = a1[words];
		w2 = a2[words];

		mask = htonl((0xffffffff) << (32 - bits));

		if ((w1 ^ w2) & mask)
			return 0;
	}

	return 1;
}
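bitstring_match() compares whole 32-bit words first and then masks whatever high-order bits remain via htonl(0xffffffff << (32 - bits)), so it handles any prefix length up to the address width while staying in network byte order. A hypothetical caller testing an IPv4 address against 10.0.0.0/8 would look roughly like this (in_ten_slash_eight is an invented name):

/* Illustration only: does addr (network byte order) fall in 10.0.0.0/8? */
static bool in_ten_slash_eight(__be32 addr)
{
	__be32 prefix = htonl(0x0a000000);	/* 10.0.0.0 */

	return bitstring_match(&addr, &prefix, 8) != 0;
}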
Example #13
void nft_meta_get_eval(const struct nft_expr *expr,
		       struct nft_regs *regs,
		       const struct nft_pktinfo *pkt)
{
	const struct nft_meta *priv = nft_expr_priv(expr);
	const struct sk_buff *skb = pkt->skb;
	const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
	struct sock *sk;
	u32 *dest = &regs->data[priv->dreg];
#ifdef CONFIG_NF_TABLES_BRIDGE
	const struct net_bridge_port *p;
#endif

	switch (priv->key) {
	case NFT_META_LEN:
		*dest = skb->len;
		break;
	case NFT_META_PROTOCOL:
		nft_reg_store16(dest, (__force u16)skb->protocol);
		break;
	case NFT_META_NFPROTO:
		nft_reg_store8(dest, nft_pf(pkt));
		break;
	case NFT_META_L4PROTO:
		if (!pkt->tprot_set)
			goto err;
		nft_reg_store8(dest, pkt->tprot);
		break;
	case NFT_META_PRIORITY:
		*dest = skb->priority;
		break;
	case NFT_META_MARK:
		*dest = skb->mark;
		break;
	case NFT_META_IIF:
		if (in == NULL)
			goto err;
		*dest = in->ifindex;
		break;
	case NFT_META_OIF:
		if (out == NULL)
			goto err;
		*dest = out->ifindex;
		break;
	case NFT_META_IIFNAME:
		if (in == NULL)
			goto err;
		strncpy((char *)dest, in->name, IFNAMSIZ);
		break;
	case NFT_META_OIFNAME:
		if (out == NULL)
			goto err;
		strncpy((char *)dest, out->name, IFNAMSIZ);
		break;
	case NFT_META_IIFTYPE:
		if (in == NULL)
			goto err;
		nft_reg_store16(dest, in->type);
		break;
	case NFT_META_OIFTYPE:
		if (out == NULL)
			goto err;
		nft_reg_store16(dest, out->type);
		break;
	case NFT_META_SKUID:
		sk = skb_to_full_sk(skb);
		if (!sk || !sk_fullsock(sk) ||
		    !net_eq(nft_net(pkt), sock_net(sk)))
			goto err;

		read_lock_bh(&sk->sk_callback_lock);
		if (sk->sk_socket == NULL ||
		    sk->sk_socket->file == NULL) {
			read_unlock_bh(&sk->sk_callback_lock);
			goto err;
		}

		*dest =	from_kuid_munged(&init_user_ns,
				sk->sk_socket->file->f_cred->fsuid);
		read_unlock_bh(&sk->sk_callback_lock);
		break;
	case NFT_META_SKGID:
		sk = skb_to_full_sk(skb);
		if (!sk || !sk_fullsock(sk) ||
		    !net_eq(nft_net(pkt), sock_net(sk)))
			goto err;

		read_lock_bh(&sk->sk_callback_lock);
		if (sk->sk_socket == NULL ||
		    sk->sk_socket->file == NULL) {
			read_unlock_bh(&sk->sk_callback_lock);
			goto err;
		}
		*dest =	from_kgid_munged(&init_user_ns,
				 sk->sk_socket->file->f_cred->fsgid);
		read_unlock_bh(&sk->sk_callback_lock);
		break;
#ifdef CONFIG_IP_ROUTE_CLASSID
	case NFT_META_RTCLASSID: {
		const struct dst_entry *dst = skb_dst(skb);

		if (dst == NULL)
			goto err;
		*dest = dst->tclassid;
		break;
	}
#endif
#ifdef CONFIG_NETWORK_SECMARK
	case NFT_META_SECMARK:
		*dest = skb->secmark;
		break;
#endif
	case NFT_META_PKTTYPE:
		if (skb->pkt_type != PACKET_LOOPBACK) {
			nft_reg_store8(dest, skb->pkt_type);
			break;
		}

		switch (nft_pf(pkt)) {
		case NFPROTO_IPV4:
			if (ipv4_is_multicast(ip_hdr(skb)->daddr))
				nft_reg_store8(dest, PACKET_MULTICAST);
			else
				nft_reg_store8(dest, PACKET_BROADCAST);
			break;
		case NFPROTO_IPV6:
			nft_reg_store8(dest, PACKET_MULTICAST);
			break;
		case NFPROTO_NETDEV:
			switch (skb->protocol) {
			case htons(ETH_P_IP): {
				int noff = skb_network_offset(skb);
				struct iphdr *iph, _iph;

				iph = skb_header_pointer(skb, noff,
							 sizeof(_iph), &_iph);
				if (!iph)
					goto err;

				if (ipv4_is_multicast(iph->daddr))
					nft_reg_store8(dest, PACKET_MULTICAST);
				else
					nft_reg_store8(dest, PACKET_BROADCAST);

				break;
			}
			case htons(ETH_P_IPV6):
				nft_reg_store8(dest, PACKET_MULTICAST);
				break;
			default:
				WARN_ON_ONCE(1);
				goto err;
			}
			break;
		default:
			WARN_ON_ONCE(1);
			goto err;
		}
		break;
	case NFT_META_CPU:
		*dest = raw_smp_processor_id();
		break;
	case NFT_META_IIFGROUP:
		if (in == NULL)
			goto err;
		*dest = in->group;
		break;
	case NFT_META_OIFGROUP:
		if (out == NULL)
			goto err;
		*dest = out->group;
		break;
#ifdef CONFIG_CGROUP_NET_CLASSID
	case NFT_META_CGROUP:
		sk = skb_to_full_sk(skb);
		if (!sk || !sk_fullsock(sk) ||
		    !net_eq(nft_net(pkt), sock_net(sk)))
			goto err;
		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
		break;
#endif
	case NFT_META_PRANDOM: {
		struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
		*dest = prandom_u32_state(state);
		break;
	}
#ifdef CONFIG_XFRM
	case NFT_META_SECPATH:
		nft_reg_store8(dest, secpath_exists(skb));
		break;
#endif
#ifdef CONFIG_NF_TABLES_BRIDGE
	case NFT_META_BRI_IIFNAME:
		if (in == NULL || (p = br_port_get_rcu(in)) == NULL)
			goto err;
		strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
		return;
	case NFT_META_BRI_OIFNAME:
		if (out == NULL || (p = br_port_get_rcu(out)) == NULL)
			goto err;
		strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
		return;
#endif
	default:
		WARN_ON(1);
		goto err;
	}
	return;

err:
	regs->verdict.code = NFT_BREAK;
}
Example #14
void nft_meta_get_eval(const struct nft_expr *expr,
		       struct nft_regs *regs,
		       const struct nft_pktinfo *pkt)
{
	const struct nft_meta *priv = nft_expr_priv(expr);
	const struct sk_buff *skb = pkt->skb;
	const struct net_device *in = pkt->in, *out = pkt->out;
	struct sock *sk;
	u32 *dest = &regs->data[priv->dreg];

	switch (priv->key) {
	case NFT_META_LEN:
		*dest = skb->len;
		break;
	case NFT_META_PROTOCOL:
		*dest = 0;
		*(__be16 *)dest = skb->protocol;
		break;
	case NFT_META_NFPROTO:
		*dest = pkt->pf;
		break;
	case NFT_META_L4PROTO:
		*dest = pkt->tprot;
		break;
	case NFT_META_PRIORITY:
		*dest = skb->priority;
		break;
	case NFT_META_MARK:
		*dest = skb->mark;
		break;
	case NFT_META_IIF:
		if (in == NULL)
			goto err;
		*dest = in->ifindex;
		break;
	case NFT_META_OIF:
		if (out == NULL)
			goto err;
		*dest = out->ifindex;
		break;
	case NFT_META_IIFNAME:
		if (in == NULL)
			goto err;
		strncpy((char *)dest, in->name, IFNAMSIZ);
		break;
	case NFT_META_OIFNAME:
		if (out == NULL)
			goto err;
		strncpy((char *)dest, out->name, IFNAMSIZ);
		break;
	case NFT_META_IIFTYPE:
		if (in == NULL)
			goto err;
		*dest = 0;
		*(u16 *)dest = in->type;
		break;
	case NFT_META_OIFTYPE:
		if (out == NULL)
			goto err;
		*dest = 0;
		*(u16 *)dest = out->type;
		break;
	case NFT_META_SKUID:
		sk = skb_to_full_sk(skb);
		if (!sk || !sk_fullsock(sk))
			goto err;

		read_lock_bh(&sk->sk_callback_lock);
		if (sk->sk_socket == NULL ||
		    sk->sk_socket->file == NULL) {
			read_unlock_bh(&sk->sk_callback_lock);
			goto err;
		}

		*dest =	from_kuid_munged(&init_user_ns,
				sk->sk_socket->file->f_cred->fsuid);
		read_unlock_bh(&sk->sk_callback_lock);
		break;
	case NFT_META_SKGID:
		sk = skb_to_full_sk(skb);
		if (!sk || !sk_fullsock(sk))
			goto err;

		read_lock_bh(&sk->sk_callback_lock);
		if (sk->sk_socket == NULL ||
		    sk->sk_socket->file == NULL) {
			read_unlock_bh(&sk->sk_callback_lock);
			goto err;
		}
		*dest =	from_kgid_munged(&init_user_ns,
				 sk->sk_socket->file->f_cred->fsgid);
		read_unlock_bh(&sk->sk_callback_lock);
		break;
#ifdef CONFIG_IP_ROUTE_CLASSID
	case NFT_META_RTCLASSID: {
		const struct dst_entry *dst = skb_dst(skb);

		if (dst == NULL)
			goto err;
		*dest = dst->tclassid;
		break;
	}
#endif
#ifdef CONFIG_NETWORK_SECMARK
	case NFT_META_SECMARK:
		*dest = skb->secmark;
		break;
#endif
	case NFT_META_PKTTYPE:
		if (skb->pkt_type != PACKET_LOOPBACK) {
			*dest = skb->pkt_type;
			break;
		}

		switch (pkt->pf) {
		case NFPROTO_IPV4:
			if (ipv4_is_multicast(ip_hdr(skb)->daddr))
				*dest = PACKET_MULTICAST;
			else
				*dest = PACKET_BROADCAST;
			break;
		case NFPROTO_IPV6:
			if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF)
				*dest = PACKET_MULTICAST;
			else
				*dest = PACKET_BROADCAST;
			break;
		default:
			WARN_ON(1);
			goto err;
		}
		break;
	case NFT_META_CPU:
		*dest = raw_smp_processor_id();
		break;
	case NFT_META_IIFGROUP:
		if (in == NULL)
			goto err;
		*dest = in->group;
		break;
	case NFT_META_OIFGROUP:
		if (out == NULL)
			goto err;
		*dest = out->group;
		break;
#ifdef CONFIG_CGROUP_NET_CLASSID
	case NFT_META_CGROUP:
		sk = skb_to_full_sk(skb);
		if (!sk || !sk_fullsock(sk))
			goto err;
		*dest = sock_cgroup_classid(&sk->sk_cgrp_data);
		break;
#endif
	default:
		WARN_ON(1);
		goto err;
	}
	return;

err:
	regs->verdict.code = NFT_BREAK;
}
Example #15
static bool
socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
	static struct xt_socket_mtinfo1 xt_info_v0 = {
		.flags = 0,
	};

	return socket_match(skb, par, &xt_info_v0);
}

static bool
socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
	return socket_match(skb, par, par->matchinfo);
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static bool
socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
	struct sk_buff *pskb = (struct sk_buff *)skb;
	struct sock *sk = skb->sk;

	if (sk && !net_eq(xt_net(par), sock_net(sk)))
		sk = NULL;

	if (!sk)
		sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par));

	if (sk) {
		bool wildcard;
		bool transparent = true;

		/* Ignore sockets listening on INADDR_ANY
		 * unless XT_SOCKET_NOWILDCARD is set
		 */
		wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) &&
			    sk_fullsock(sk) &&
			    ipv6_addr_any(&sk->sk_v6_rcv_saddr));

		/* Ignore non-transparent sockets,
		 * if XT_SOCKET_TRANSPARENT is used
		 */
		if (info->flags & XT_SOCKET_TRANSPARENT)
			transparent = inet_sk_transparent(sk);

		if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
		    transparent && sk_fullsock(sk))
			pskb->mark = sk->sk_mark;

		if (sk != skb->sk)
			sock_gen_put(sk);

		if (wildcard || !transparent)
			sk = NULL;
	}

	return sk != NULL;
}
#endif

static int socket_mt_enable_defrag(struct net *net, int family)
{
	switch (family) {
	case NFPROTO_IPV4:
		return nf_defrag_ipv4_enable(net);
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	case NFPROTO_IPV6:
		return nf_defrag_ipv6_enable(net);
#endif
	}
	WARN_ONCE(1, "Unknown family %d\n", family);
	return 0;
}

static int socket_mt_v1_check(const struct xt_mtchk_param *par)
{
	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
	int err;

	err = socket_mt_enable_defrag(par->net, par->family);
	if (err)
		return err;

	if (info->flags & ~XT_SOCKET_FLAGS_V1) {
		pr_info_ratelimited("unknown flags 0x%x\n",
				    info->flags & ~XT_SOCKET_FLAGS_V1);
		return -EINVAL;
	}
	return 0;
}

static int socket_mt_v2_check(const struct xt_mtchk_param *par)
{
	const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo;
	int err;

	err = socket_mt_enable_defrag(par->net, par->family);
	if (err)
		return err;

	if (info->flags & ~XT_SOCKET_FLAGS_V2) {
		pr_info_ratelimited("unknown flags 0x%x\n",
				    info->flags & ~XT_SOCKET_FLAGS_V2);
		return -EINVAL;
	}
	return 0;
}

static int socket_mt_v3_check(const struct xt_mtchk_param *par)
{
	const struct xt_socket_mtinfo3 *info =
				    (struct xt_socket_mtinfo3 *)par->matchinfo;
	int err;

	err = socket_mt_enable_defrag(par->net, par->family);
	if (err)
		return err;
	if (info->flags & ~XT_SOCKET_FLAGS_V3) {
		pr_info_ratelimited("unknown flags 0x%x\n",
				    info->flags & ~XT_SOCKET_FLAGS_V3);
		return -EINVAL;
	}
	return 0;
}

static struct xt_match socket_mt_reg[] __read_mostly = {
	{
		.name		= "socket",
		.revision	= 0,
		.family		= NFPROTO_IPV4,
		.match		= socket_mt4_v0,
		.hooks		= (1 << NF_INET_PRE_ROUTING) |
				  (1 << NF_INET_LOCAL_IN),
		.me		= THIS_MODULE,
	},
	{
		.name		= "socket",
		.revision	= 1,
		.family		= NFPROTO_IPV4,
		.match		= socket_mt4_v1_v2_v3,
		.checkentry	= socket_mt_v1_check,
		.matchsize	= sizeof(struct xt_socket_mtinfo1),
		.hooks		= (1 << NF_INET_PRE_ROUTING) |
				  (1 << NF_INET_LOCAL_IN),
		.me		= THIS_MODULE,
	},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
	{
		.name		= "socket",
Example #16
/* This is an inline function, we don't really care about a long
 * list of arguments */
static inline int
__build_packet_message(struct nfnl_log_net *log,
			struct nfulnl_instance *inst,
			const struct sk_buff *skb,
			unsigned int data_len,
			u_int8_t pf,
			unsigned int hooknum,
			const struct net_device *indev,
			const struct net_device *outdev,
			const char *prefix, unsigned int plen,
			const struct nfnl_ct_hook *nfnl_ct,
			struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
	struct nfulnl_msg_packet_hdr pmsg;
	struct nlmsghdr *nlh;
	struct nfgenmsg *nfmsg;
	sk_buff_data_t old_tail = inst->skb->tail;
	struct sock *sk;
	const unsigned char *hwhdrp;

	nlh = nlmsg_put(inst->skb, 0, 0,
			nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
			sizeof(struct nfgenmsg), 0);
	if (!nlh)
		return -1;
	nfmsg = nlmsg_data(nlh);
	nfmsg->nfgen_family = pf;
	nfmsg->version = NFNETLINK_V0;
	nfmsg->res_id = htons(inst->group_num);

	memset(&pmsg, 0, sizeof(pmsg));
	pmsg.hw_protocol	= skb->protocol;
	pmsg.hook		= hooknum;

	if (nla_put(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), &pmsg))
		goto nla_put_failure;

	if (prefix &&
	    nla_put(inst->skb, NFULA_PREFIX, plen, prefix))
		goto nla_put_failure;

	if (indev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
		if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
				 htonl(indev->ifindex)))
			goto nla_put_failure;
#else
		if (pf == PF_BRIDGE) {
			/* Case 1: indev is the physical input device, we need to
			 * look for bridge group (when called from
			 * netfilter_bridge) */
			if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
					 htonl(indev->ifindex)) ||
			/* this is the bridge group "brX" */
			/* rcu_read_lock()ed by nf_hook_thresh or
			 * nf_log_packet.
			 */
			    nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
					 htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
				goto nla_put_failure;
		} else {
			struct net_device *physindev;

			/* Case 2: indev is bridge group, we need to look for
			 * physical device (when called from ipv4) */
			if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
					 htonl(indev->ifindex)))
				goto nla_put_failure;

			physindev = nf_bridge_get_physindev(skb);
			if (physindev &&
			    nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
					 htonl(physindev->ifindex)))
				goto nla_put_failure;
		}
#endif
	}

	if (outdev) {
#if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
		if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
				 htonl(outdev->ifindex)))
			goto nla_put_failure;
#else
		if (pf == PF_BRIDGE) {
			/* Case 1: outdev is physical output device, we need to
			 * look for bridge group (when called from
			 * netfilter_bridge) */
			if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
					 htonl(outdev->ifindex)) ||
			/* this is the bridge group "brX" */
			/* rcu_read_lock()ed by nf_hook_thresh or
			 * nf_log_packet.
			 */
			    nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
					 htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
				goto nla_put_failure;
		} else {
			struct net_device *physoutdev;

			/* Case 2: outdev is a bridge group, we need to look
			 * for physical device (when called from ipv4) */
			if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
					 htonl(outdev->ifindex)))
				goto nla_put_failure;

			physoutdev = nf_bridge_get_physoutdev(skb);
			if (physoutdev &&
			    nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
					 htonl(physoutdev->ifindex)))
				goto nla_put_failure;
		}
#endif
	}

	if (skb->mark &&
	    nla_put_be32(inst->skb, NFULA_MARK, htonl(skb->mark)))
		goto nla_put_failure;

	if (indev && skb->dev &&
	    skb->mac_header != skb->network_header) {
		struct nfulnl_msg_packet_hw phw;
		int len;

		memset(&phw, 0, sizeof(phw));
		len = dev_parse_header(skb, phw.hw_addr);
		if (len > 0) {
			phw.hw_addrlen = htons(len);
			if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
				goto nla_put_failure;
		}
	}

	if (indev && skb_mac_header_was_set(skb)) {
		if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
		    nla_put_be16(inst->skb, NFULA_HWLEN,
				 htons(skb->dev->hard_header_len)))
			goto nla_put_failure;

		hwhdrp = skb_mac_header(skb);

		if (skb->dev->type == ARPHRD_SIT)
			hwhdrp -= ETH_HLEN;

		if (hwhdrp >= skb->head &&
		    nla_put(inst->skb, NFULA_HWHEADER,
			    skb->dev->hard_header_len, hwhdrp))
			goto nla_put_failure;
	}

	if (skb->tstamp) {
		struct nfulnl_msg_packet_timestamp ts;
		struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
		ts.sec = cpu_to_be64(kts.tv_sec);
		ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);

		if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
			goto nla_put_failure;
	}

	/* UID */
	sk = skb->sk;
	if (sk && sk_fullsock(sk)) {
		read_lock_bh(&sk->sk_callback_lock);
		if (sk->sk_socket && sk->sk_socket->file) {
			struct file *file = sk->sk_socket->file;
			const struct cred *cred = file->f_cred;
			struct user_namespace *user_ns = inst->peer_user_ns;
			__be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid));
			__be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid));
			read_unlock_bh(&sk->sk_callback_lock);
			if (nla_put_be32(inst->skb, NFULA_UID, uid) ||
			    nla_put_be32(inst->skb, NFULA_GID, gid))
				goto nla_put_failure;
		} else
			read_unlock_bh(&sk->sk_callback_lock);
	}

	/* local sequence number */
	if ((inst->flags & NFULNL_CFG_F_SEQ) &&
	    nla_put_be32(inst->skb, NFULA_SEQ, htonl(inst->seq++)))
		goto nla_put_failure;

	/* global sequence number */
	if ((inst->flags & NFULNL_CFG_F_SEQ_GLOBAL) &&
	    nla_put_be32(inst->skb, NFULA_SEQ_GLOBAL,
			 htonl(atomic_inc_return(&log->global_seq))))
		goto nla_put_failure;

	if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
				 NFULA_CT, NFULA_CT_INFO) < 0)
		goto nla_put_failure;

	if (data_len) {
		struct nlattr *nla;
		int size = nla_attr_size(data_len);

		if (skb_tailroom(inst->skb) < nla_total_size(data_len))
			goto nla_put_failure;

		nla = skb_put(inst->skb, nla_total_size(data_len));
		nla->nla_type = NFULA_PAYLOAD;
		nla->nla_len = size;

		if (skb_copy_bits(skb, 0, nla_data(nla), data_len))
			BUG();
	}

	nlh->nlmsg_len = inst->skb->tail - old_tail;
	return 0;

nla_put_failure:
	PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n");
	return -1;
}
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
		      struct sk_buff *skb, const struct inet_diag_req_v2 *req,
		      struct user_namespace *user_ns,
		      u32 portid, u32 seq, u16 nlmsg_flags,
		      const struct nlmsghdr *unlh,
		      bool net_admin)
{
	const struct tcp_congestion_ops *ca_ops;
	const struct inet_diag_handler *handler;
	int ext = req->idiag_ext;
	struct inet_diag_msg *r;
	struct nlmsghdr  *nlh;
	struct nlattr *attr;
	void *info = NULL;

	handler = inet_diag_table[req->sdiag_protocol];
	BUG_ON(!handler);

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(!sk_fullsock(sk));

	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = sk->sk_state;
	r->idiag_timer = 0;
	r->idiag_retrans = 0;

	if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
		goto errout;

	if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
		struct inet_diag_meminfo minfo = {
			.idiag_rmem = sk_rmem_alloc_get(sk),
			.idiag_wmem = sk->sk_wmem_queued,
			.idiag_fmem = sk->sk_forward_alloc,
			.idiag_tmem = sk_wmem_alloc_get(sk),
		};

		if (nla_put(skb, INET_DIAG_MEMINFO, sizeof(minfo), &minfo) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
			goto errout;

	/*
	 * RAW sockets might have user-defined protocols assigned,
	 * so report the one supplied on socket creation.
	 */
	if (sk->sk_type == SOCK_RAW) {
		if (nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))
			goto errout;
	}

	if (!icsk) {
		handler->idiag_get_info(sk, r, NULL);
		goto out;
	}

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		r->idiag_timer = 1;
		r->idiag_retrans = icsk->icsk_retransmits;
		r->idiag_expires =
			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		r->idiag_timer = 4;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_to_msecs(icsk->icsk_timeout - jiffies);
	} else if (timer_pending(&sk->sk_timer)) {
		r->idiag_timer = 2;
		r->idiag_retrans = icsk->icsk_probes_out;
		r->idiag_expires =
			jiffies_to_msecs(sk->sk_timer.expires - jiffies);
	} else {
		r->idiag_timer = 0;
		r->idiag_expires = 0;
	}

	if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
		attr = nla_reserve_64bit(skb, INET_DIAG_INFO,
					 handler->idiag_info_size,
					 INET_DIAG_PAD);
		if (!attr)
			goto errout;

		info = nla_data(attr);
	}

	if (ext & (1 << (INET_DIAG_CONG - 1))) {
		int err = 0;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops)
			err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name);
		rcu_read_unlock();
		if (err < 0)
			goto errout;
	}

	handler->idiag_get_info(sk, r, info);

	if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux)
		if (handler->idiag_get_aux(sk, net_admin, skb) < 0)
			goto errout;

	if (sk->sk_state < TCP_TIME_WAIT) {
		union tcp_cc_info info;
		size_t sz = 0;
		int attr;

		rcu_read_lock();
		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
		if (ca_ops && ca_ops->get_info)
			sz = ca_ops->get_info(sk, ext, &attr, &info);
		rcu_read_unlock();
		if (sz && nla_put(skb, attr, sz, &info) < 0)
			goto errout;
	}

	if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) {
		u32 classid = 0;

#ifdef CONFIG_SOCK_CGROUP_DATA
		classid = sock_cgroup_classid(&sk->sk_cgrp_data);
#endif

		if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid))
			goto errout;
	}

out:
	nlmsg_end(skb, nlh);
	return 0;

errout:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(inet_sk_diag_fill);

static int inet_csk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      const struct inet_diag_req_v2 *req,
			      struct user_namespace *user_ns,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh,
			      bool net_admin)
{
	return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns,
				 portid, seq, nlmsg_flags, unlh, net_admin);
}

static int inet_twsk_diag_fill(struct sock *sk,
			       struct sk_buff *skb,
			       u32 portid, u32 seq, u16 nlmsg_flags,
			       const struct nlmsghdr *unlh)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	BUG_ON(tw->tw_state != TCP_TIME_WAIT);

	tmo = tw->tw_timer.expires - jiffies;
	if (tmo < 0)
		tmo = 0;

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans      = 0;

	r->idiag_state	      = tw->tw_substate;
	r->idiag_timer	      = 3;
	r->idiag_expires      = jiffies_to_msecs(tmo);
	r->idiag_rqueue	      = 0;
	r->idiag_wqueue	      = 0;
	r->idiag_uid	      = 0;
	r->idiag_inode	      = 0;

	nlmsg_end(skb, nlh);
	return 0;
}

static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			      u32 portid, u32 seq, u16 nlmsg_flags,
			      const struct nlmsghdr *unlh, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
			nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = reqsk->num_retrans;

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies;
	r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
	r->idiag_rqueue	= 0;
	r->idiag_wqueue	= 0;
	r->idiag_uid	= 0;
	r->idiag_inode	= 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark))
		return -EMSGSIZE;

	nlmsg_end(skb, nlh);
	return 0;
}

static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			const struct inet_diag_req_v2 *r,
			struct user_namespace *user_ns,
			u32 portid, u32 seq, u16 nlmsg_flags,
			const struct nlmsghdr *unlh, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return inet_twsk_diag_fill(sk, skb, portid, seq,
					   nlmsg_flags, unlh);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return inet_req_diag_fill(sk, skb, portid, seq,
					  nlmsg_flags, unlh, net_admin);

	return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
				  nlmsg_flags, unlh, net_admin);
}

struct sock *inet_diag_find_one_icsk(struct net *net,
				     struct inet_hashinfo *hashinfo,
				     const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET)
		sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, hashinfo, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, hashinfo, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
	}
#endif
	else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}
EXPORT_SYMBOL_GPL(inet_diag_find_one_icsk);

int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
			    struct sk_buff *in_skb,
			    const struct nlmsghdr *nlh,
			    const struct inet_diag_req_v2 *req)
{
	bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	sk = inet_diag_find_one_icsk(net, hashinfo, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, req,
			   sk_user_ns(NETLINK_CB(in_skb).sk),
			   NETLINK_CB(in_skb).portid,
			   nlh->nlmsg_seq, 0, nlh, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid,
			      MSG_DONTWAIT);
	if (err > 0)
		err = 0;

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}