int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { struct net_device *dev; int rc = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; goto out; } dev = dev_get_by_index_rcu(net, fl->flowi_iif); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_fib_table) { arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); rc = 1; goto out; } out: rcu_read_unlock(); return rc; }
void l3mdev_update_flow(struct net *net, struct flowi *fl) { struct net_device *dev; int ifindex; rcu_read_lock(); if (fl->flowi_oif) { dev = dev_get_by_index_rcu(net, fl->flowi_oif); if (dev) { ifindex = l3mdev_master_ifindex_rcu(dev); if (ifindex) { fl->flowi_oif = ifindex; fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; goto out; } } } if (fl->flowi_iif) { dev = dev_get_by_index_rcu(net, fl->flowi_iif); if (dev) { ifindex = l3mdev_master_ifindex_rcu(dev); if (ifindex) { fl->flowi_iif = ifindex; fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; } } } out: rcu_read_unlock(); }
/* * socket leave an anycast group */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); write_lock_bh(&ipv6_sk_ac_lock); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && ipv6_addr_equal(&pac->acl_addr, addr)) break; prev_pac = pac; } if (!pac) { write_unlock_bh(&ipv6_sk_ac_lock); return -ENOENT; } if (prev_pac) prev_pac->acl_next = pac->acl_next; else np->ipv6_ac_list = pac->acl_next; write_unlock_bh(&ipv6_sk_ac_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); rcu_read_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; }
void ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int prev_index; write_lock_bh(&ipv6_sk_ac_lock); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; write_unlock_bh(&ipv6_sk_ac_lock); prev_index = 0; rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { dev = dev_get_by_index_rcu(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } rcu_read_unlock(); }
/* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(unsigned long arg) { struct ipq *qp; struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if (!(qp->q.flags & INET_FRAG_EVICTED)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out_rcu_unlock; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) goto out_rcu_unlock; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (qp->user == IP_DEFRAG_AF_PACKET || ((qp->user >= IP_DEFRAG_CONNTRACK_IN) && (qp->user <= __IP_DEFRAG_CONNTRACK_IN_END) && (skb_rtable(head)->rt_type != RTN_LOCAL))) goto out_rcu_unlock; /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); } out: spin_unlock(&qp->q.lock); ipq_put(qp); }
static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct tcf_pkt_info *info) { struct ip_set_adt_opt opt; struct xt_action_param acpar; const struct xt_set_info *set = (const void *) em->data; struct net_device *dev, *indev = NULL; int ret, network_offset; switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) return 0; acpar.thoff = ip_hdrlen(skb); break; case htons(ETH_P_IPV6): acpar.family = NFPROTO_IPV6; if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) return 0; /* doesn't call ipv6_find_hdr() because ipset doesn't use thoff, yet */ acpar.thoff = sizeof(struct ipv6hdr); break; default: return 0; } acpar.hooknum = 0; opt.family = acpar.family; opt.dim = set->dim; opt.flags = set->flags; opt.cmdflags = 0; opt.ext.timeout = ~0u; network_offset = skb_network_offset(skb); skb_pull(skb, network_offset); dev = skb->dev; rcu_read_lock(); if (skb->skb_iif) indev = dev_get_by_index_rcu(em->net, skb->skb_iif); acpar.in = indev ? indev : dev; acpar.out = dev; ret = ip_set_test(set->index, skb, &acpar, &opt); rcu_read_unlock(); skb_push(skb, network_offset); return ret; }
/* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(unsigned long arg) { struct ipq *qp; struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); spin_lock(&qp->q.lock); if (qp->q.last_in & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out_rcu_unlock; /* * Only search router table for the head fragment, * when defraging timeout at PRE_ROUTING HOOK. */ if (qp->user == IP_DEFRAG_CONNTRACK_IN && !skb_dst(head)) { const struct iphdr *iph = ip_hdr(head); int err = ip_route_input(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (unlikely(err)) goto out_rcu_unlock; /* * Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (skb_rtable(head)->rt_type != RTN_LOCAL) goto out_rcu_unlock; } /* Send an ICMP "Fragment Reassembly Timeout" message. */ icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); } out: spin_unlock(&qp->q.lock); ipq_put(qp); }
struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) in_dev = in_dev_get(dev); rcu_read_unlock(); return in_dev; }
static int get_netdev_from_ifindex(struct net *net, int if_index, struct ib_gid_attr *gid_attr_val) { if (if_index && net) { rcu_read_lock(); gid_attr_val->ndev = dev_get_by_index_rcu(net, if_index); rcu_read_unlock(); if (gid_attr_val->ndev) return GID_ATTR_FIND_MASK_NETDEV; } return 0; }
/* Caller must hold RCU or RTNL : * We dont take a reference on found in_device */ struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; struct in_device *in_dev = NULL; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) in_dev = rcu_dereference_rtnl(dev->ip_ptr); rcu_read_unlock(); return in_dev; }
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct flow_offload_tuple_rhash *tuplehash; struct nf_flowtable *flow_table = priv; struct flow_offload_tuple tuple = {}; enum flow_offload_tuple_dir dir; struct flow_offload *flow; struct net_device *outdev; struct in6_addr *nexthop; struct ipv6hdr *ip6h; struct rt6_info *rt; if (skb->protocol != htons(ETH_P_IPV6)) return NF_ACCEPT; if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0) return NF_ACCEPT; tuplehash = flow_offload_lookup(flow_table, &tuple); if (tuplehash == NULL) return NF_ACCEPT; outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx); if (!outdev) return NF_ACCEPT; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache; if (unlikely(nf_flow_exceeds_mtu(skb, rt))) return NF_ACCEPT; if (skb_try_make_writable(skb, sizeof(*ip6h))) return NF_DROP; if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) && nf_flow_nat_ipv6(flow, skb, dir) < 0) return NF_DROP; flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT; ip6h = ipv6_hdr(skb); ip6h->hop_limit--; skb->dev = outdev; nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); return NF_STOLEN; }
int pfq_dev_refcnt_read_by_index(struct net *net, int ifindex) { struct net_device *dev; int ref = -1; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) { ref = netdev_refcnt_read(dev); } rcu_read_unlock(); return ref; }
static void ip_expire(unsigned long arg) { struct ipq *qp; struct net *net; qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); spin_lock(&qp->q.lock); if (qp->q.last_in & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; rcu_read_lock(); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out_rcu_unlock; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) goto out_rcu_unlock; if (qp->user == IP_DEFRAG_AF_PACKET || (qp->user == IP_DEFRAG_CONNTRACK_IN && skb_rtable(head)->rt_type != RTN_LOCAL)) goto out_rcu_unlock; icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); } out: spin_unlock(&qp->q.lock); ipq_put(qp); }
/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */ static struct datapath *get_dp(int dp_ifindex) { struct datapath *dp = NULL; struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(&init_net, dp_ifindex); if (dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); if (vport) dp = vport->dp; } rcu_read_unlock(); return dp; }
u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) { struct net_device *dev; u32 tb_id = 0; if (!ifindex) return 0; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) tb_id = l3mdev_fib_table_rcu(dev); rcu_read_unlock(); return tb_id; }
static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq; struct net_device *dev = NULL; struct net *net; fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; fq_kill(fq); net = container_of(fq->q.net, struct net, ipv6.frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); /* Don't send error if the first segment did not arrive. */ if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out_rcu_unlock; /* But use as source device on which LAST ARRIVED segment was received. And do not use fq->dev pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); fq_put(fq); }
void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, struct inet_frags *frags) { struct net_device *dev = NULL; spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out_rcu_unlock; __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); /* Don't send error if the first segment did not arrive. */ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out_rcu_unlock; /* But use as source device on which LAST ARRIVED * segment was received. And do not use fq->dev * pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); inet_frag_put(&fq->q, frags); }
struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; if (fl6->flowi6_oif) { rcu_read_lock(); dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); if (dev && netif_is_l3_slave(dev)) dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_link_scope_lookup) dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; }
static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq; struct net_device *dev = NULL; struct net *net; fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; fq_kill(fq); net = container_of(fq->q.net, struct net, ipv6.frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) goto out_rcu_unlock; fq->q.fragments->dev = dev; icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); fq_put(fq); }
static void ri_tasklet(unsigned long dev) { struct net_device *_dev = (struct net_device *)dev; struct ifb_private *dp = netdev_priv(_dev); struct net_device_stats *stats = &_dev->stats; struct netdev_queue *txq; struct sk_buff *skb; txq = netdev_get_tx_queue(_dev, 0); if ((skb = skb_peek(&dp->tq)) == NULL) { if (__netif_tx_trylock(txq)) { skb_queue_splice_tail_init(&dp->rq, &dp->tq); __netif_tx_unlock(txq); } else { /* reschedule */ goto resched; } } while ((skb = __skb_dequeue(&dp->tq)) != NULL) { u32 from = G_TC_FROM(skb->tc_verd); skb->tc_verd = 0; skb->tc_verd = SET_TC_NCLS(skb->tc_verd); stats->tx_packets++; stats->tx_bytes +=skb->len; rcu_read_lock(); skb->dev = dev_get_by_index_rcu(&init_net, skb->skb_iif); if (!skb->dev) { rcu_read_unlock(); dev_kfree_skb(skb); stats->tx_dropped++; if (skb_queue_len(&dp->tq) != 0) goto resched; break; } rcu_read_unlock(); skb->skb_iif = _dev->ifindex; if (from & AT_EGRESS) { dev_queue_xmit(skb); } else if (from & AT_INGRESS) { skb_pull(skb, skb->dev->hard_header_len); netif_receive_skb(skb); } else BUG(); } if (__netif_tx_trylock(txq)) { if ((skb = skb_peek(&dp->rq)) == NULL) { dp->tasklet_pending = 0; if (netif_queue_stopped(_dev)) netif_wake_queue(_dev); } else { __netif_tx_unlock(txq); goto resched; } __netif_tx_unlock(txq); } else { resched: dp->tasklet_pending = 1; tasklet_schedule(&dp->ifb_tasklet); } }
/* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct ipq *qp; struct net *net; qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); if (!inet_frag_evicting(&qp->q)) { struct sk_buff *clone, *head = qp->q.fragments; const struct iphdr *iph; int err; __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments) goto out; head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; clone = skb_clone(head, GFP_ATOMIC); /* Send an ICMP "Fragment Reassembly Timeout" message. */ if (clone) { spin_unlock(&qp->q.lock); icmp_send(clone, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); consume_skb(clone); goto out_rcu_unlock; } } out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); ipq_put(qp); }
int ip6_mc_input(struct sk_buff *skb) { int sdif = inet6_sdif(skb); const struct ipv6hdr *hdr; struct net_device *dev; bool deliver; __IP6_UPD_PO_STATS(dev_net(skb_dst(skb)->dev), __in6_dev_get_safely(skb->dev), IPSTATS_MIB_INMCAST, skb->len); /* skb->dev passed may be master dev for vrfs. */ if (sdif) { rcu_read_lock(); dev = dev_get_by_index_rcu(dev_net(skb->dev), sdif); if (!dev) { rcu_read_unlock(); kfree_skb(skb); return -ENODEV; } } else { dev = skb->dev; } hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(dev, &hdr->daddr, NULL); if (sdif) rcu_read_unlock(); #ifdef CONFIG_IPV6_MROUTE /* * IPv6 multicast router mode is now supported ;) */ if (dev_net(skb->dev)->ipv6.devconf_all->mc_forwarding && !(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL)) && likely(!(IP6CB(skb)->flags & IP6SKB_FORWARDED))) { /* * Okay, we try to forward - split and duplicate * packets. */ struct sk_buff *skb2; struct inet6_skb_parm *opt = IP6CB(skb); /* Check for MLD */ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { /* Check if this is a mld message */ u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; /* Check if the value of Router Alert * is for MLD (0x0000). */ if (opt->ra == htons(IPV6_OPT_ROUTERALERT_MLD)) { deliver = false; if (!ipv6_ext_hdr(nexthdr)) { /* BUG */ goto out; } offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); if (offset < 0) goto out; if (ipv6_is_mld(skb, nexthdr, offset)) deliver = true; goto out; } /* unknown RA - process it normally */ } if (deliver) skb2 = skb_clone(skb, GFP_ATOMIC); else { skb2 = skb; skb = NULL; } if (skb2) { ip6_mr_input(skb2); } } out: #endif if (likely(deliver)) ip6_input(skb); else { /* discard */ kfree_skb(skb); } return 0; }
static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; bool needs_rtnl = setsockopt_needs_rtnl(optname); if (!optval) val = 0; else { if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; } else val = 0; } valbool = (val != 0); if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); if (needs_rtnl) rtnl_lock(); lock_sock(sk); switch (optname) { case IPV6_ADDRFORM: if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { struct ipv6_txoptions *opt; struct sk_buff *pktopt; if (sk->sk_type == SOCK_RAW) break; if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE) { struct udp_sock *up = udp_sk(sk); if (up->pending == AF_INET6) { retv = -EBUSY; break; } } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; } if (ipv6_only_sock(sk) || !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) { retv = -EADDRNOTAVAIL; break; } fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so * remove it from the refcnt debug socks count in the * original family... */ sk_refcnt_debug_dec(sk); if (sk->sk_protocol == IPPROTO_TCP) { struct inet_connection_sock *icsk = inet_csk(sk); local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); local_bh_enable(); sk->sk_prot = &tcp_prot; icsk->icsk_af_ops = &ipv4_specific; sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { struct proto *prot = &udp_prot; if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); local_bh_enable(); sk->sk_prot = prot; sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); sk->sk_destruct = inet_sock_destruct; /* * ... and add it to the refcnt debug socks count * in the new family. -acme */ sk_refcnt_debug_inc(sk); module_put(THIS_MODULE); retv = 0; break; } goto e_inval; case IPV6_V6ONLY: if (optlen < sizeof(int) || inet_sk(sk)->inet_num) goto e_inval; sk->sk_ipv6only = valbool; retv = 0; break; case IPV6_RECVPKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxinfo = valbool; retv = 0; break; case IPV6_2292PKTINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxoinfo = valbool; retv = 0; break; case IPV6_RECVHOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxhlim = valbool; retv = 0; break; case IPV6_2292HOPLIMIT: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxohlim = valbool; retv = 0; break; case IPV6_RECVRTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.srcrt = valbool; retv = 0; break; case IPV6_2292RTHDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.osrcrt = valbool; retv = 0; break; case IPV6_RECVHOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.hopopts = valbool; retv = 0; break; case IPV6_2292HOPOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.ohopopts = valbool; retv = 0; break; case IPV6_RECVDSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.dstopts = valbool; retv = 0; break; case IPV6_2292DSTOPTS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.odstopts = valbool; retv = 0; break; case IPV6_TCLASS: if (optlen < sizeof(int)) goto e_inval; if (val < -1 || val > 0xff) goto e_inval; /* RFC 3542, 6.5: default traffic class of 0x0 */ if (val == -1) val = 0; np->tclass = val; retv = 0; break; case IPV6_RECVTCLASS: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxtclass = valbool; retv = 0; break; case IPV6_FLOWINFO: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxflow = valbool; retv = 0; break; case IPV6_RECVPATHMTU: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxpmtu = valbool; retv = 0; break; case IPV6_TRANSPARENT: if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) && !ns_capable(net->user_ns, CAP_NET_RAW)) { retv = -EPERM; break; } if (optlen < sizeof(int)) goto e_inval; /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */ inet_sk(sk)->transparent = valbool; retv = 0; break; case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; np->rxopt.bits.rxorigdstaddr = valbool; retv = 0; break; case IPV6_HOPOPTS: case IPV6_RTHDRDSTOPTS: case IPV6_RTHDR: case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; /* remove any sticky options header with a zero option * length, per RFC3542. */ if (optlen == 0) optval = NULL; else if (!optval) goto e_inval; else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { retv = PTR_ERR(opt); break; } /* routing header option needs extra check */ retv = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (rthdr->hdrlen != 2 || rthdr->segments_left != 1) goto sticky_done; break; #endif case IPV6_SRCRT_TYPE_4: { struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *) opt->srcrt; if (!seg6_validate_srh(srh, optlen)) goto sticky_done; break; } default: goto sticky_done; } } retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } break; } case IPV6_PKTINFO: { struct in6_pktinfo pkt; if (optlen == 0) goto e_inval; else if (optlen < sizeof(struct in6_pktinfo) || !optval) goto e_inval; if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { retv = -EFAULT; break; } if (sk->sk_bound_dev_if && pkt.ipi6_ifindex != sk->sk_bound_dev_if) goto e_inval; np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex; np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr; retv = 0; break; } case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; struct flowi6 fl6; struct sockcm_cookie sockc_junk; struct ipcm6_cookie ipc6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; if (optlen == 0) goto update; /* 1K is probably excessive * 1K is surely not enough, 2K per standard header is 16K. */ retv = -EINVAL; if (optlen > 64*1024) break; opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; if (!opt) break; memset(opt, 0, sizeof(*opt)); atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) goto done; msg.msg_controllen = optlen; msg.msg_control = (void *)(opt+1); ipc6.opt = opt; retv = ip6_datagram_send_ctl(net, sk, &msg, &fl6, &ipc6, &sockc_junk); if (retv) goto done; update: retv = 0; opt = ipv6_update_options(sk, opt); done: if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } break; } case IPV6_UNICAST_HOPS: if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) goto e_inval; np->hop_limit = val; retv = 0; break; case IPV6_MULTICAST_HOPS: if (sk->sk_type == SOCK_STREAM) break; if (optlen < sizeof(int)) goto e_inval; if (val > 255 || val < -1) goto e_inval; np->mcast_hops = (val == -1 ? IPV6_DEFAULT_MCASTHOPS : val); retv = 0; break; case IPV6_MULTICAST_LOOP: if (optlen < sizeof(int)) goto e_inval; if (val != valbool) goto e_inval; np->mc_loop = valbool; retv = 0; break; case IPV6_UNICAST_IF: { struct net_device *dev = NULL; int ifindex; if (optlen != sizeof(int)) goto e_inval; ifindex = (__force int)ntohl((__force __be32)val); if (ifindex == 0) { np->ucast_oif = 0; retv = 0; break; } dev = dev_get_by_index(net, ifindex); retv = -EADDRNOTAVAIL; if (!dev) break; dev_put(dev); retv = -EINVAL; if (sk->sk_bound_dev_if) break; np->ucast_oif = ifindex; retv = 0; break; } case IPV6_MULTICAST_IF: if (sk->sk_type == SOCK_STREAM) break; if (optlen < sizeof(int)) goto e_inval; if (val) { struct net_device *dev; int midx; rcu_read_lock(); dev = dev_get_by_index_rcu(net, val); if (!dev) { rcu_read_unlock(); retv = -ENODEV; break; } midx = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val && (!midx || midx != sk->sk_bound_dev_if)) goto e_inval; } np->mcast_oif = val; retv = 0; break; case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EPROTO; if (inet_sk(sk)->is_icsk) break; retv = -EFAULT; if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_ADD_MEMBERSHIP) retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); else retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr); break; } case IPV6_JOIN_ANYCAST: case IPV6_LEAVE_ANYCAST: { struct ipv6_mreq mreq; if (optlen < sizeof(struct ipv6_mreq)) goto e_inval; retv = -EFAULT; if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq))) break; if (optname == IPV6_JOIN_ANYCAST) retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); else retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr); break; } case MCAST_JOIN_GROUP: case MCAST_LEAVE_GROUP: { struct group_req greq; struct sockaddr_in6 *psin6; if (optlen < sizeof(struct group_req)) goto e_inval; retv = -EFAULT; if (copy_from_user(&greq, optval, sizeof(struct group_req))) break; if (greq.gr_group.ss_family != AF_INET6) { retv = -EADDRNOTAVAIL; break; } psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) retv = ipv6_sock_mc_join(sk, greq.gr_interface, &psin6->sin6_addr); else retv = ipv6_sock_mc_drop(sk, greq.gr_interface, &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_BLOCK_SOURCE: case MCAST_UNBLOCK_SOURCE: { struct group_source_req greqs; int omode, add; if (optlen < sizeof(struct group_source_req)) goto e_inval; if (copy_from_user(&greqs, optval, sizeof(greqs))) { retv = -EFAULT; break; } if (greqs.gsr_group.ss_family != AF_INET6 || greqs.gsr_source.ss_family != AF_INET6) { retv = -EADDRNOTAVAIL; break; } if (optname == MCAST_BLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 1; } else if (optname == MCAST_UNBLOCK_SOURCE) { omode = MCAST_EXCLUDE; add = 0; } else if (optname == MCAST_JOIN_SOURCE_GROUP) { struct sockaddr_in6 *psin6; psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; omode = MCAST_INCLUDE; add = 1; } else /* MCAST_LEAVE_SOURCE_GROUP */ { omode = MCAST_INCLUDE; add = 0; } retv = ip6_mc_source(add, omode, sk, &greqs); break; } case MCAST_MSFILTER: { struct group_filter *gsf; if (optlen < GROUP_FILTER_SIZE(0)) goto e_inval; if (optlen > sysctl_optmem_max) { retv = -ENOBUFS; break; } gsf = kmalloc(optlen, GFP_KERNEL); if (!gsf) { retv = -ENOBUFS; break; } retv = -EFAULT; if (copy_from_user(gsf, optval, optlen)) { kfree(gsf); break; } /* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffffU || gsf->gf_numsrc > sysctl_mld_max_msf) { kfree(gsf); retv = -ENOBUFS; break; } if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) { kfree(gsf); retv = -EINVAL; break; } retv = ip6_mc_msfilter(sk, gsf); kfree(gsf); break; } case IPV6_ROUTER_ALERT: if (optlen < sizeof(int)) goto e_inval; retv = ip6_ra_control(sk, val); break; case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) goto e_inval; np->pmtudisc = val; retv = 0; break; case IPV6_MTU: if (optlen < sizeof(int)) goto e_inval; if (val && val < IPV6_MIN_MTU) goto e_inval; np->frag_size = val; retv = 0; break; case IPV6_RECVERR: if (optlen < sizeof(int)) goto e_inval; np->recverr = valbool; if (!val) skb_queue_purge(&sk->sk_error_queue); retv = 0; break; case IPV6_FLOWINFO_SEND: if (optlen < sizeof(int)) goto e_inval; np->sndflow = valbool; retv = 0; break; case IPV6_FLOWLABEL_MGR: retv = ipv6_flowlabel_opt(sk, optval, optlen); break; case IPV6_IPSEC_POLICY: case IPV6_XFRM_POLICY: retv = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; retv = xfrm_user_policy(sk, optname, optval, optlen); break; case IPV6_ADDR_PREFERENCES: { unsigned int pref = 0; unsigned int prefmask = ~0; if (optlen < sizeof(int)) goto e_inval; retv = -EINVAL; /* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */ switch (val & (IPV6_PREFER_SRC_PUBLIC| IPV6_PREFER_SRC_TMP| IPV6_PREFER_SRC_PUBTMP_DEFAULT)) { case IPV6_PREFER_SRC_PUBLIC: pref |= IPV6_PREFER_SRC_PUBLIC; break; case IPV6_PREFER_SRC_TMP: pref |= IPV6_PREFER_SRC_TMP; break; case IPV6_PREFER_SRC_PUBTMP_DEFAULT: break; case 0: goto pref_skip_pubtmp; default: goto e_inval; } prefmask &= ~(IPV6_PREFER_SRC_PUBLIC| IPV6_PREFER_SRC_TMP); pref_skip_pubtmp: /* check HOME/COA conflicts */ switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) { case IPV6_PREFER_SRC_HOME: break; case IPV6_PREFER_SRC_COA: pref |= IPV6_PREFER_SRC_COA; case 0: goto pref_skip_coa; default: goto e_inval; } prefmask &= ~IPV6_PREFER_SRC_COA; pref_skip_coa: /* check CGA/NONCGA conflicts */ switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) { case IPV6_PREFER_SRC_CGA: case IPV6_PREFER_SRC_NONCGA: case 0: break; default: goto e_inval; } np->srcprefs = (np->srcprefs & prefmask) | pref; retv = 0; break; } case IPV6_MINHOPCOUNT: if (optlen < sizeof(int)) goto e_inval; if (val < 0 || val > 255) goto e_inval; np->min_hopcount = val; retv = 0; break; case IPV6_DONTFRAG: np->dontfrag = valbool; retv = 0; break; case IPV6_AUTOFLOWLABEL: np->autoflowlabel = valbool; retv = 0; break; case IPV6_RECVFRAGSIZE: np->rxopt.bits.recvfragsize = valbool; retv = 0; break; } release_sock(sk); if (needs_rtnl) rtnl_unlock(); return retv; e_inval: release_sock(sk); if (needs_rtnl) rtnl_unlock(); return -EINVAL; }
static void ifb_ri_tasklet(unsigned long _txp) { struct ifb_q_private *txp = (struct ifb_q_private *)_txp; struct netdev_queue *txq; struct sk_buff *skb; txq = netdev_get_tx_queue(txp->dev, txp->txqnum); skb = skb_peek(&txp->tq); if (!skb) { if (!__netif_tx_trylock(txq)) goto resched; skb_queue_splice_tail_init(&txp->rq, &txp->tq); __netif_tx_unlock(txq); } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { skb->tc_redirected = 0; skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); txp->tx_packets++; txp->tx_bytes += skb->len; u64_stats_update_end(&txp->tsync); rcu_read_lock(); skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif); if (!skb->dev) { rcu_read_unlock(); dev_kfree_skb(skb); txp->dev->stats.tx_dropped++; if (skb_queue_len(&txp->tq) != 0) goto resched; break; } rcu_read_unlock(); skb->skb_iif = txp->dev->ifindex; if (!skb->tc_from_ingress) { dev_queue_xmit(skb); } else { skb_pull_rcsum(skb, skb->mac_len); netif_receive_skb(skb); } } if (__netif_tx_trylock(txq)) { skb = skb_peek(&txp->rq); if (!skb) { txp->tasklet_pending = 0; if (netif_tx_queue_stopped(txq)) netif_tx_wake_queue(txq); } else { __netif_tx_unlock(txq); goto resched; } __netif_tx_unlock(txq); } else { resched: txp->tasklet_pending = 1; tasklet_schedule(&txp->ifb_tasklet); } }
/* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ static void ip_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); const struct iphdr *iph; struct sk_buff *head = NULL; struct net *net; struct ipq *qp; int err; qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); rcu_read_lock(); spin_lock(&qp->q.lock); if (qp->q.flags & INET_FRAG_COMPLETE) goto out; ipq_kill(qp); __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); __IP_INC_STATS(net, IPSTATS_MIB_REASMTIMEOUT); if (!(qp->q.flags & INET_FRAG_FIRST_IN)) goto out; /* sk_buff::dev and sk_buff::rbnode are unionized. So we * pull the head out of the tree in order to be able to * deal with head->dev. */ if (qp->q.fragments) { head = qp->q.fragments; qp->q.fragments = head->next; } else { head = skb_rb_first(&qp->q.rb_fragments); if (!head) goto out; if (FRAG_CB(head)->next_frag) rb_replace_node(&head->rbnode, &FRAG_CB(head)->next_frag->rbnode, &qp->q.rb_fragments); else rb_erase(&head->rbnode, &qp->q.rb_fragments); memset(&head->rbnode, 0, sizeof(head->rbnode)); barrier(); } if (head == qp->q.fragments_tail) qp->q.fragments_tail = NULL; sub_frag_mem_limit(qp->q.net, head->truesize); head->dev = dev_get_by_index_rcu(net, qp->iif); if (!head->dev) goto out; /* skb has no dst, perform route lookup again */ iph = ip_hdr(head); err = ip_route_input_noref(head, iph->daddr, iph->saddr, iph->tos, head->dev); if (err) goto out; /* Only an end host needs to send an ICMP * "Fragment Reassembly Timeout" message, per RFC792. */ if (frag_expire_skip_icmp(qp->q.key.v4.user) && (skb_rtable(head)->rt_type != RTN_LOCAL)) goto out; spin_unlock(&qp->q.lock); icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); goto out_rcu_unlock; out: spin_unlock(&qp->q.lock); out_rcu_unlock: rcu_read_unlock(); if (head) kfree_skb(head); ipq_put(qp); }
int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct inet6_dev *idev; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; if (ipv6_chk_addr(net, addr, NULL, 0)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); if (pac == NULL) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); if (rt) { dev = rt->dst.dev; dst_release(&rt->dst); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ dev = dev_get_by_flags_rcu(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } } else dev = dev_get_by_index_rcu(net, ifindex); if (dev == NULL) { err = -ENODEV; goto error; } idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; pac->acl_ifindex = dev->ifindex; /* XXX * For hosts, allow link-local or matching prefix anycasts. * This obviates the need for propagating anycast routes while * still allowing some non-router anycast participation. */ if (!ipv6_chk_prefix(addr, dev)) { if (ishost) err = -EADDRNOTAVAIL; if (err) goto error; } err = ipv6_dev_ac_inc(dev, addr); if (!err) { write_lock_bh(&ipv6_sk_ac_lock); pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; write_unlock_bh(&ipv6_sk_ac_lock); pac = NULL; } error: rcu_read_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; }
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); struct pfq_rx_opt * ro; struct pfq_tx_opt * to; bool found = true; if (so == NULL) return -EINVAL; ro = &so->rx_opt; to = &so->tx_opt; switch(optname) { case Q_SO_TOGGLE_QUEUE: { int active; if (optlen != sizeof(active)) return -EINVAL; if (copy_from_user(&active, optval, optlen)) return -EFAULT; if (active) { if (!so->mem_addr) { struct pfq_queue_hdr * queue; /* alloc queue memory */ if (pfq_shared_queue_alloc(so, pfq_queue_total_mem(so)) < 0) { return -ENOMEM; } /* so->mem_addr and so->mem_size are correctly configured */ /* initialize queues headers */ queue = (struct pfq_queue_hdr *)so->mem_addr; /* initialize rx queue header */ queue->rx.data = (1L << 24); queue->rx.poll_wait = 0; queue->rx.size = so->rx_opt.size; queue->rx.slot_size = so->rx_opt.slot_size; queue->tx.producer.index = 0; queue->tx.producer.cache = 0; queue->tx.consumer.index = 0; queue->tx.consumer.cache = 0; queue->tx.size_mask = so->tx_opt.size - 1; queue->tx.max_len = so->tx_opt.maxlen; queue->tx.size = so->tx_opt.size; queue->tx.slot_size = so->tx_opt.slot_size; /* update the queues base_addr */ so->rx_opt.base_addr = so->mem_addr + sizeof(struct pfq_queue_hdr); so->tx_opt.base_addr = so->mem_addr + sizeof(struct pfq_queue_hdr) + pfq_queue_mpdb_mem(so); /* commit both the queues */ smp_wmb(); so->rx_opt.queue_ptr = &queue->rx; so->tx_opt.queue_ptr = &queue->tx; pr_devel("[PFQ|%d] queue: rx_size:%d rx_slot_size:%d tx_size:%d tx_slot_size:%d\n", so->id, queue->rx.size, queue->rx.slot_size, queue->tx.size, queue->tx.slot_size); } } else { if (so->tx_opt.thread) { pr_devel("[PFQ|%d] stopping TX thread...\n", so->id); kthread_stop(so->tx_opt.thread); so->tx_opt.thread = NULL; } msleep(Q_GRACE_PERIOD); pfq_shared_queue_free(so); } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, bind.gid, "add binding"); pfq_devmap_update(map_set, bind.if_index, bind.hw_queue, bind.gid); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, bind.gid, "remove binding"); pfq_devmap_update(map_reset, bind.if_index, bind.hw_queue, bind.gid); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); pr_devel("[PFQ|%d] TX bind: invalid if_index:%d\n", so->id, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { pr_devel("[PFQ|%d] TX bind: invalid queue:%d\n", so->id, info.hw_queue); return -EPERM; } so->egress_index = info.if_index; so->egress_queue = info.hw_queue; pr_devel("[PFQ|%d] egress bind: if_index:%d hw_queue:%d\n", so->id, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->rx_opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 1 : 0; /* update the timestamp_enabled counter */ atomic_add(tstamp - so->rx_opt.tstamp, ×tamp_enabled); so->rx_opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp_enabled counter: %d\n", so->id, atomic_read(×tamp_enabled)); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->rx_opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)cap_len) { pr_devel("[PFQ|%d] invalid caplen:%zu (max: %d)\n", so->id, caplen, cap_len); return -EPERM; } so->rx_opt.caplen = caplen; so->rx_opt.slot_size = MPDB_QUEUE_SLOT_SIZE(so->rx_opt.caplen); pr_devel("[PFQ|%d] caplen:%zu -> slot_size:%zu\n", so->id, so->rx_opt.caplen, so->rx_opt.slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->rx_opt.size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)rx_queue_slots) { pr_devel("[PFQ|%d] invalid rx slots:%zu (max: %d)\n", so->id, slots, rx_queue_slots); return -EPERM; } so->rx_opt.size = slots; pr_devel("[PFQ|%d] rx_queue_slots:%zu\n", so->id, so->rx_opt.size); } break; case Q_SO_SET_TX_MAXLEN: { typeof (so->tx_opt.maxlen) maxlen; if (optlen != sizeof(maxlen)) return -EINVAL; if (copy_from_user(&maxlen, optval, optlen)) return -EFAULT; if (maxlen > (size_t)max_len) { pr_devel("[PFQ|%d] invalid maxlen:%zu (max: %d)\n", so->id, maxlen, max_len); return -EPERM; } so->tx_opt.maxlen = maxlen; so->tx_opt.slot_size = SPSC_QUEUE_SLOT_SIZE(so->tx_opt.maxlen); /* max_len: max length */ pr_devel("[PFQ|%d] tx_slot_size:%zu\n", so->id, so->rx_opt.slot_size); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->tx_opt.size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots & (slots-1)) { pr_devel("[PFQ|%d] tx slots must be a power of two.\n", so->id); return -EINVAL; } if (slots > (size_t)tx_queue_slots) { pr_devel("[PFQ|%d] invalid tx slots:%zu (max: %d)\n", so->id, slots, tx_queue_slots); return -EPERM; } so->tx_opt.size = slots; pr_devel("[PFQ|%d] tx_queue_slots:%zu\n", so->id, so->tx_opt.size); } break; case Q_SO_GROUP_LEAVE: { int gid; if (optlen != sizeof(gid)) return -EINVAL; if (copy_from_user(&gid, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) { return -EFAULT; } pr_devel("[PFQ|%d] leave: gid:%d\n", so->id, gid); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, fprog.gid, "group fprog"); if (fprog.fcode.len > 0) /* set the filter */ { struct sk_filter *filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { pr_devel("[PFQ|%d] fprog error: alloc_sk_filter for gid:%d\n", so->id, fprog.gid); return -EINVAL; } __pfq_set_group_filter(fprog.gid, filter); pr_devel("[PFQ|%d] fprog: gid:%d (fprog len %d bytes)\n", so->id, fprog.gid, fprog.fcode.len); } else /* reset the filter */ { __pfq_set_group_filter(fprog.gid, NULL); pr_devel("[PFQ|%d] fprog: gid:%d (resetting filter)\n", so->id, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, vlan.gid, "group vlan filt toggle"); __pfq_toggle_group_vlan_filters(vlan.gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid:%d\n", so->id, (vlan.toggle ? "enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, filt.gid, "group vlan filt"); if (filt.vid < -1 || filt.vid > 4094) { pr_devel("[PFQ|%d] vlan_set error: gid:%d invalid vid:%d!\n", so->id, filt.gid, filt.vid); return -EINVAL; } if (!__pfq_vlan_filters_enabled(filt.gid)) { pr_devel("[PFQ|%d] vlan_set error: vlan filters disabled for gid:%d!\n", so->id, filt.gid); return -EPERM; } if (filt.vid == -1) /* any */ { int i; for(i = 1; i < 4095; i++) __pfq_set_group_vlan_filter(filt.gid, filt.toggle, i); } else { __pfq_set_group_vlan_filter(filt.gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan_set filter vid %d for gid:%d\n", so->id, filt.vid, filt.gid); } break; case Q_SO_TX_THREAD_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); pr_devel("[PFQ|%d] TX bind: invalid if_index:%d\n", so->id, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { pr_devel("[PFQ|%d] TX bind: invalid queue:%d\n", so->id, info.hw_queue); return -EPERM; } to->if_index = info.if_index; to->hw_queue = info.hw_queue; pr_devel("[PFQ|%d] TX bind: if_index:%d hw_queue:%d\n", so->id, to->if_index, to->hw_queue); } break; case Q_SO_TX_THREAD_START: { int cpu; if (to->thread) { pr_devel("[PFQ|%d] TX thread already created on cpu %d!\n", so->id, to->cpu); return -EPERM; } if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (to->queue_ptr == NULL) { pr_devel("[PFQ|%d] socket not enabled!\n", so->id); return -EPERM; } if (optlen != sizeof(cpu)) return -EINVAL; if (copy_from_user(&cpu, optval, optlen)) return -EFAULT; if (cpu < -1 || (cpu > -1 && !cpu_online(cpu))) { pr_devel("[PFQ|%d] invalid cpu (%d)!\n", so->id, cpu); return -EPERM; } to->cpu = cpu; pr_devel("[PFQ|%d] creating TX thread on cpu %d -> if_index:%d hw_queue:%d\n", so->id, to->cpu, to->if_index, to->hw_queue); to->thread = kthread_create_on_node(pfq_tx_thread, so, to->cpu == -1 ? -1 : cpu_to_node(to->cpu), "pfq_tx_%d", so->id); if (IS_ERR(to->thread)) { printk(KERN_INFO "[PFQ] kernel_thread() create failed on cpu %d!\n", to->cpu); return PTR_ERR(to->thread); } if (to->cpu != -1) kthread_bind(to->thread, to->cpu); } break; case Q_SO_TX_THREAD_STOP: { pr_devel("[PFQ|%d] stopping TX thread...\n", so->id); if (!to->thread) { pr_devel("[PFQ|%d] TX thread not running!\n", so->id); return -EPERM; } kthread_stop(to->thread); to->thread = NULL; pr_devel("[PFQ|%d] stop TX thread: done.\n", so->id); } break; case Q_SO_TX_THREAD_WAKEUP: { if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (!to->thread) { pr_devel("[PFQ|%d] TX thread not running!\n", so->id); return -EPERM; } wake_up_process(to->thread); } break; case Q_SO_TX_QUEUE_FLUSH: { struct net_device *dev; if (to->if_index == -1) { pr_devel("[PFQ|%d] socket TX not bound to any device!\n", so->id); return -EPERM; } if (to->thread && to->thread->state == TASK_RUNNING) { pr_devel("[PFQ|%d] TX thread is running!\n", so->id); return -EPERM; } if (to->queue_ptr == NULL) { pr_devel("[PFQ|%d] socket not enabled!\n", so->id); return -EPERM; } dev = dev_get_by_index(sock_net(&so->sk), to->if_index); if (!dev) { pr_devel("[PFQ|%d] No such device (if_index = %d)\n", so->id, to->if_index); return -EPERM; } pfq_tx_queue_flush(to, dev, get_cpu(), NUMA_NO_NODE); put_cpu(); dev_put(dev); } break; case Q_SO_GROUP_FUNCTION: { struct pfq_group_computation tmp; struct pfq_computation_descr *descr; size_t psize, ucsize; struct pfq_computation_tree *comp; void *context; if (optlen != sizeof(tmp)) return -EINVAL; if (copy_from_user(&tmp, optval, optlen)) return -EFAULT; CHECK_GROUP_ACCES(so->id, tmp.gid, "group computation"); if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] computation size: %zu\n", so->id, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { pr_devel("[PFQ|%d] computation: out of memory!\n", so->id); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { pr_devel("[PFQ|%d] computation: copy_from_user error!\n", so->id); kfree(descr); return -EFAULT; } /* print user computation */ pr_devel_computation_descr(descr); /* ensure the correctness of the specified functional computation */ if (pfq_validate_computation_descr(descr) < 0) { pr_devel("[PFQ|%d] invalid expression!\n", so->id); return -EFAULT; } /* allocate context */ context = pfq_context_alloc(descr); if (context == NULL) { pr_devel("[PFQ|%d] context: alloc error!\n", so->id); kfree(descr); return -EFAULT; } /* allocate struct pfq_computation_tree */ comp = pfq_computation_alloc(descr); if (comp == NULL) { pr_devel("[PFQ|%d] computation: alloc error!\n", so->id); kfree(context); kfree(descr); return -EFAULT; } /* link the functional computation */ if (pfq_computation_rtlink(descr, comp, context) < 0) { pr_devel("[PFQ|%d] computation aborted!", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* exec init functions */ if (pfq_computation_init(comp) < 0) { pr_devel("[PFQ|%d] computation initialization aborted!", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } /* set the new program */ if (pfq_set_group_prog(tmp.gid, comp, context) < 0) { pr_devel("[PFQ|%d] set group program error!\n", so->id); kfree(context); kfree(descr); kfree(comp); return -EPERM; } kfree(descr); return 0; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }
/* * Deliver the packet to the host */ void ip6_protocol_deliver_rcu(struct net *net, struct sk_buff *skb, int nexthdr, bool have_final) { const struct inet6_protocol *ipprot; struct inet6_dev *idev; unsigned int nhoff; bool raw; /* * Parse extension headers */ resubmit: idev = ip6_dst_idev(skb_dst(skb)); nhoff = IP6CB(skb)->nhoff; if (!have_final) { if (!pskb_pull(skb, skb_transport_offset(skb))) goto discard; nexthdr = skb_network_header(skb)[nhoff]; } resubmit_final: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); if (ipprot) { int ret; if (have_final) { if (!(ipprot->flags & INET6_PROTO_FINAL)) { /* Once we've seen a final protocol don't * allow encapsulation on any non-final * ones. This allows foo in UDP encapsulation * to work. */ goto discard; } } else if (ipprot->flags & INET6_PROTO_FINAL) { const struct ipv6hdr *hdr; int sdif = inet6_sdif(skb); struct net_device *dev; /* Only do this once for first final protocol */ have_final = true; /* Free reference early: we don't need it any more, and it may hold ip_conntrack module loaded indefinitely. */ nf_reset(skb); skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); hdr = ipv6_hdr(skb); /* skb->dev passed may be master dev for vrfs. */ if (sdif) { dev = dev_get_by_index_rcu(net, sdif); if (!dev) goto discard; } else { dev = skb->dev; } if (ipv6_addr_is_multicast(&hdr->daddr) && !ipv6_chk_mcast_addr(dev, &hdr->daddr, &hdr->saddr) && !ipv6_is_mld(skb, nexthdr, skb_network_header_len(skb))) goto discard; } if (!(ipprot->flags & INET6_PROTO_NOPOLICY) && !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; ret = INDIRECT_CALL_2(ipprot->handler, tcp_v6_rcv, udpv6_rcv, skb); if (ret > 0) { if (ipprot->flags & INET6_PROTO_FINAL) { /* Not an extension header, most likely UDP * encapsulation. Use return value as nexthdr * protocol not nhoff (which presumably is * not set by handler). */ nexthdr = ret; goto resubmit_final; } else { goto resubmit; } } else if (ret == 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); } } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff); } kfree_skb(skb); } else { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDELIVERS); consume_skb(skb); } } return; discard: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); kfree_skb(skb); }
int pfq_setsockopt(struct socket *sock, int level, int optname, char __user * optval, #if(LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)) unsigned #endif int optlen) { struct pfq_sock *so = pfq_sk(sock->sk); bool found = true; if (so == NULL) return -EINVAL; switch(optname) { case Q_SO_ENABLE: { unsigned long addr; int err = 0; if (optlen != sizeof(addr)) return -EINVAL; if (copy_from_user(&addr, optval, optlen)) return -EFAULT; err = pfq_shared_queue_enable(so, addr); if (err < 0) { printk(KERN_INFO "[PFQ|%d] enable error!\n", so->id.value); return err; } return 0; } break; case Q_SO_DISABLE: { int err = 0; size_t n; for(n = 0; n < so->tx_opt.num_queues; n++) { if (so->tx_opt.queue[n].task) { pr_devel("[PFQ|%d] stopping Tx[%zu] thread@%p\n", so->id.value, n, so->tx_opt.queue[n].task); kthread_stop(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; } } err = pfq_shared_queue_disable(so); if (err < 0) { printk(KERN_INFO "[PFQ|%d] disable error!\n", so->id.value); return err; } } break; case Q_SO_GROUP_BIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid.value = bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] add bind: gid=%d not joined!\n", so->id.value, bind.gid); return -EACCES; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), bind.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] bind: invalid if_index=%d!\n", so->id.value, bind.if_index); return -EACCES; } rcu_read_unlock(); pfq_devmap_update(map_set, bind.if_index, bind.hw_queue, gid); } break; case Q_SO_GROUP_UNBIND: { struct pfq_binding bind; pfq_gid_t gid; if (optlen != sizeof(struct pfq_binding)) return -EINVAL; if (copy_from_user(&bind, optval, optlen)) return -EFAULT; gid.value = bind.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] remove bind: gid=%d not joined!\n", so->id.value, bind.gid); return -EACCES; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), bind.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] unbind: invalid if_index=%d\n", so->id.value, bind.if_index); return -EPERM; } rcu_read_unlock(); pfq_devmap_update(map_reset, bind.if_index, bind.hw_queue, gid); } break; case Q_SO_EGRESS_BIND: { struct pfq_binding info; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] egress bind: invalid if_index=%d\n", so->id.value, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { printk(KERN_INFO "[PFQ|%d] egress bind: invalid queue=%d\n", so->id.value, info.hw_queue); return -EPERM; } so->egress_type = pfq_endpoint_device; so->egress_index = info.if_index; so->egress_queue = info.hw_queue; pr_devel("[PFQ|%d] egress bind: device if_index=%d hw_queue=%d\n", so->id.value, so->egress_index, so->egress_queue); } break; case Q_SO_EGRESS_UNBIND: { so->egress_type = pfq_endpoint_socket; so->egress_index = 0; so->egress_queue = 0; pr_devel("[PFQ|%d] egress unbind.\n", so->id.value); } break; case Q_SO_SET_RX_TSTAMP: { int tstamp; if (optlen != sizeof(so->rx_opt.tstamp)) return -EINVAL; if (copy_from_user(&tstamp, optval, optlen)) return -EFAULT; tstamp = tstamp ? 1 : 0; so->rx_opt.tstamp = tstamp; pr_devel("[PFQ|%d] timestamp enabled.\n", so->id.value); } break; case Q_SO_SET_RX_CAPLEN: { typeof(so->rx_opt.caplen) caplen; if (optlen != sizeof(caplen)) return -EINVAL; if (copy_from_user(&caplen, optval, optlen)) return -EFAULT; if (caplen > (size_t)cap_len) { printk(KERN_INFO "[PFQ|%d] invalid caplen=%zu (max %d)\n", so->id.value, caplen, cap_len); return -EPERM; } so->rx_opt.caplen = caplen; so->rx_opt.slot_size = Q_MPDB_QUEUE_SLOT_SIZE(so->rx_opt.caplen); pr_devel("[PFQ|%d] caplen=%zu, slot_size=%zu\n", so->id.value, so->rx_opt.caplen, so->rx_opt.slot_size); } break; case Q_SO_SET_RX_SLOTS: { typeof(so->rx_opt.queue_size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)max_queue_slots) { printk(KERN_INFO "[PFQ|%d] invalid Rx slots=%zu (max %d)\n", so->id.value, slots, max_queue_slots); return -EPERM; } so->rx_opt.queue_size = slots; pr_devel("[PFQ|%d] rx_queue slots=%zu\n", so->id.value, so->rx_opt.queue_size); } break; case Q_SO_SET_TX_SLOTS: { typeof (so->tx_opt.queue_size) slots; if (optlen != sizeof(slots)) return -EINVAL; if (copy_from_user(&slots, optval, optlen)) return -EFAULT; if (slots > (size_t)max_queue_slots) { printk(KERN_INFO "[PFQ|%d] invalid Tx slots=%zu (max %d)\n", so->id.value, slots, max_queue_slots); return -EPERM; } so->tx_opt.queue_size = slots; pr_devel("[PFQ|%d] tx_queue slots=%zu\n", so->id.value, so->tx_opt.queue_size); } break; case Q_SO_GROUP_LEAVE: { pfq_gid_t gid; if (optlen != sizeof(gid.value)) return -EINVAL; if (copy_from_user(&gid.value, optval, optlen)) return -EFAULT; if (pfq_leave_group(gid, so->id) < 0) return -EFAULT; pr_devel("[PFQ|%d] leave: gid=%d\n", so->id.value, gid.value); } break; case Q_SO_GROUP_FPROG: { struct pfq_fprog fprog; pfq_gid_t gid; if (optlen != sizeof(fprog)) return -EINVAL; if (copy_from_user(&fprog, optval, optlen)) return -EFAULT; gid.value = fprog.gid; if (!pfq_has_joined_group(gid, so->id)) { /* don't set the first and return */ return 0; } if (fprog.fcode.len > 0) { /* set the filter */ struct sk_filter *filter; if (fprog.fcode.len == 1) { /* check for dummey BPF_CLASS == BPF_RET */ if (BPF_CLASS(fprog.fcode.filter[0].code) == BPF_RET) { pr_devel("[PFQ|%d] fprog: BPF_RET optimized out!\n", so->id.value); return 0; } } filter = pfq_alloc_sk_filter(&fprog.fcode); if (filter == NULL) { printk(KERN_INFO "[PFQ|%d] fprog error: alloc_sk_filter for gid=%d\n", so->id.value, fprog.gid); return -EINVAL; } pfq_set_group_filter(gid, filter); pr_devel("[PFQ|%d] fprog: gid=%d (fprog len %d bytes)\n", so->id.value, fprog.gid, fprog.fcode.len); } else { /* reset the filter */ pfq_set_group_filter(gid, NULL); pr_devel("[PFQ|%d] fprog: gid=%d (resetting filter)\n", so->id.value, fprog.gid); } } break; case Q_SO_GROUP_VLAN_FILT_TOGGLE: { struct pfq_vlan_toggle vlan; pfq_gid_t gid; if (optlen != sizeof(vlan)) return -EINVAL; if (copy_from_user(&vlan, optval, optlen)) return -EFAULT; gid.value = vlan.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter toggle: gid=%d not joined!\n", so->id.value, vlan.gid); return -EACCES; } pfq_toggle_group_vlan_filters(gid, vlan.toggle); pr_devel("[PFQ|%d] vlan filters %s for gid=%d\n", so->id.value, (vlan.toggle ? "enabled" : "disabled"), vlan.gid); } break; case Q_SO_GROUP_VLAN_FILT: { struct pfq_vlan_toggle filt; pfq_gid_t gid; if (optlen != sizeof(filt)) return -EINVAL; if (copy_from_user(&filt, optval, optlen)) return -EFAULT; gid.value = filt.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] vlan filter: gid=%d not joined!\n", so->id.value, filt.gid); return -EACCES; } if (filt.vid < -1 || filt.vid > 4094) { printk(KERN_INFO "[PFQ|%d] vlan error: invalid vid=%d for gid=%d!\n", so->id.value, filt.vid, filt.gid); return -EINVAL; } if (!pfq_vlan_filters_enabled(gid)) { printk(KERN_INFO "[PFQ|%d] vlan error: vlan filters disabled for gid=%d!\n", so->id.value, filt.gid); return -EPERM; } if (filt.vid == -1) { /* any */ int i; for(i = 1; i < 4095; i++) { pfq_set_group_vlan_filter(gid, filt.toggle, i); } } else { pfq_set_group_vlan_filter(gid, filt.toggle, filt.vid); } pr_devel("[PFQ|%d] vlan filter vid %d set for gid=%d\n", so->id.value, filt.vid, filt.gid); } break; case Q_SO_TX_BIND: { struct pfq_binding info; size_t i; if (optlen != sizeof(info)) return -EINVAL; if (copy_from_user(&info, optval, optlen)) return -EFAULT; if (so->tx_opt.num_queues >= Q_MAX_TX_QUEUES) { printk(KERN_INFO "[PFQ|%d] Tx bind: max number of queues exceeded!\n", so->id.value); return -EPERM; } rcu_read_lock(); if (!dev_get_by_index_rcu(sock_net(&so->sk), info.if_index)) { rcu_read_unlock(); printk(KERN_INFO "[PFQ|%d] Tx bind: invalid if_index=%d\n", so->id.value, info.if_index); return -EPERM; } rcu_read_unlock(); if (info.hw_queue < -1) { printk(KERN_INFO "[PFQ|%d] Tx bind: invalid queue=%d\n", so->id.value, info.hw_queue); return -EPERM; } i = so->tx_opt.num_queues; if (info.cpu < -1) { printk(KERN_INFO "[PFQ|%d] Tx[%zu] thread: invalid cpu (%d)!\n", so->id.value, i, info.cpu); return -EPERM; } so->tx_opt.queue[i].if_index = info.if_index; so->tx_opt.queue[i].hw_queue = info.hw_queue; so->tx_opt.queue[i].cpu = info.cpu; so->tx_opt.num_queues++; pr_devel("[PFQ|%d] Tx[%zu] bind: if_index=%d hw_queue=%d cpu=%d\n", so->id.value, i, so->tx_opt.queue[i].if_index, so->tx_opt.queue[i].hw_queue, info.cpu); } break; case Q_SO_TX_UNBIND: { size_t n; for(n = 0; n < Q_MAX_TX_QUEUES; ++n) { so->tx_opt.queue[n].if_index = -1; so->tx_opt.queue[n].hw_queue = -1; so->tx_opt.queue[n].cpu = -1; } } break; case Q_SO_TX_FLUSH: { int queue, err = 0; size_t n; if (optlen != sizeof(queue)) return -EINVAL; if (copy_from_user(&queue, optval, optlen)) return -EFAULT; if (pfq_get_tx_queue(&so->tx_opt, 0) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: socket not enabled!\n", so->id.value); return -EPERM; } if (queue < -1 || (queue > 0 && queue >= so->tx_opt.num_queues)) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: bad queue %d (num_queue=%zu)!\n", so->id.value, queue, so->tx_opt.num_queues); return -EPERM; } if (queue != -1) { pr_devel("[PFQ|%d] flushing Tx queue %d...\n", so->id.value, queue); return pfq_queue_flush(so, queue); } for(n = 0; n < so->tx_opt.num_queues; n++) { if (pfq_queue_flush(so, n) != 0) { printk(KERN_INFO "[PFQ|%d] Tx[%zu] queue flush: flush error (if_index=%d)!\n", so->id.value, n, so->tx_opt.queue[n].if_index); err = -EPERM; } } if (err) return err; } break; case Q_SO_TX_ASYNC: { int toggle, err = 0; size_t n; if (optlen != sizeof(toggle)) return -EINVAL; if (copy_from_user(&toggle, optval, optlen)) return -EFAULT; if (toggle) { size_t started = 0; if (pfq_get_tx_queue(&so->tx_opt, 0) == NULL) { printk(KERN_INFO "[PFQ|%d] Tx queue flush: socket not enabled!\n", so->id.value); return -EPERM; } /* start Tx kernel threads */ for(n = 0; n < Q_MAX_TX_QUEUES; n++) { struct pfq_thread_data *data; int node; if (so->tx_opt.queue[n].if_index == -1) break; if (so->tx_opt.queue[n].cpu == Q_NO_KTHREAD) continue; if (so->tx_opt.queue[n].task) { printk(KERN_INFO "[PFQ|%d] kernel_thread: Tx[%zu] thread already running!\n", so->id.value, n); continue; } data = kmalloc(sizeof(struct pfq_thread_data), GFP_KERNEL); if (!data) { printk(KERN_INFO "[PFQ|%d] kernel_thread: could not allocate thread_data! Failed starting thread on cpu %d!\n", so->id.value, so->tx_opt.queue[n].cpu); err = -EPERM; continue; } data->so = so; data->id = n; node = cpu_online(so->tx_opt.queue[n].cpu) ? cpu_to_node(so->tx_opt.queue[n].cpu) : NUMA_NO_NODE; pr_devel("[PFQ|%d] creating Tx[%zu] thread on cpu %d: if_index=%d hw_queue=%d\n", so->id.value, n, so->tx_opt.queue[n].cpu, so->tx_opt.queue[n].if_index, so->tx_opt.queue[n].hw_queue); so->tx_opt.queue[n].task = kthread_create_on_node(pfq_tx_thread, data, node, "pfq_tx_%d#%zu", so->id.value, n); if (IS_ERR(so->tx_opt.queue[n].task)) { printk(KERN_INFO "[PFQ|%d] kernel_thread: create failed on cpu %d!\n", so->id.value, so->tx_opt.queue[n].cpu); err = PTR_ERR(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; kfree (data); continue; } /* bind the thread */ kthread_bind(so->tx_opt.queue[n].task, so->tx_opt.queue[n].cpu); /* start it */ wake_up_process(so->tx_opt.queue[n].task); started++; } if (started == 0) { printk(KERN_INFO "[PFQ|%d] no kernel thread started!\n", so->id.value); err = -EPERM; } } else { /* stop running threads */ for(n = 0; n < so->tx_opt.num_queues; n++) { if (so->tx_opt.queue[n].task) { pr_devel("[PFQ|%d] stopping Tx[%zu] kernel thread@%p\n", so->id.value, n, so->tx_opt.queue[n].task); kthread_stop(so->tx_opt.queue[n].task); so->tx_opt.queue[n].task = NULL; } } } return err; } break; case Q_SO_GROUP_FUNCTION: { struct pfq_computation_descr *descr = NULL; struct pfq_computation_tree *comp = NULL; struct pfq_group_computation tmp; size_t psize, ucsize; void *context = NULL; pfq_gid_t gid; int err = 0; if (optlen != sizeof(tmp)) return -EINVAL; if (copy_from_user(&tmp, optval, optlen)) return -EFAULT; gid.value = tmp.gid; if (!pfq_has_joined_group(gid, so->id)) { printk(KERN_INFO "[PFQ|%d] group computation: gid=%d not joined!\n", so->id.value, tmp.gid); return -EACCES; } if (copy_from_user(&psize, tmp.prog, sizeof(size_t))) return -EFAULT; pr_devel("[PFQ|%d] computation size: %zu\n", so->id.value, psize); ucsize = sizeof(size_t) * 2 + psize * sizeof(struct pfq_functional_descr); descr = kmalloc(ucsize, GFP_KERNEL); if (descr == NULL) { printk(KERN_INFO "[PFQ|%d] computation: out of memory!\n", so->id.value); return -ENOMEM; } if (copy_from_user(descr, tmp.prog, ucsize)) { printk(KERN_INFO "[PFQ|%d] computation: copy_from_user error!\n", so->id.value); err = -EFAULT; goto error; } /* print user computation */ pr_devel_computation_descr(descr); /* check the correctness of computation */ if (pfq_check_computation_descr(descr) < 0) { printk(KERN_INFO "[PFQ|%d] invalid expression!\n", so->id.value); err = -EFAULT; goto error; } /* allocate context */ context = pfq_context_alloc(descr); if (context == NULL) { printk(KERN_INFO "[PFQ|%d] context: alloc error!\n", so->id.value); err = -EFAULT; goto error; } /* allocate a pfq_computation_tree */ comp = pfq_computation_alloc(descr); if (comp == NULL) { printk(KERN_INFO "[PFQ|%d] computation: alloc error!\n", so->id.value); err = -EFAULT; goto error; } /* link functions of computation */ if (pfq_computation_rtlink(descr, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] computation aborted!", so->id.value); err = -EPERM; goto error; } /* print executable tree data structure */ pr_devel_computation_tree(comp); /* run init functions */ if (pfq_computation_init(comp) < 0) { printk(KERN_INFO "[PFQ|%d] initialization of computation aborted!", so->id.value); pfq_computation_fini(comp); err = -EPERM; goto error; } /* enable functional program */ if (pfq_set_group_prog(gid, comp, context) < 0) { printk(KERN_INFO "[PFQ|%d] set group program error!\n", so->id.value); err = -EPERM; goto error; } kfree(descr); return 0; error: kfree(comp); kfree(context); kfree(descr); return err; } break; default: { found = false; } break; } return found ? 0 : sock_setsockopt(sock, level, optname, optval, optlen); }