int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb,
		   __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
		   __be16 src_port, __be16 dst_port, __be32 vni)
{
	struct vxlanhdr *vxh;
	struct udphdr *uh;
	int min_headroom;
	int err;

	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
			+ VXLAN_HLEN + sizeof(struct iphdr)
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);

	/* Need space for new headers (invalidates iph ptr) */
	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err)) {
		kfree_skb(skb);
		return err;
	}

	if (vlan_tx_tag_present(skb)) {
		if (unlikely(!vlan_insert_tag_set_proto(skb, skb->vlan_proto,
							vlan_tx_tag_get(skb))))
			return -ENOMEM;

		vlan_set_tci(skb, 0);
	}

	skb_reset_inner_headers(skb);

	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
	vxh->vx_flags = htonl(VXLAN_FLAGS);
	vxh->vx_vni = vni;

	__skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->dest = dst_port;
	uh->source = src_port;
	uh->len = htons(skb->len);
	uh->check = 0;

	vxlan_set_owner(vs->sock->sk, skb);

	skb = handle_offloads(skb);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
			     tos, ttl, df, false);
}
struct sk_buff *vlan_untag(struct sk_buff *skb)
{
	struct vlan_hdr *vhdr;
	u16 vlan_tci;

	if (unlikely(vlan_tx_tag_present(skb))) {
		/* vlan_tci is already set-up so leave this for another time */
		return skb;
	}

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(!skb))
		goto err_free;

	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
		goto err_free;

	vhdr = (struct vlan_hdr *) skb->data;
	vlan_tci = ntohs(vhdr->h_vlan_TCI);
	__vlan_hwaccel_put_tag(skb, vlan_tci);

	skb_pull_rcsum(skb, VLAN_HLEN);
	vlan_set_encap_proto(skb, vhdr);

	skb = vlan_reorder_header(skb);
	if (unlikely(!skb))
		goto err_free;

	return skb;

err_free:
	kfree_skb(skb);
	return NULL;
}
static int __send(struct vport *vport, struct sk_buff *skb,
		  int tunnel_hlen,
		  __be32 seq, __be16 gre64_flag)
{
	struct ovs_key_ipv4_tunnel *tun_key = &OVS_CB(skb)->tun_info->tunnel;
	struct rtable *rt;
	int min_headroom;
	__be16 df;
	__be32 saddr;
	int err;

	/* Route lookup */
	saddr = tun_key->ipv4_src;
	rt = find_route(ovs_dp_get_net(vport->dp),
			&saddr, tun_key->ipv4_dst,
			IPPROTO_GRE, tun_key->ipv4_tos,
			skb->mark);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto error;
	}

	min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
			+ tunnel_hlen + sizeof(struct iphdr)
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);

	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) + 16);

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
int bpf_push_vlan(struct bpf_context *pctx, u16 proto, u16 vlan)
{
	struct bpf_dp_context *ctx = container_of(pctx, struct bpf_dp_context,
						  context);
	struct sk_buff *skb = ctx->skb;
	u16 current_tag;

	if (unlikely(!skb))
		return -EINVAL;

	if (vlan_tx_tag_present(skb)) {
		current_tag = vlan_tx_tag_get(skb);

		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) {
			ctx->skb = NULL;
			return -ENOMEM;
		}

		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ (2 * ETH_ALEN), VLAN_HLEN, 0));

		ctx->context.length = skb->len;
	}
	__vlan_hwaccel_put_tag(skb, proto, vlan);
	ctx->context.vlan_tag = vlan;

	return 0;
}
static int bpf_dp_ctx_init(struct bpf_dp_context *ctx)
{
	struct ovs_key_ipv4_tunnel *tun_key = OVS_CB(ctx->skb)->tun_key;

	if (skb_headroom(ctx->skb) < 64) {
		if (pskb_expand_head(ctx->skb, 64, 0, GFP_ATOMIC))
			return -ENOMEM;
	}
	ctx->context.length = ctx->skb->len;
	ctx->context.vlan_tag = vlan_tx_tag_present(ctx->skb) ?
				vlan_tx_tag_get(ctx->skb) : 0;
	ctx->context.hw_csum = (ctx->skb->ip_summed == CHECKSUM_PARTIAL);
	if (tun_key) {
		ctx->context.tun_key.tun_id =
				be32_to_cpu(be64_get_low32(tun_key->tun_id));
		ctx->context.tun_key.src_ip = be32_to_cpu(tun_key->ipv4_src);
		ctx->context.tun_key.dst_ip = be32_to_cpu(tun_key->ipv4_dst);
		ctx->context.tun_key.tos = tun_key->ipv4_tos;
		ctx->context.tun_key.ttl = tun_key->ipv4_ttl;
	} else {
		memset(&ctx->context.tun_key, 0,
		       sizeof(struct bpf_ipv4_tun_key));
	}

	return 0;
}
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
	if (unlikely(vlan_tx_tag_present(skb))) {
		u16 current_tag;

		/* push down current VLAN tag */
		current_tag = vlan_tx_tag_get(skb);

		if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
			return -ENOMEM;

		/* Update mac_len for subsequent MPLS actions */
		skb->mac_len += VLAN_HLEN;

		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ (2 * ETH_ALEN), VLAN_HLEN, 0));

		invalidate_skb_flow_key(skb);
	} else {
		flow_key_set_vlan_tci(skb, vlan->vlan_tci);
	}
	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid,
			       ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
	return 0;
}
static int pop_vlan(struct sk_buff *skb)
{
	__be16 tci;
	int err;

	if (likely(vlan_tx_tag_present(skb))) {
		vlan_set_tci(skb, 0);
	} else {
		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
			     skb->len < VLAN_ETH_HLEN))
			return 0;

		err = __pop_vlan_tci(skb, &tci);
		if (err)
			return err;
	}
	/* move next vlan tag to hw accel tag */
	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
		   skb->len < VLAN_ETH_HLEN))
		return 0;

	err = __pop_vlan_tci(skb, &tci);
	if (unlikely(err))
		return err;

	__vlan_hwaccel_put_tag(skb, ntohs(tci));
	return 0;
}
/* Transmit a fully formatted Geneve frame.
 *
 * When calling this function, skb->data should point to a fully
 * formed Geneve header.
 *
 * This function will add the remaining UDP tunnel headers.
 */
int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
		    struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos,
		    __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
		    __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt,
		    bool xnet)
{
	struct genevehdr *gnvh;
	int min_headroom;
	int err;

	skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx);
	if (IS_ERR(skb))
		return PTR_ERR(skb);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);

	err = skb_cow_head(skb, min_headroom);
	if (unlikely(err))
		return err;

	skb = vlan_hwaccel_push_inside(skb);
	if (unlikely(!skb))
		return -ENOMEM;

	gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
	geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);

	skb_set_inner_protocol(skb, htons(ETH_P_TEB));

	return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, tos, ttl, df,
				   src_port, dst_port, xnet);
}
struct sk_buff *
pfq_vlan_untag(struct sk_buff *skb)
{
	struct vlan_hdr *vhdr;
	uint16_t vlan_tci;

	if (unlikely(vlan_tx_tag_present(skb))) {
		/* vlan_tci is already set-up so leave this for another time */
		return skb;
	}

	if (unlikely(!pskb_may_pull(skb, VLAN_HLEN)))
		goto err_free;

	vhdr = (struct vlan_hdr *) skb->data;
	vlan_tci = be16_to_cpu(vhdr->h_vlan_TCI);
	__vlan_hwaccel_put_tag(skb, vlan_tci);

	skb_pull_rcsum(skb, VLAN_HLEN);
	pfq_vlan_set_encap_proto(skb, vhdr);

	skb = pfq_vlan_reorder_header(skb);
	if (unlikely(!skb))
		goto err_free;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);

	return skb;

err_free:
	kfree_skb(skb);
	return NULL;
}
uint32_t
__adf_net_get_vlantag(struct sk_buff *skb)
{
	if (!vlan_tx_tag_present(skb))
		return A_STATUS_ENOTSUPP;

	return vlan_tx_tag_get(skb);
}
static inline __be16 vlan_proto(const struct sk_buff *skb)
{
	if (vlan_tx_tag_present(skb))
		return skb->protocol;
	else if (skb->protocol == htons(ETH_P_8021Q))
		return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
	else
		return 0;
}
int rpl_dev_queue_xmit(struct sk_buff *skb)
{
#undef dev_queue_xmit
	int err = -ENOMEM;

	if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
		int features;

		features = netif_skb_features(skb);

		if (!vlan_tso)
			features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
				      NETIF_F_UFO | NETIF_F_FSO);

		skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
		if (unlikely(!skb))
			return err;
		vlan_set_tci(skb, 0);

		if (netif_needs_gso(skb, features)) {
			struct sk_buff *nskb;

			nskb = skb_gso_segment(skb, features);
			if (!nskb) {
				if (unlikely(skb_cloned(skb) &&
				    pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
					goto drop;

				skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
				goto xmit;
			}

			if (IS_ERR(nskb)) {
				err = PTR_ERR(nskb);
				goto drop;
			}
			consume_skb(skb);
			skb = nskb;

			do {
				nskb = skb->next;
				skb->next = NULL;
				err = dev_queue_xmit(skb);
				skb = nskb;
			} while (skb);

			return err;
		}
	}
xmit:
	return dev_queue_xmit(skb);

drop:
	kfree_skb(skb);
	return err;
}
int bpf_pop_vlan(struct bpf_context *pctx)
{
	struct bpf_dp_context *ctx = container_of(pctx, struct bpf_dp_context,
						  context);
	struct sk_buff *skb = ctx->skb;

	if (unlikely(!skb))
		return -EINVAL;

	ctx->context.vlan_tag = 0;
	if (vlan_tx_tag_present(skb)) {
		skb->vlan_tci = 0;
	} else {
		if (skb->protocol != htons(ETH_P_8021Q) ||
		    skb->len < VLAN_ETH_HLEN)
			return 0;

		if (!pskb_may_pull(skb, ETH_HLEN))
			return 0;

		__skb_pull(skb, ETH_HLEN);
		skb = vlan_untag(skb);
		if (!skb) {
			ctx->skb = NULL;
			return -ENOMEM;
		}
		__skb_push(skb, ETH_HLEN);

		skb->vlan_tci = 0;
		ctx->context.length = skb->len;
		ctx->skb = skb;
	}

	/* move next vlan tag to hw accel tag */
	if (skb->protocol != htons(ETH_P_8021Q) ||
	    skb->len < VLAN_ETH_HLEN)
		return 0;

	if (!pskb_may_pull(skb, ETH_HLEN))
		return 0;

	__skb_pull(skb, ETH_HLEN);
	skb = vlan_untag(skb);
	if (!skb) {
		ctx->skb = NULL;
		return -ENOMEM;
	}
	__skb_push(skb, ETH_HLEN);

	ctx->context.vlan_tag = vlan_tx_tag_get(skb);
	ctx->context.length = skb->len;
	ctx->skb = skb;

	return 0;
}
static struct net_device *brnf_get_logical_dev(struct sk_buff *skb,
					       const struct net_device *dev)
{
	struct net_device *vlan, *br;

	br = bridge_parent(dev);
	if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb))
		return br;

	vlan = __vlan_find_dev_deep(br, vlan_tx_tag_get(skb) & VLAN_VID_MASK);

	return vlan ? vlan : br;
}
u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	u16 vlan_tag = 0;

	/* If we support per priority flow control and the packet contains
	 * a vlan tag, send the packet to the TX ring assigned to that priority
	 */
	if (priv->prof->rx_ppp && vlan_tx_tag_present(skb)) {
		vlan_tag = vlan_tx_tag_get(skb);
		return MLX4_EN_NUM_TX_RINGS + (vlan_tag >> 13);
	}
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
	if (unlikely(vlan_tx_tag_present(skb))) {
		u16 current_tag;

		/* push down current VLAN tag */
		current_tag = vlan_tx_tag_get(skb);

		if (!__vlan_put_tag(skb, current_tag))
			return -ENOMEM;

		if (get_ip_summed(skb) == OVS_CSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ ETH_HLEN, VLAN_HLEN, 0));
	}
	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
	return 0;
}
static int pop_vlan(struct sk_buff *skb)
{
	__be16 tci;
	int err;

	if (likely(vlan_tx_tag_present(skb))) {
		skb->vlan_tci = 0;
	} else {
		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
			     skb->len < VLAN_ETH_HLEN))
			return 0;

		err = __pop_vlan_tci(skb, &tci);
		if (err)
			return err;
	}
	return 0;
}
static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
{
	struct net *net = ovs_dp_get_net(vport->dp);
	struct ovs_key_ipv4_tunnel *tun_key;
	struct flowi4 fl;
	struct rtable *rt;
	int min_headroom;
	int tunnel_hlen;
	__be16 df;
	int err;

	if (unlikely(!OVS_CB(skb)->egress_tun_key)) {
		err = -EINVAL;
		goto error;
	}

	tun_key = OVS_CB(skb)->egress_tun_key;

	/* Route lookup */
	memset(&fl, 0, sizeof(fl));
	fl.daddr = tun_key->ipv4_dst;
	fl.saddr = tun_key->ipv4_src;
	fl.flowi4_tos = RT_TOS(tun_key->ipv4_tos);
	fl.flowi4_mark = skb->mark;
	fl.flowi4_proto = IPPROTO_GRE;

	rt = ip_route_output_key(net, &fl);
	if (IS_ERR(rt))
		return PTR_ERR(rt);

	tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
			+ tunnel_hlen + sizeof(struct iphdr)
			+ (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -
						skb_headroom(skb) + 16);

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),
				       0, GFP_ATOMIC);
		if (unlikely(err))
			goto err_free_rt;
	}
static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb,
			     int real_size, u16 *vlan_tag, int tx_ind,
			     void *fragptr)
{
	struct mlx4_wqe_inline_seg *inl = &tx_desc->inl;
	int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl;

	if (skb->len <= spc) {
		inl->byte_count = cpu_to_be32(1 << 31 | skb->len);
		skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
		if (skb_shinfo(skb)->nr_frags)
			memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr,
			       skb_frag_size(&skb_shinfo(skb)->frags[0]));

	} else {
		inl->byte_count = cpu_to_be32(1 << 31 | spc);
		if (skb_headlen(skb) <= spc) {
			skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb));
			if (skb_headlen(skb) < spc) {
				memcpy(((void *)(inl + 1)) + skb_headlen(skb),
				       fragptr, spc - skb_headlen(skb));
				fragptr += spc - skb_headlen(skb);
			}
			inl = (void *) (inl + 1) + spc;
			memcpy(((void *)(inl + 1)), fragptr, skb->len - spc);
		} else {
			skb_copy_from_linear_data(skb, inl + 1, spc);
			inl = (void *) (inl + 1) + spc;
			skb_copy_from_linear_data_offset(skb, spc, inl + 1,
					skb_headlen(skb) - spc);
			if (skb_shinfo(skb)->nr_frags)
				memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc,
				       fragptr,
				       skb_frag_size(&skb_shinfo(skb)->frags[0]));
		}

		wmb();
		inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc));
	}
	tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag);
	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN *
		(!!vlan_tx_tag_present(skb));
	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
}
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
	if (unlikely(vlan_tx_tag_present(skb))) {
		u16 current_tag;

		/* push down current VLAN tag */
		current_tag = vlan_tx_tag_get(skb);
		skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
						current_tag);
		if (!skb)
			return -ENOMEM;

		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ (2 * ETH_ALEN), VLAN_HLEN, 0));
	}
	__vlan_hwaccel_put_tag(skb, vlan->vlan_tpid,
			       ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
	return 0;
}
static inline void enic_queue_wq_skb(struct enic *enic,
	struct vnic_wq *wq, struct sk_buff *skb)
{
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int vlan_tag = 0;
	int vlan_tag_insert = 0;

	if (enic->vlan_group && vlan_tx_tag_present(skb)) {
		vlan_tag_insert = 1;
		vlan_tag = vlan_tx_tag_get(skb);
	}

	if (mss)
		enic_queue_wq_skb_tso(enic, wq, skb, mss,
			vlan_tag_insert, vlan_tag);
	else if (skb->ip_summed == CHECKSUM_PARTIAL)
		enic_queue_wq_skb_csum_l4(enic, wq, skb,
			vlan_tag_insert, vlan_tag);
	else
		enic_queue_wq_skb_vlan(enic, wq, skb,
			vlan_tag_insert, vlan_tag);
}
/**
 * ovs_vport_receive - pass up received packet to the datapath for processing
 *
 * @vport: vport that received the packet
 * @skb: skb that was received
 * @tun_info: tunnel info (if any) that carried the packet
 *
 * Must be called with rcu_read_lock. The packet cannot be shared and
 * skb->data should point to the Ethernet header.
 */
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
		       const struct ovs_tunnel_info *tun_info)
{
	struct pcpu_sw_netstats *stats;
	struct sw_flow_key key;
	int error;

	stats = this_cpu_ptr(vport->percpu_stats);
	u64_stats_update_begin(&stats->syncp);
	stats->rx_packets++;
	stats->rx_bytes += skb->len +
			   (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
	u64_stats_update_end(&stats->syncp);

	OVS_CB(skb)->input_vport = vport;
	OVS_CB(skb)->egress_tun_info = NULL;

	/* Extract flow from 'skb' into 'key'. */
	error = ovs_flow_key_extract(tun_info, skb, &key);
	if (unlikely(error)) {
		kfree_skb(skb);
		return;
	}
	ovs_dp_process_packet(skb, &key);
}
static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
{
	/*
	 * Push any existing hardware-accelerated VLAN tag into the packet
	 * first, to maintain correct tag order.
	 */
	if (unlikely(vlan_tx_tag_present(skb))) {
		u16 current_tag;

		/* push down current VLAN tag */
		current_tag = vlan_tx_tag_get(skb);

		if (!__vlan_put_tag(skb, current_tag))
			return -ENOMEM;

		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum, csum_partial(skb->data
					+ ETH_HLEN, VLAN_HLEN, 0));
	}
	/* The new outermost tag goes into the hardware-accelerated slot. */
	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
	return 0;
}
netdev_features_t rpl_netif_skb_features(struct sk_buff *skb)
{
	unsigned long vlan_features = skb->dev->vlan_features;
	__be16 protocol = skb->protocol;
	netdev_features_t features = skb->dev->features;

	if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	} else if (!vlan_tx_tag_present(skb)) {
		return harmonize_features(skb, protocol, features);
	}

	features &= (vlan_features | NETIF_F_HW_VLAN_TX);

	if (protocol != htons(ETH_P_8021Q)) {
		return harmonize_features(skb, protocol, features);
	} else {
		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
			    NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
		return harmonize_features(skb, protocol, features);
	}
}
static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
{
	struct net_device *netdev = netdev_vport_priv(vport)->dev;
	int len;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
	if (vlan_tx_tag_present(skb)) {
		if (unlikely(!__vlan_put_tag(skb, skb->vlan_proto,
					     vlan_tx_tag_get(skb))))
			return 0;

		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->csum = csum_add(skb->csum,
					     csum_partial(skb->data + (2 * ETH_ALEN),
							  VLAN_HLEN, 0));

		vlan_set_tci(skb, 0);
	}
#endif

	len = skb->len;

	skb_dst_drop(skb);
	nf_reset(skb);
	secpath_reset(skb);

	skb->dev = netdev;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, netdev);
	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);

	netif_rx(skb);

	return len;
}
/* Expects to be always run from workqueue - which acts as
 * read-side critical section for our kind of RCU. */
static void handle_tx(struct vhost_net *net)
{
	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
	struct vhost_virtqueue *vq = &nvq->vq;
	unsigned out, in, s;
	int head;
	struct msghdr msg = {
		.msg_name = NULL,
		.msg_namelen = 0,
		.msg_control = NULL,
		.msg_controllen = 0,
		.msg_flags = MSG_DONTWAIT,
	};
	size_t len, total_len = 0;
	int err;
	size_t hdr_size;
	struct socket *sock;
	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
	bool zcopy, zcopy_used;

	mutex_lock(&vq->mutex);
	sock = vq->private_data;
	if (!sock)
		goto out;
	vhost_disable_notify(&net->dev, vq);

	hdr_size = nvq->vhost_hlen;
	zcopy = nvq->ubufs;

	for (;;) {
		/* Release DMAs done buffers first */
		if (zcopy)
			vhost_zerocopy_signal_used(net, vq);

		/* If more outstanding DMAs, queue the work.
		 * Handle upend_idx wrap around
		 */
		if (unlikely((nvq->upend_idx + vq->num - VHOST_MAX_PEND)
			      % UIO_MAXIOV == nvq->done_idx))
			break;

		head = vhost_get_vq_desc(vq, vq->iov,
					 ARRAY_SIZE(vq->iov),
					 &out, &in,
					 NULL, NULL);
		/* On error, stop handling until the next kick. */
		if (unlikely(head < 0))
			break;
		/* Nothing new?  Wait for eventfd to tell us they refilled. */
		if (head == vq->num) {
			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
				vhost_disable_notify(&net->dev, vq);
				continue;
			}
			break;
		}
		if (in) {
			vq_err(vq, "Unexpected descriptor format for TX: "
			       "out %d, in %d\n", out, in);
			break;
		}
		/* Skip header. TODO: support TSO. */
		s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
		len = iov_length(vq->iov, out);
		iov_iter_init(&msg.msg_iter, WRITE, vq->iov, out, len);
		/* Sanity check */
		if (!len) {
			vq_err(vq, "Unexpected header len for TX: "
			       "%zd expected %zd\n",
			       iov_length(nvq->hdr, s), hdr_size);
			break;
		}

		zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
				   && (nvq->upend_idx + 1) % UIO_MAXIOV !=
				      nvq->done_idx
				   && vhost_net_tx_select_zcopy(net);

		/* use msg_control to pass vhost zerocopy ubuf info to skb */
		if (zcopy_used) {
			struct ubuf_info *ubuf;
			ubuf = nvq->ubuf_info + nvq->upend_idx;

			vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
			vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
			ubuf->callback = vhost_zerocopy_callback;
			ubuf->ctx = nvq->ubufs;
			ubuf->desc = nvq->upend_idx;
			msg.msg_control = ubuf;
			msg.msg_controllen = sizeof(ubuf);
			ubufs = nvq->ubufs;
			atomic_inc(&ubufs->refcount);
			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
		} else {
			msg.msg_control = NULL;
			ubufs = NULL;
		}
		/* TODO: Check specific error and bomb out unless ENOBUFS? */
		err = sock->ops->sendmsg(NULL, sock, &msg, len);
		if (unlikely(err < 0)) {
			if (zcopy_used) {
				vhost_net_ubuf_put(ubufs);
				nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
					% UIO_MAXIOV;
			}
			vhost_discard_vq_desc(vq, 1);
			break;
		}
		if (err != len)
			pr_debug("Truncated TX packet: "
				 " len %d != %zd\n", err, len);
		if (!zcopy_used)
			vhost_add_used_and_signal(&net->dev, vq, head, 0);
		else
			vhost_zerocopy_signal_used(net, vq);
		total_len += len;
		vhost_net_tx_packet(net);
		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
			vhost_poll_queue(&vq->poll);
			break;
		}
	}
out:
	mutex_unlock(&vq->mutex);
}

static int peek_head_len(struct sock *sk)
{
	struct sk_buff *head;
	int len = 0;
	unsigned long flags;

	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
	head = skb_peek(&sk->sk_receive_queue);
	if (likely(head)) {
		len = head->len;
		if (vlan_tx_tag_present(head))
			len += VLAN_HLEN;
	}

	spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
	return len;
}

/* This is a multi-buffer version of vhost_get_desc, that works if
 *	vq has read descriptors only.
 * @vq		- the relevant virtqueue
 * @datalen	- data length we'll be reading
 * @iovcount	- returned count of io vectors we fill
 * @log		- vhost log
 * @log_num	- log offset
 * @quota	- headcount quota, 1 for big buffer
 *	returns number of buffer heads allocated, negative on error
 */
static int get_rx_bufs(struct vhost_virtqueue *vq,
		       struct vring_used_elem *heads,
		       int datalen,
		       unsigned *iovcount,
		       struct vhost_log *log,
		       unsigned *log_num,
		       unsigned int quota)
{
	unsigned int out, in;
	int seg = 0;
	int headcount = 0;
	unsigned d;
	int r, nlogs = 0;
	/* len is always initialized before use since we are always called with
	 * datalen > 0.
	 */
	u32 uninitialized_var(len);

	while (datalen > 0 && headcount < quota) {
		if (unlikely(seg >= UIO_MAXIOV)) {
			r = -ENOBUFS;
			goto err;
		}
		r = vhost_get_vq_desc(vq, vq->iov + seg,
				      ARRAY_SIZE(vq->iov) - seg, &out,
				      &in, log, log_num);
		if (unlikely(r < 0))
			goto err;

		d = r;
		if (d == vq->num) {
			r = 0;
			goto err;
		}
		if (unlikely(out || in <= 0)) {
			vq_err(vq, "unexpected descriptor format for RX: "
				"out %d, in %d\n", out, in);
			r = -EINVAL;
			goto err;
		}
		if (unlikely(log)) {
			nlogs += *log_num;
			log += *log_num;
		}
		heads[headcount].id = cpu_to_vhost32(vq, d);
		len = iov_length(vq->iov + seg, in);
		heads[headcount].len = cpu_to_vhost32(vq, len);
		datalen -= len;
		++headcount;
		seg += in;
	}
	heads[headcount - 1].len = cpu_to_vhost32(vq, len - datalen);
	*iovcount = seg;
	if (unlikely(log))
		*log_num = nlogs;

	/* Detect overrun */
	if (unlikely(datalen > 0)) {
		r = UIO_MAXIOV + 1;
		goto err;
	}
	return headcount;
err:
	vhost_discard_vq_desc(vq, headcount);
	return r;
}
/**
 * key_extract - extracts a flow key from an Ethernet frame.
 * @skb: sk_buff that contains the frame, with skb->data pointing to the
 * Ethernet header
 * @key: output flow key
 *
 * The caller must ensure that skb->len >= ETH_HLEN.
 *
 * Returns 0 if successful, otherwise a negative errno value.
 *
 * Initializes @skb header pointers as follows:
 *
 *    - skb->mac_header: the Ethernet header.
 *
 *    - skb->network_header: just past the Ethernet header, or just past the
 *      VLAN header, to the first byte of the Ethernet payload.
 *
 *    - skb->transport_header: If key->eth.type is ETH_P_IP or ETH_P_IPV6
 *      on output, then just past the IP header, if one is present and
 *      of a correct length, otherwise the same as skb->network_header.
 *      For other key->eth.type values it is left untouched.
 */
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
	int error;
	struct ethhdr *eth;

	/* Flags are always used as part of stats */
	key->tp.flags = 0;

	skb_reset_mac_header(skb);

	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
	 * header in the linear data area.
	 */
	eth = eth_hdr(skb);
	ether_addr_copy(key->eth.src, eth->h_source);
	ether_addr_copy(key->eth.dst, eth->h_dest);

	__skb_pull(skb, 2 * ETH_ALEN);
	/* We are going to push all headers that we pull, so no need to
	 * update skb->csum here.
	 */

	key->eth.tci = 0;
	if (vlan_tx_tag_present(skb))
		key->eth.tci = htons(vlan_get_tci(skb));
	else if (eth->h_proto == htons(ETH_P_8021Q))
		if (unlikely(parse_vlan(skb, key)))
			return -ENOMEM;

	key->eth.type = parse_ethertype(skb);
	if (unlikely(key->eth.type == htons(0)))
		return -ENOMEM;

	skb_reset_network_header(skb);
	skb_reset_mac_len(skb);
	__skb_push(skb, skb->data - skb_mac_header(skb));

	/* Network layer. */
	if (key->eth.type == htons(ETH_P_IP)) {
		struct iphdr *nh;
		__be16 offset;

		error = check_iphdr(skb);
		if (unlikely(error)) {
			memset(&key->ip, 0, sizeof(key->ip));
			memset(&key->ipv4, 0, sizeof(key->ipv4));
			if (error == -EINVAL) {
				skb->transport_header = skb->network_header;
				error = 0;
			}
			return error;
		}

		nh = ip_hdr(skb);
		key->ipv4.addr.src = nh->saddr;
		key->ipv4.addr.dst = nh->daddr;

		key->ip.proto = nh->protocol;
		key->ip.tos = nh->tos;
		key->ip.ttl = nh->ttl;

		offset = nh->frag_off & htons(IP_OFFSET);
		if (offset) {
			key->ip.frag = OVS_FRAG_TYPE_LATER;
			return 0;
		}
		if (nh->frag_off & htons(IP_MF) ||
		    skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			key->ip.frag = OVS_FRAG_TYPE_FIRST;
		else
			key->ip.frag = OVS_FRAG_TYPE_NONE;

		/* Transport layer. */
		if (key->ip.proto == IPPROTO_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->tp.src = tcp->source;
				key->tp.dst = tcp->dest;
				key->tp.flags = TCP_FLAGS_BE16(tcp);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} else if (key->ip.proto == IPPROTO_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->tp.src = udp->source;
				key->tp.dst = udp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} else if (key->ip.proto == IPPROTO_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->tp.src = sctp->source;
				key->tp.dst = sctp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} else if (key->ip.proto == IPPROTO_ICMP) {
			if (icmphdr_ok(skb)) {
				struct icmphdr *icmp = icmp_hdr(skb);
				/* The ICMP type and code fields use the 16-bit
				 * transport port fields, so we need to store
				 * them in 16-bit network byte order.
				 */
				key->tp.src = htons(icmp->type);
				key->tp.dst = htons(icmp->code);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		}

	} else if (key->eth.type == htons(ETH_P_ARP) ||
		   key->eth.type == htons(ETH_P_RARP)) {
		struct arp_eth_header *arp;
		bool arp_available = arphdr_ok(skb);

		arp = (struct arp_eth_header *)skb_network_header(skb);

		if (arp_available &&
		    arp->ar_hrd == htons(ARPHRD_ETHER) &&
		    arp->ar_pro == htons(ETH_P_IP) &&
		    arp->ar_hln == ETH_ALEN &&
		    arp->ar_pln == 4) {

			/* We only match on the lower 8 bits of the opcode. */
			if (ntohs(arp->ar_op) <= 0xff)
				key->ip.proto = ntohs(arp->ar_op);
			else
				key->ip.proto = 0;

			memcpy(&key->ipv4.addr.src, arp->ar_sip,
			       sizeof(key->ipv4.addr.src));
			memcpy(&key->ipv4.addr.dst, arp->ar_tip,
			       sizeof(key->ipv4.addr.dst));
			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
		} else {
			memset(&key->ip, 0, sizeof(key->ip));
			memset(&key->ipv4, 0, sizeof(key->ipv4));
		}
	} else if (eth_p_mpls(key->eth.type)) {
		size_t stack_len = MPLS_HLEN;

		/* In the presence of an MPLS label stack the end of the L2
		 * header and the beginning of the L3 header differ.
		 *
		 * Advance network_header to the beginning of the L3
		 * header. mac_len corresponds to the end of the L2 header.
		 */
		while (1) {
			__be32 lse;

			error = check_header(skb, skb->mac_len + stack_len);
			if (unlikely(error))
				return 0;

			memcpy(&lse, skb_network_header(skb), MPLS_HLEN);

			if (stack_len == MPLS_HLEN)
				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);

			skb_set_network_header(skb, skb->mac_len + stack_len);
			if (lse & htonl(MPLS_LS_S_MASK))
				break;

			stack_len += MPLS_HLEN;
		}
	} else if (key->eth.type == htons(ETH_P_IPV6)) {
		int nh_len;             /* IPv6 Header + Extensions */

		nh_len = parse_ipv6hdr(skb, key);
		if (unlikely(nh_len < 0)) {
			memset(&key->ip, 0, sizeof(key->ip));
			memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
			if (nh_len == -EINVAL) {
				skb->transport_header = skb->network_header;
				error = 0;
			} else {
				error = nh_len;
			}
			return error;
		}

		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
			return 0;
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			key->ip.frag = OVS_FRAG_TYPE_FIRST;

		/* Transport layer. */
		if (key->ip.proto == NEXTHDR_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->tp.src = tcp->source;
				key->tp.dst = tcp->dest;
				key->tp.flags = TCP_FLAGS_BE16(tcp);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} else if (key->ip.proto == NEXTHDR_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->tp.src = udp->source;
				key->tp.dst = udp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} else if (key->ip.proto == NEXTHDR_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->tp.src = sctp->source;
				key->tp.dst = sctp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} else if (key->ip.proto == NEXTHDR_ICMP) {
			if (icmp6hdr_ok(skb)) {
				error = parse_icmpv6(skb, key, nh_len);
				if (error)
					return error;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		}
	}
	return 0;
}
static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
{
	struct fm10k_intfc *interface = netdev_priv(dev);
	unsigned int r_idx = skb->queue_mapping;
	int err;

	if ((skb->protocol == htons(ETH_P_8021Q)) &&
	    !vlan_tx_tag_present(skb)) {
		/* FM10K only supports hardware tagging, any tags in frame
		 * are considered 2nd level or "outer" tags
		 */
		struct vlan_hdr *vhdr;
		__be16 proto;

		/* make sure skb is not shared */
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return NETDEV_TX_OK;

		/* make sure there is enough room to move the ethernet header */
		if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
			return NETDEV_TX_OK;

		/* verify the skb head is not shared */
		err = skb_cow_head(skb, 0);
		if (err)
			return NETDEV_TX_OK;

		/* locate vlan header */
		vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);

		/* pull the 2 key pieces of data out of it */
		__vlan_hwaccel_put_tag(skb,
				       htons(ETH_P_8021Q),
				       ntohs(vhdr->h_vlan_TCI));
		proto = vhdr->h_vlan_encapsulated_proto;
		skb->protocol = (ntohs(proto) >= 1536) ? proto :
							 htons(ETH_P_802_2);

		/* squash it by moving the ethernet addresses up 4 bytes */
		memmove(skb->data + VLAN_HLEN, skb->data, 12);
		__skb_pull(skb, VLAN_HLEN);
		skb_reset_mac_header(skb);
	}

	/* The minimum packet size for a single buffer is 17B so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	if (unlikely(skb->len < 17)) {
		int pad_len = 17 - skb->len;

		if (skb_pad(skb, pad_len))
			return NETDEV_TX_OK;
		__skb_put(skb, pad_len);
	}

	/* prepare packet for hardware time stamping */
	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
		fm10k_ts_tx_enqueue(interface, skb);

	if (r_idx >= interface->num_tx_queues)
		r_idx %= interface->num_tx_queues;

	err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]);

	return err;
}
static int netdev_send(struct vport *vport, struct sk_buff *skb)
{
	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
	int mtu = netdev_vport->dev->mtu;
	int len;

	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
		if (net_ratelimit())
			pr_warn("%s: dropped over-mtu packet: %d > %d\n",
				ovs_dp_name(vport->dp), packet_length(skb), mtu);
		goto error;
	}

	if (unlikely(skb_warn_if_lro(skb)))
		goto error;

	skb->dev = netdev_vport->dev;
	forward_ip_summed(skb, true);

	if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) {
		int features;

		features = netif_skb_features(skb);

		if (!vlan_tso)
			features &= ~(NETIF_F_TSO | NETIF_F_TSO6 |
				      NETIF_F_UFO | NETIF_F_FSO);

		if (netif_needs_gso(skb, features)) {
			struct sk_buff *nskb;

			nskb = skb_gso_segment(skb, features);
			if (!nskb) {
				if (unlikely(skb_cloned(skb) &&
				    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) {
					kfree_skb(skb);
					return 0;
				}

				skb_shinfo(skb)->gso_type &= ~SKB_GSO_DODGY;
				goto tag;
			}

			if (IS_ERR(nskb)) {
				kfree_skb(skb);
				return 0;
			}
			consume_skb(skb);
			skb = nskb;

			len = 0;
			do {
				nskb = skb->next;
				skb->next = NULL;

				skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
				if (likely(skb)) {
					len += skb->len;
					vlan_set_tci(skb, 0);
					dev_queue_xmit(skb);
				}

				skb = nskb;
			} while (skb);

			return len;
		}

tag:
		skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
		if (unlikely(!skb))
			return 0;
		vlan_set_tci(skb, 0);
	}

	len = skb->len;
	dev_queue_xmit(skb);

	return len;

error:
	kfree_skb(skb);
	ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
	return 0;
}