Exemplo n.º 1
0
/* 
 *		Add an ip header to a skbuff and send it out.
 *
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  u32 saddr, u32 daddr, struct ip_options *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)skb->dst;
	struct iphdr *iph;

	/* Build the IP header. */
	if (opt)
		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr) + opt->optlen);
	else
		iph=(struct iphdr *)skb_push(skb,sizeof(struct iphdr));

	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
	if (ip_dont_fragment(sk, &rt->u.dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
	iph->daddr    = rt->rt_dst;
	iph->saddr    = rt->rt_src;
	iph->protocol = sk->sk_protocol;
	iph->tot_len  = htons(skb->len);
	ip_select_ident(iph, &rt->u.dst, sk);
	skb->nh.iph   = iph;

	if (opt && opt->optlen) {
		iph->ihl += opt->optlen>>2;
		ip_options_build(skb, opt, daddr, rt, 0);
	}
Exemplo n.º 2
0
static int raw_getrawfrag(const void *p, char *to, unsigned int offset,
				unsigned int fraglen, struct sk_buff *skb)
{
	struct rawfakehdr *rfh = (struct rawfakehdr *) p;

	if (memcpy_fromiovecend(to, rfh->iov, offset, fraglen))
		return -EFAULT;

	if (!offset) {
		struct iphdr *iph = (struct iphdr *)to;
		if (!iph->saddr)
			iph->saddr = rfh->saddr;
		iph->check   = 0;
		iph->tot_len = htons(fraglen); /* This is right as you can't
						  frag RAW packets */
		/*
	 	 *	Deliberate breach of modularity to keep 
	 	 *	ip_build_xmit clean (well less messy).
		 */
		if (!iph->id)
			ip_select_ident(iph, rfh->dst, NULL);
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	}
	return 0;
}
Exemplo n.º 3
0
int ip_format_and_send_pkt(struct sk_buff *skb,struct sock*sk,__be32 saddr, __be32 daddr,u16 tcp_len)
{
	int ret	= 0;
	struct net 			*pnet 	= sock_net(sk);
	struct netns_ipv4	*n_ipv4 = &pnet->ipv4;
	struct inet_sock 	*inet 	= inet_sk(sk);
	struct tcphdr 		*th 	= tcp_hdr(skb);
	struct iphdr 		*iph	= ip_hdr(skb);

	iph->tos 		= inet->tos;
	iph->tot_len 	= htons(sizeof(struct iphdr) + tcp_len);
	iph->frag_off 	= htons(IP_DF);
	ip_select_ident(skb, sk);
	iph->ttl      	= ip_select_ttl(sk);
	iph->protocol 	= sk->sk_protocol;
	iph->daddr    	= daddr;
	iph->saddr    	= saddr;
	iph->check	  	= 0; 
	th->check 		= 0;	
	th->check 		= get_ipv4_psd_sum(iph);

	if(n_ipv4->aft_route_out){
		ret = n_ipv4->aft_route_out(skb);
		if(ret < 0)
			return US_ENETUNREACH;
	}

	return mbuf_format_and_send(skb, tcp_len);
}
Exemplo n.º 4
0
/*
 *		Add an ip header to a skbuff and send it out.
 *
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = skb_rtable(skb);
	struct iphdr *iph;

	/* Build the IP header. */
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
	if (ip_dont_fragment(sk, &rt->dst))
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
	iph->ttl      = ip_select_ttl(inet, &rt->dst);
	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
	iph->saddr    = saddr;
	iph->protocol = sk->sk_protocol;
	ip_select_ident(skb, sk);

	if (opt && opt->opt.optlen) {
		iph->ihl += opt->opt.optlen>>2;
		ip_options_build(skb, &opt->opt, daddr, rt, 0);
	}
int igmp_send_report_full(struct net_device *dev, u32 group, int type,
                          u8 respond, u32 dst)
{
    struct sk_buff *skb;
    struct iphdr *iph;
    struct igmphdr *ih;
    struct rtable *rt;

    if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
        return -1;
    if (rt->rt_src == 0) {
        ip_rt_put(rt);
        return -1;
    }

    skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC);
    if (skb == NULL) {
        ip_rt_put(rt);
        return -1;
    }

    skb->dst = &rt->u.dst;

    skb_reserve(skb, (dev->hard_header_len+15)&~15);

    skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);

    iph->version  = 4;
    iph->ihl      = (sizeof(struct iphdr)+4)>>2;
    iph->tos      = 0;
    iph->frag_off = htons(IP_DF);
    iph->ttl      = 1;
    iph->daddr    = dst;
    iph->saddr    = rt->rt_src;
    iph->protocol = IPPROTO_IGMP;
    iph->tot_len  = htons(IGMP_SIZE);
    ip_select_ident(iph, &rt->u.dst, NULL);
    ((u8*)&iph[1])[0] = IPOPT_RA;
    ((u8*)&iph[1])[1] = 4;
    ((u8*)&iph[1])[2] = 0;
    ((u8*)&iph[1])[3] = 0;
    ip_send_check(iph);

    ih = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
    ih->type=type;
    ih->code=respond;
    ih->csum=0;
    ih->group=group;
    ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));

    return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
                   output_maybe_reroute);
}
Exemplo n.º 6
0
/*
 *		Add an ip header to a skbuff and send it out.
 *
 */
int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,__be32 saddr, __be32 daddr)
{
	int ret = 0;
	struct inet_sock 	*inet 	= inet_sk(sk);
	struct tcphdr 		*th 	= tcp_hdr(skb);
	struct net 			*pnet 	= sock_net(sk);
	struct netns_ipv4	*n_ipv4 = &pnet->ipv4;
	struct iphdr *iph;

	// Build the IP header. 
	skb_push(skb, sizeof(struct iphdr));  ////+ (opt ? opt->opt.optlen : 0)
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	
	iph->version  = 4;
	iph->ihl      = 5;		
	//iph->tos      = 0;//inet->tos;
	iph->tos      = inet->tos;
	iph->tot_len  = htons(skb->len);
	iph->frag_off = htons(IP_DF);		//smallboy :must before ip_select_ident;
	
	ip_select_ident(skb, sk);
	iph->ttl      = ip_select_ttl(sk);
	iph->protocol = sk->sk_protocol;
	iph->daddr    = daddr;
	iph->saddr    = saddr;
	
	iph->check	  = 0;
	//iph->check    = ip_fast_csum(iph, iph->ihl);

	th->check = 0;
	//th->check = get_ipv4_udptcp_checksum(iph , th);
	th->check = get_ipv4_psd_sum(iph);
	
	//skb->mark = sk->sk_mark;

	//fprintf(stderr,"TH:%u,src:%s,dst:%s,sport:%u,dport:%u,seq:%-12u,ack:%-12u,ipid:%-12u,len:%-12u,SYN:%u;PSH:%u;ACK:%u;FIN:%u;RST:%u; send!!!\n"
	//		,US_GET_LCORE(),trans_ip(iph->saddr),trans_ip(iph->daddr)
	//		,ntohs(th->source),ntohs(th->dest)
	//		,ntohl(th->seq),ntohl(th->ack_seq)
	//		,iph->id,skb->len,th->syn,th->psh,th->ack,th->fin,th->rst);	

	if(n_ipv4->aft_route_out){
		ret = n_ipv4->aft_route_out(skb);
		if(ret < 0)
			return US_ENETUNREACH;
	}

	// Send it out. 
	return ip_local_out(skb);		
}
Exemplo n.º 7
0
/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosing depends on XFRM_SA_XFLAG_DONT_ENCAP_DSCP */
	if (x->props.extra_flags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP)
		top_iph->tos = 0;
	else
		top_iph->tos = XFRM_MODE_SKB_CB(skb)->tos;
	top_iph->tos = INET_ECN_encapsulate(top_iph->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));

	top_iph->ttl = ip4_dst_hoplimit(dst->child);

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;
	ip_select_ident(dev_net(dst->dev), skb, NULL);

	return 0;
}
/* Add encapsulation header.
 *
 * The top IP header will be constructed per RFC 2401.
 */
static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct iphdr *top_iph;
	int flags;

	skb_set_network_header(skb, -x->props.header_len);
	skb->mac_header = skb->network_header +
			  offsetof(struct iphdr, protocol);
	skb->transport_header = skb->network_header + sizeof(*top_iph);
	top_iph = ip_hdr(skb);

	top_iph->ihl = 5;
	top_iph->version = 4;

	top_iph->protocol = xfrm_af2proto(skb_dst(skb)->ops->family);

	/* DS disclosed */
	top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos,
					    XFRM_MODE_SKB_CB(skb)->tos);

	flags = x->props.flags;
	if (flags & XFRM_STATE_NOECN)
		IP_ECN_clear(top_iph);

	top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
		0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
	ip_select_ident(top_iph, dst->child, NULL);

	top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);

	top_iph->saddr = x->props.saddr.a4;
	top_iph->daddr = x->id.daddr.a4;

	return 0;
}
Exemplo n.º 9
0
static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
{
	struct sock *sk = (struct sock *) chan->private;
	struct pppox_sock *po = pppox_sk(sk);
	struct pptp_opt *opt=&po->proto.pptp;
	struct pptp_gre_header *hdr;
	unsigned int header_len=sizeof(*hdr);
	int len=skb?skb->len:0;
	int err=0;
	int window;

	struct rtable *rt;     			/* Route to the other host */
	struct net_device *tdev;			/* Device to other host */
	struct iphdr  *iph;			/* Our new IP header */
	int    max_headroom;			/* The extra header space needed */

	INC_TX_PACKETS;

	spin_lock_bh(&opt->xmit_lock);
	
	window=WRAPPED(opt->ack_recv,opt->seq_sent)?(__u32)0xffffffff-opt->seq_sent+opt->ack_recv:opt->seq_sent-opt->ack_recv;

	if (!skb){
	    if (opt->ack_sent == opt->seq_recv) goto exit;
	}else if (window>opt->window){
		__set_bit(PPTP_FLAG_PAUSE,(unsigned long*)&opt->flags);
		#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
		mod_timer(&opt->ack_timeout_timer,opt->stat->rtt/100*HZ/10000);
		#else
		schedule_delayed_work(&opt->ack_timeout_work,opt->stat->rtt/100*HZ/10000);
		#endif
		goto exit;
	}

	#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	{
		struct rt_key key = {
			.dst=opt->dst_addr.sin_addr.s_addr,
			.src=opt->src_addr.sin_addr.s_addr,
			.tos=RT_TOS(0),
		};
		if ((err=ip_route_output_key(&rt, &key))) {
			goto tx_error;
		}
	}
	#else
	{
		struct flowi fl = { .oif = 0,
				    .nl_u = { .ip4_u =
					      { .daddr = opt->dst_addr.sin_addr.s_addr,
						.saddr = opt->src_addr.sin_addr.s_addr,
						.tos = RT_TOS(0) } },
				    .proto = IPPROTO_GRE };
		if ((err=ip_route_output_key(&rt, &fl))) {
			goto tx_error;
		}
	}
	#endif
	tdev = rt->u.dst.dev;
	
	#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	max_headroom = ((tdev->hard_header_len+15)&~15) + sizeof(*iph)+sizeof(*hdr)+2;
	#else
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph)+sizeof(*hdr)+2;
	#endif
	

	if (!skb){
		skb=dev_alloc_skb(max_headroom);
		if (!skb) {
			ip_rt_put(rt);
			goto tx_error;
		}
		skb_reserve(skb,max_headroom-skb_headroom(skb));
	}else if (skb_headroom(skb) < max_headroom ||
						skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			goto tx_error;
		}
		if (skb->sk)
		skb_set_owner_w(new_skb, skb->sk);
		kfree_skb(skb);
		skb = new_skb;
	}
	
	if (skb->len){
		int islcp;
		unsigned char *data=skb->data;
		islcp=((data[0] << 8) + data[1])== PPP_LCP && 1 <= data[2] && data[2] <= 7;		
		
		/* compress protocol field */
		if ((opt->ppp_flags & SC_COMP_PROT) && data[0]==0 && !islcp)
			skb_pull(skb,1);
		
		/*
		 * Put in the address/control bytes if necessary
		 */
		if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) {
			data=skb_push(skb,2);
			data[0]=0xff;
			data[1]=0x03;
		}
	}
	len=skb->len;

	if (len==0) header_len-=sizeof(hdr->seq);
	if (opt->ack_sent == opt->seq_recv) header_len-=sizeof(hdr->ack);

	// Push down and install GRE header
	skb_push(skb,header_len);
	hdr=(struct pptp_gre_header *)(skb->data);

	hdr->flags       = PPTP_GRE_FLAG_K;
	hdr->ver         = PPTP_GRE_VER;
	hdr->protocol    = htons(PPTP_GRE_PROTO);
	hdr->call_id     = htons(opt->dst_addr.call_id);

	if (!len){
		hdr->payload_len = 0;
		hdr->ver |= PPTP_GRE_FLAG_A;
		/* ack is in odd place because S == 0 */
		hdr->seq = htonl(opt->seq_recv);
		opt->ack_sent = opt->seq_recv;
		opt->stat->tx_acks++;
	}else {
		hdr->flags |= PPTP_GRE_FLAG_S;
		hdr->seq    = htonl(opt->seq_sent++);
		if (log_level>=3 && opt->seq_sent<=log_packets)
			printk(KERN_INFO"PPTP[%i]: send packet: seq=%i",opt->src_addr.call_id,opt->seq_sent);
		if (opt->ack_sent != opt->seq_recv)	{
		/* send ack with this message */
			hdr->ver |= PPTP_GRE_FLAG_A;
			hdr->ack  = htonl(opt->seq_recv);
			opt->ack_sent = opt->seq_recv;
			if (log_level>=3 && opt->seq_sent<=log_packets)
				printk(" ack=%i",opt->seq_recv);
		}
		hdr->payload_len = htons(len);
		if (log_level>=3 && opt->seq_sent<=log_packets)
			printk("\n");
	}

	/*
	 *	Push down and install the IP header.
	 */

	#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
	skb->transport_header = skb->network_header;
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);
	#else
	skb->h.raw = skb->nh.raw;
	skb->nh.raw = skb_push(skb, sizeof(*iph));
	#endif
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
			      IPSKB_REROUTED);
	#endif

	#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
	iph 			=	ip_hdr(skb);
	#else
	iph 			=	skb->nh.iph;
	#endif
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr) >> 2;
	iph->frag_off		=	0;//df;
	iph->protocol		=	IPPROTO_GRE;
	iph->tos		=	0;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
	iph->ttl = sysctl_ip_default_ttl;
	#else
	iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
	#endif
	iph->tot_len = htons(skb->len);

	dst_release(skb->dst);
	skb->dst = &rt->u.dst;
	
	nf_reset(skb);

	skb->ip_summed = CHECKSUM_NONE;
	ip_select_ident(iph, &rt->u.dst, NULL);
	ip_send_check(iph);

	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output);
	
	wake_up(&opt->wait);

	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
		opt->stat->tx_sent++;
		if (!opt->stat->pt_seq){
			opt->stat->pt_seq  = opt->seq_sent;
			do_gettimeofday(&opt->stat->pt_time);
		}
	}else{
		INC_TX_ERRORS;
		opt->stat->tx_failed++;	
	}

	spin_unlock_bh(&opt->xmit_lock);
	return 1;

tx_error:
	INC_TX_ERRORS;
	opt->stat->tx_failed++;
	if (!len) kfree_skb(skb);
	spin_unlock_bh(&opt->xmit_lock);
	return 1;
exit:
	spin_unlock_bh(&opt->xmit_lock);
	return 0;
}
Exemplo n.º 10
0
static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
			   void *from, size_t length,
			   struct rtable **rtp,
			   unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct iphdr *iph;
	struct sk_buff *skb;
	unsigned int iphlen;
	int err;
	struct rtable *rt = *rtp;
	int hlen, tlen;

	if (length > rt->dst.dev->mtu) {
		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
			       rt->dst.dev->mtu);
		return -EMSGSIZE;
	}
	if (flags&MSG_PROBE)
		goto out;

	hlen = LL_RESERVED_SPACE(rt->dst.dev);
	tlen = rt->dst.dev->needed_tailroom;
	skb = sock_alloc_send_skb(sk,
				  length + hlen + tlen + 15,
				  flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto error;
	skb_reserve(skb, hlen);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb_dst_set(skb, &rt->dst);
	*rtp = NULL;

	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	skb_put(skb,sizeof(*iph));

	skb->ip_summed = CHECKSUM_NONE;

	skb->transport_header = skb->network_header;
	err = -EFAULT;
	if (!memcpy_fromiovecend2((void *)iph, from, 0, length))
		goto error_free;

	iphlen = iph->ihl * 4;

	/*
	 * We don't want to modify the ip header, but we do need to
	 * be sure that it won't cause problems later along the network
	 * stack.  Specifically we want to make sure that iph->ihl is a
	 * sane value.  If ihl points beyond the length of the buffer passed
	 * in, reject the frame as invalid
	 */
	err = -EINVAL;
	if (iphlen > length)
		goto error_free;

	if (iphlen >= sizeof(*iph)) {
		if (!iph->saddr)
			iph->saddr = fl4->saddr;
		iph->check   = 0;
		iph->tot_len = htons(length);
		if (!iph->id)
			ip_select_ident(skb, &rt->dst, NULL);

		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	}
	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
		      rt->dst.dev, dst_output);
	if (err > 0)
		err = net_xmit_errno(err);
	if (err)
		goto error;
out:
	return 0;

error_free:
	kfree_skb(skb);
error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	if (err == -ENOBUFS && !inet->recverr)
		err = 0;
	return err;
}
Exemplo n.º 11
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_CONNECT |
				   IP_VS_RT_MODE_TUNNEL, &saddr);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	rt = skb_rtable(skb);
	tdev = rt->dst.dev;

	/* Copy DF, reset fragment offset and MF */
	df = sysctl_pmtu_disc(ipvs) ? old_iph->frag_off & htons(IP_DF) : 0;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);

		if (!new_skb)
			goto tx_error;
		consume_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = skb->network_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(skb, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error:
	kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
Exemplo n.º 12
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
	struct net *net = skb_net(skb);
	struct netns_ipvs *ipvs = net_ipvs(net);
	struct rtable *rt;			/* Route to the other host */
	__be32 saddr;				/* Source for tunnel */
	struct net_device *tdev;		/* Device to other host */
	__u8 next_protocol = 0;
	__u8 dsfield = 0;
	__u8 ttl = 0;
	__be16 df = 0;
	__be16 *dfp = NULL;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int ret, local;

	EnterFunction(10);

	rcu_read_lock();
	local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
				   IP_VS_RT_MODE_LOCAL |
				   IP_VS_RT_MODE_NON_LOCAL |
				   IP_VS_RT_MODE_CONNECT |
				   IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh);
	if (local < 0)
		goto tx_error;
	if (local) {
		rcu_read_unlock();
		return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1);
	}

	rt = skb_rtable(skb);
	tdev = rt->dst.dev;

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
	dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
	skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
					 &next_protocol, NULL, &dsfield,
					 &ttl, dfp);
	if (IS_ERR(skb))
		goto tx_error;

	skb = iptunnel_handle_offloads(
		skb, false, __tun_gso_type_mask(AF_INET, cp->af));
	if (IS_ERR(skb))
		goto tx_error;

	skb->transport_header = skb->network_header;

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	next_protocol;
	iph->tos		=	dsfield;
	iph->daddr		=	cp->daddr.ip;
	iph->saddr		=	saddr;
	iph->ttl		=	ttl;
	ip_select_ident(net, skb, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->ignore_df = 1;

	ret = ip_vs_tunnel_xmit_prepare(skb, cp);
	if (ret == NF_ACCEPT)
		ip_local_out(skb);
	else if (ret == NF_DROP)
		kfree_skb(skb);
	rcu_read_unlock();

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error:
	if (!IS_ERR(skb))
		kfree_skb(skb);
	rcu_read_unlock();
	LeaveFunction(10);
	return NF_STOLEN;
}
Exemplo n.º 13
0
static int raw_send_hdrinc(struct sock *sk, void *from, int length,
			struct rtable *rt, 
			unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	int hh_len;
	struct iphdr *iph;
	struct sk_buff *skb;
	int err;

	if (length > rt->u.dst.dev->mtu) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
			       rt->u.dst.dev->mtu);
		return -EMSGSIZE;
	}
	if (flags&MSG_PROBE)
		goto out;

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	skb = sock_alloc_send_skb(sk, length+hh_len+15,
				  flags&MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto error; 
	skb_reserve(skb, hh_len);

	skb->priority = sk->sk_priority;
	skb->dst = dst_clone(&rt->u.dst);

	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);

	skb->ip_summed = CHECKSUM_NONE;

	skb->h.raw = skb->nh.raw;
	err = memcpy_fromiovecend((void *)iph, from, 0, length);
	if (err)
		goto error_fault;

	/* We don't modify invalid header */
	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
		if (!iph->saddr)
			iph->saddr = rt->rt_src;
		iph->check   = 0;
		iph->tot_len = htons(length);
		if (!iph->id)
			ip_select_ident(iph, &rt->u.dst, NULL);

		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	}

	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		      dst_output);
	if (err > 0)
		err = inet->recverr ? net_xmit_errno(err) : 0;
	if (err)
		goto error;
out:
	return 0;

error_fault:
	err = -EFAULT;
	kfree_skb(skb);
error:
	IP_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
	return err; 
}
Exemplo n.º 14
0
int esp_output(struct sk_buff *skb)
{
	int err;
	struct dst_entry *dst = skb->dst;
	struct xfrm_state *x  = dst->xfrm;
	struct iphdr *iph, *top_iph;
	struct ip_esp_hdr *esph;
	struct crypto_tfm *tfm;
	struct esp_data *esp;
	struct sk_buff *trailer;
	struct udphdr *uh = NULL;
	struct xfrm_encap_tmpl *encap = NULL;
	int blksize;
	int clen;
	int alen;
	int nfrags;
	union {
		struct iphdr	iph;
		char 		buf[60];
	} tmp_iph;

	/* First, if the skb is not checksummed, complete checksum. */
	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
		err = -EINVAL;
		goto error_nolock;
	}

	spin_lock_bh(&x->lock);
	err = xfrm_check_output(x, skb, AF_INET);
	if (err)
		goto error;
	err = -ENOMEM;

	/* Strip IP header in transport mode. Save it. */
	if (!x->props.mode) {
		iph = skb->nh.iph;
		memcpy(&tmp_iph, iph, iph->ihl*4);
		__skb_pull(skb, iph->ihl*4);
	}
	/* Now skb is pure payload to encrypt */

	/* Round to block size */
	clen = skb->len;

	esp = x->data;
	alen = esp->auth.icv_trunc_len;
	tfm = esp->conf.tfm;
	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
	clen = (clen + 2 + blksize-1)&~(blksize-1);
	if (esp->conf.padlen)
		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);

	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
		goto error;

	/* Fill padding... */
	do {
		int i;
		for (i=0; i<clen-skb->len - 2; i++)
			*(u8*)(trailer->tail + i) = i+1;
	} while (0);
	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
	pskb_put(skb, trailer, clen - skb->len);

	encap = x->encap;

	iph = skb->nh.iph;
	if (x->props.mode) {
		top_iph = (struct iphdr*)skb_push(skb, x->props.header_len);
		esph = (struct ip_esp_hdr*)(top_iph+1);
		if (encap && encap->encap_type) {
			switch (encap->encap_type) {
			case UDP_ENCAP_ESPINUDP:
				uh = (struct udphdr*) esph;
				esph = (struct ip_esp_hdr*)(uh+1);
				top_iph->protocol = IPPROTO_UDP;
				break;
			default:
				printk(KERN_INFO
				       "esp_output(): Unhandled encap: %u\n",
				       encap->encap_type);
				top_iph->protocol = IPPROTO_ESP;
				break;
			}
		} else
			top_iph->protocol = IPPROTO_ESP;
		*(u8*)(trailer->tail - 1) = IPPROTO_IPIP;
		top_iph->ihl = 5;
		top_iph->version = 4;
		top_iph->tos = iph->tos;	/* DS disclosed */
		if (x->props.flags & XFRM_STATE_NOECN)
			IP_ECN_clear(top_iph);
		top_iph->tot_len = htons(skb->len + alen);
		top_iph->frag_off = iph->frag_off&htons(IP_DF);
		if (!(top_iph->frag_off))
			ip_select_ident(top_iph, dst, 0);
		top_iph->ttl = iph->ttl;	/* TTL disclosed */
		top_iph->check = 0;
		top_iph->saddr = x->props.saddr.a4;
		top_iph->daddr = x->id.daddr.a4;
		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	} else {
		esph = (struct ip_esp_hdr*)skb_push(skb, x->props.header_len);
		top_iph = (struct iphdr*)skb_push(skb, iph->ihl*4);
		memcpy(top_iph, &tmp_iph, iph->ihl*4);
		if (encap && encap->encap_type) {
			switch (encap->encap_type) {
			case UDP_ENCAP_ESPINUDP:
				uh = (struct udphdr*) esph;
				esph = (struct ip_esp_hdr*)(uh+1);
				top_iph->protocol = IPPROTO_UDP;
				break;
			default:
				printk(KERN_INFO
				       "esp_output(): Unhandled encap: %u\n",
				       encap->encap_type);
				top_iph->protocol = IPPROTO_ESP;
				break;
			}
		} else
			top_iph->protocol = IPPROTO_ESP;
		iph = &tmp_iph.iph;
		top_iph->tot_len = htons(skb->len + alen);
		top_iph->check = 0;
		top_iph->frag_off = iph->frag_off;
		*(u8*)(trailer->tail - 1) = iph->protocol;
	}

	/* this is non-NULL only with UDP Encapsulation */
	if (encap && uh) {
		uh->source = encap->encap_sport;
		uh->dest = encap->encap_dport;
		uh->len = htons(skb->len + alen - sizeof(struct iphdr));
		uh->check = 0;
	}

	esph->spi = x->id.spi;
	esph->seq_no = htonl(++x->replay.oseq);

	if (esp->conf.ivlen)
		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));

	do {
		struct scatterlist sgbuf[nfrags>MAX_SG_ONSTACK ? 0 : nfrags];
		struct scatterlist *sg = sgbuf;

		if (unlikely(nfrags > MAX_SG_ONSTACK)) {
			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
			if (!sg)
				goto error;
		}
		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
		crypto_cipher_encrypt(tfm, sg, sg, clen);
		if (unlikely(sg != sgbuf))
			kfree(sg);
	} while (0);

	if (esp->conf.ivlen) {
		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
	}

	if (esp->auth.icv_full_len) {
		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
		              sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
		pskb_put(skb, trailer, alen);
	}

	ip_send_check(top_iph);

	skb->nh.raw = skb->data;

	x->curlft.bytes += skb->len;
	x->curlft.packets++;
	spin_unlock_bh(&x->lock);
	if ((skb->dst = dst_pop(dst)) == NULL) {
		err = -EHOSTUNREACH;
		goto error_nolock;
	}
	return NET_XMIT_BYPASS;

error:
	spin_unlock_bh(&x->lock);
error_nolock:
	kfree_skb(skb);
	return err;
}
Exemplo n.º 15
0
//int encap(struct sk_buff *skb, struct tun_param *tp, unsigned int mark, struct ka_payload *kap){
int encap(struct sk_buff *skb, struct tun_param *tp, unsigned int mark, struct ka_payload *kap){
	struct sk_buff *new_skb;
	struct iphdr *old_iph;
	//struct iphdr *iph;
	struct net_device *dev; 
	int ret = 0;
	struct sock tmpsk;
	struct udphdr *udph;

	u32 saddr, daddr;
	u16 sport, dport;

	struct rtable *rt;
	struct flowi4 fl4;

	unsigned int pushroom, piggyroom;

	piggyroom = 0;
	if(kap->tid == 0) piggyroom = 0;
	else piggyroom = PIGGYROOM;
	pushroom = HEADROOM + piggyroom;

	old_iph = get_IP_header(skb);

	daddr = tp->tr.addr;

	sport = tp->tl.port;
	dport = tp->tr.port;

	// since 2.6.36 rtable no longer has rt_src attribute, now I get source address from the device (Sander)
	dev = dev_get_by_index(upmtns->net_ns, tp->tl.ifindex);

	if (!dev) {
		dmesge("no device found during encap");
		return -1;
	}
	saddr = get_dev_ip_address(dev, NULL, 0);

	// since 2.6.39 ip_route_output_key flowi parameter has been replaced with flowi4, rtable no longer has rt_src attribute
	// ip_route_output_key function now returns an rtable and should be replaced with ip_route_output_ports (Sander)
	
	tmpsk.sk_mark = mark;
	rt = ip_route_output_ports(dev_net(dev), &fl4, &tmpsk, daddr, 0, 0, 0, IPPROTO_IP, RT_TOS(old_iph->tos), tp->tl.ifindex);	

	if (!rt) {
		dmesge("encap - no rtable found during encap");
		ret = -1;
		goto end;
	}

	if (skb_headroom(skb) < pushroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
		new_skb = skb_realloc_headroom(skb, pushroom);
		printk(" --- socket buffer REALLOCATION\n");
		if (!new_skb) {
			dmesge("encap - can not realloc skb room");
			ret = -1;
			goto end;
		}
		if (skb->sk) skb_set_owner_w(new_skb, skb->sk);
		dev_kfree_skb_any(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	if(piggyroom > 0){
		skb_push(skb, PIGGYROOM);
		//print_ka_info(kap->info);
		memcpy(skb->data, kap, sizeof(struct ka_payload));
		skb_push(skb, HEADROOM);
	}
	else{
		//printk("Creating normal packet...\n");
		skb_push(skb, HEADROOM);
	}

	skb_reset_network_header(skb);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
	skb_dst_drop(skb);
	// since 2.6.36 rtable has no 'u' attribute, you can use dst directly (Sander)
	skb_dst_set(skb, &rt->dst);

	set_ip_header(skb, old_iph, saddr, daddr, piggyroom);

	set_udp_header(skb, old_iph, sport, dport, piggyroom);

	nf_reset(skb);
	// since 2.6.36 rtable has no 'u' attribute, you can use dst directly (Sander)
	ip_select_ident(ip_hdr(skb), &rt->dst, NULL);

	skb->mark = mark; //useless...

	/*************/
	udph = get_UDP_header(skb);
	//dmesg("ENCAP ---> UDP src: %u dst: %u mark: %u", ntohs(udph->source), ntohs(udph->dest),skb->mark);

end:
	dev_put(dev);
	return ret;
}
Exemplo n.º 16
0
static void send_unreach(struct sk_buff *skb_in, int code)
{
	struct iphdr *iph;
	struct udphdr *udph;
	struct icmphdr *icmph;
	struct sk_buff *nskb;
	u32 saddr;
	u8 tos;
	int hh_len, length;
	struct rtable *rt = (struct rtable*)skb_in->dst;
	unsigned char *data;

	if (!rt)
		return;

	/* FIXME: Use sysctl number. --RR */
	if (!xrlim_allow(&rt->u.dst, 1*HZ))
		return;

	iph = skb_in->nh.iph;

	/* No replies to physical multicast/broadcast */
	if (skb_in->pkt_type!=PACKET_HOST)
		return;

	/* Now check at the protocol level */
	if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
		return;

	/* Only reply to fragment 0. */
	if (iph->frag_off&htons(IP_OFFSET))
		return;

	/* if UDP checksum is set, verify it's correct */
	if (iph->protocol == IPPROTO_UDP
	    && skb_in->tail-(u8*)iph >= sizeof(struct udphdr)) {
		int datalen = skb_in->len - (iph->ihl<<2);
		udph = (struct udphdr *)((char *)iph + (iph->ihl<<2));
		if (udph->check
		    && csum_tcpudp_magic(iph->saddr, iph->daddr,
		                         datalen, IPPROTO_UDP,
		                         csum_partial((char *)udph, datalen,
		                                      0)) != 0)
			return;
	}
		    
	/* If we send an ICMP error to an ICMP error a mess would result.. */
	if (iph->protocol == IPPROTO_ICMP
	    && skb_in->tail-(u8*)iph >= sizeof(struct icmphdr)) {
		icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
		/* Between echo-reply (0) and timestamp (13),
		   everything except echo-request (8) is an error.
		   Also, anything greater than NR_ICMP_TYPES is
		   unknown, and hence should be treated as an error... */
		if ((icmph->type < ICMP_TIMESTAMP
		     && icmph->type != ICMP_ECHOREPLY
		     && icmph->type != ICMP_ECHO)
		    || icmph->type > NR_ICMP_TYPES)
			return;
	}

	saddr = iph->daddr;
	if (!(rt->rt_flags & RTCF_LOCAL))
		saddr = 0;

	tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;

	if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
		return;

	/* RFC says return as much as we can without exceeding 576 bytes. */
	length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);

	if (length > rt->u.dst.pmtu)
		length = rt->u.dst.pmtu;
	if (length > 576)
		length = 576;

	hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;

	nskb = alloc_skb(hh_len+15+length, GFP_ATOMIC);
	if (!nskb) {
		ip_rt_put(rt);
		return;
	}

	nskb->priority = 0;
	nskb->dst = &rt->u.dst;
	skb_reserve(nskb, hh_len);

	/* Set up IP header */
	iph = nskb->nh.iph
		= (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
	iph->version=4;
	iph->ihl=5;
	iph->tos=tos;
	iph->tot_len = htons(length);

	/* PMTU discovery never applies to ICMP packets. */
	iph->frag_off = 0;

	iph->ttl = MAXTTL;
	ip_select_ident(iph, &rt->u.dst, NULL);
	iph->protocol=IPPROTO_ICMP;
	iph->saddr=rt->rt_src;
	iph->daddr=rt->rt_dst;
	iph->check=0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);

	/* Set up ICMP header. */
	icmph = nskb->h.icmph
		= (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
	icmph->type = ICMP_DEST_UNREACH;
	icmph->code = code;	
	icmph->un.gateway = 0;
	icmph->checksum = 0;
	
	/* Copy as much of original packet as will fit */
	data = skb_put(nskb,
		       length - sizeof(struct iphdr) - sizeof(struct icmphdr));
	/* FIXME: won't work with nonlinear skbs --RR */
	memcpy(data, skb_in->nh.iph,
	       length - sizeof(struct iphdr) - sizeof(struct icmphdr));
	icmph->checksum = ip_compute_csum((unsigned char *)icmph,
					  length - sizeof(struct iphdr));

	nf_ct_attach(nskb, skb_in);

	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
		ip_finish_output);
}	
Exemplo n.º 17
0
/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet, its
 *   destination will be set to cp->daddr. Most code of this function
 *   is taken from ipip.c.
 *
 *   It is used in VS/TUN cluster. The load balancer selects a real
 *   server from a cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it will decapsulate the packet, processe
 *   the request and return the response packets directly to the client
 *   without passing the load balancer. This can greatly increase the
 *   scalability of virtual server.
 *
 *   Used for ANY protocol
 */
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
                  struct ip_vs_protocol *pp)
{
    struct rtable *rt;			/* Route to the other host */
    struct net_device *tdev;		/* Device to other host */
    struct iphdr  *old_iph = ip_hdr(skb);
    u8     tos = old_iph->tos;
    __be16 df = old_iph->frag_off;
    struct iphdr  *iph;			/* Our new IP header */
    unsigned int max_headroom;		/* The extra header space needed */
    int    mtu;
    int ret;

    EnterFunction(10);

    if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
                                  RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
                                  IP_VS_RT_MODE_NON_LOCAL)))
        goto tx_error_icmp;
    if (rt->rt_flags & RTCF_LOCAL) {
        ip_rt_put(rt);
        IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
    }

    tdev = rt->dst.dev;

    mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
    if (mtu < 68) {
        IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
        goto tx_error_put;
    }
    if (skb_dst(skb))
        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

    df |= (old_iph->frag_off & htons(IP_DF));

    if ((old_iph->frag_off & htons(IP_DF) &&
            mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
        icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
        IP_VS_DBG_RL("%s(): frag needed\n", __func__);
        goto tx_error_put;
    }

    /*
     * Okay, now see if we can stuff it in the buffer as-is.
     */
    max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

    if (skb_headroom(skb) < max_headroom
            || skb_cloned(skb) || skb_shared(skb)) {
        struct sk_buff *new_skb =
            skb_realloc_headroom(skb, max_headroom);
        if (!new_skb) {
            ip_rt_put(rt);
            kfree_skb(skb);
            IP_VS_ERR_RL("%s(): no memory\n", __func__);
            return NF_STOLEN;
        }
        kfree_skb(skb);
        skb = new_skb;
        old_iph = ip_hdr(skb);
    }

    skb->transport_header = skb->network_header;

    /* fix old IP header checksum */
    ip_send_check(old_iph);

    skb_push(skb, sizeof(struct iphdr));
    skb_reset_network_header(skb);
    memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

    /* drop old route */
    skb_dst_drop(skb);
    skb_dst_set(skb, &rt->dst);

    /*
     *	Push down and install the IPIP header.
     */
    iph			=	ip_hdr(skb);
    iph->version		=	4;
    iph->ihl		=	sizeof(struct iphdr)>>2;
    iph->frag_off		=	df;
    iph->protocol		=	IPPROTO_IPIP;
    iph->tos		=	tos;
    iph->daddr		=	rt->rt_dst;
    iph->saddr		=	rt->rt_src;
    iph->ttl		=	old_iph->ttl;
    ip_select_ident(iph, &rt->dst, NULL);

    /* Another hack: avoid icmp_send in ip_fragment */
    skb->local_df = 1;

    ret = IP_VS_XMIT_TUNNEL(skb, cp);
    if (ret == NF_ACCEPT)
        ip_local_out(skb);
    else if (ret == NF_DROP)
        kfree_skb(skb);

    LeaveFunction(10);

    return NF_STOLEN;

tx_error_icmp:
    dst_link_failure(skb);
tx_error:
    kfree_skb(skb);
    LeaveFunction(10);
    return NF_STOLEN;
tx_error_put:
    ip_rt_put(rt);
    goto tx_error;
}
Exemplo n.º 18
0
static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
			struct rtable *rt,
			unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct net *net = sock_net(sk);
	struct iphdr *iph;
	struct sk_buff *skb;
	unsigned int iphlen;
	int err;

	if (length > rt->u.dst.dev->mtu) {
		ip_local_error(sk, EMSGSIZE, rt->rt_dst, inet->dport,
			       rt->u.dst.dev->mtu);
		return -EMSGSIZE;
	}
	if (flags&MSG_PROBE)
		goto out;

	skb = sock_alloc_send_skb(sk,
				  length + LL_ALLOCATED_SPACE(rt->u.dst.dev) + 15,
				  flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto error;
	skb_reserve(skb, LL_RESERVED_SPACE(rt->u.dst.dev));

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb_dst_set(skb, dst_clone(&rt->u.dst));

	skb_reset_network_header(skb);
	iph = ip_hdr(skb);
	skb_put(skb, length);

	skb->ip_summed = CHECKSUM_NONE;

	skb->transport_header = skb->network_header;
	err = memcpy_fromiovecend((void *)iph, from, 0, length);
	if (err)
		goto error_fault;

	/* We don't modify invalid header */
	iphlen = iph->ihl * 4;
	if (iphlen >= sizeof(*iph) && iphlen <= length) {
		if (!iph->saddr)
			iph->saddr = rt->rt_src;
		iph->check   = 0;
		iph->tot_len = htons(length);
		if (!iph->id)
			ip_select_ident(iph, &rt->u.dst, NULL);

		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
	}
	if (iph->protocol == IPPROTO_ICMP)
		icmp_out_count(net, ((struct icmphdr *)
			skb_transport_header(skb))->type);

	err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
		      dst_output);
	if (err > 0)
		err = net_xmit_errno(err);
	if (err)
		goto error;
out:
	return 0;

error_fault:
	err = -EFAULT;
	kfree_skb(skb);
error:
	IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
	if (err == -ENOBUFS && !inet->recverr)
		err = 0;
	return err;
}