Exemple #1
0
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
	struct sock *sk=icmp_socket->sk;
	struct ipcm_cookie ipc;
	struct rtable *rt = (struct rtable*)skb->dst;
	u32 daddr;

	if (ip_options_echo(&icmp_param->replyopts, skb))
		return;

	icmp_param->icmph.checksum=0;
	icmp_param->csum=0;
	icmp_out_count(icmp_param->icmph.type);

	sk->ip_tos = skb->nh.iph->tos;
	daddr = ipc.addr = rt->rt_src;
	ipc.opt = &icmp_param->replyopts;
	if (ipc.opt->srr)
		daddr = icmp_param->replyopts.faddr;
	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
		return;
	ip_build_xmit(sk, icmp_glue_bits, icmp_param, 
		icmp_param->data_len+sizeof(struct icmphdr),
		&ipc, rt, MSG_DONTWAIT);
	ip_rt_put(rt);
}
Exemple #2
0
/*
 *	Hand the ICMP message described by icmp_param to ip_build_xmit()
 *	on the ICMP socket, using the given addresses and TOS.
 */
static void icmp_build_xmit(struct icmp_bxm *icmp_param, __u32 saddr, __u32 daddr, __u8 tos)
{
	struct sock *icmp_sk = icmp_socket.data;

	/* Reset the checksum state before the message is built. */
	icmp_param->csum = 0;
	icmp_param->icmph.checksum = 0;

	icmp_out_count(icmp_param->icmph.type);
	icmp_sk->ip_tos = tos;

	ip_build_xmit(icmp_sk, icmp_glue_bits, icmp_param,
		      sizeof(struct icmphdr) + icmp_param->data_len,
		      daddr, saddr, &icmp_param->replyopts, 0, IPPROTO_ICMP, 1);
}
Exemple #3
0
static int raw_sendto(struct sock *sk, const unsigned char *from, 
	int len, int noblock, unsigned flags, struct sockaddr_in *usin, int addr_len)
{
	int err;
	struct sockaddr_in sin;

	/*
	 *	Check the flags. Only MSG_DONTROUTE is permitted.
	 */

	if (flags & MSG_OOB)		/* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;
			 
	if (flags & ~MSG_DONTROUTE)
		return(-EINVAL);
	/*
	 *	Get and verify the address. 
	 */

	if (usin) 
	{
		if (addr_len < sizeof(sin)) 
			return(-EINVAL);
		memcpy(&sin, usin, sizeof(sin));
		if (sin.sin_family && sin.sin_family != AF_INET) 
			return(-EINVAL);
	}
	else 
	{
		if (sk->state != TCP_ESTABLISHED) 
			return(-EINVAL);
		sin.sin_family = AF_INET;
		sin.sin_port = sk->num;
		sin.sin_addr.s_addr = sk->daddr;
	}
	if (sin.sin_port == 0) 
		sin.sin_port = sk->num;
  
	if (sin.sin_addr.s_addr == INADDR_ANY)
		sin.sin_addr.s_addr = ip_my_addr();

	/*
	 *	BSD raw sockets forget to check SO_BROADCAST ....
	 */
	 
	if (!sk->bsdism && sk->broadcast == 0 && ip_chk_addr(sin.sin_addr.s_addr)==IS_BROADCAST)
		return -EACCES;

	if(sk->ip_hdrincl)
	{
		if(len>65535)
			return -EMSGSIZE;
		err=ip_build_xmit(sk, raw_getrawfrag, from, len, sin.sin_addr.s_addr, 0, sk->opt, flags, sin.sin_port, noblock);
	}
	else
	{
		if(len>65535-sizeof(struct iphdr))
			return -EMSGSIZE;
		err=ip_build_xmit(sk, raw_getfrag, from, len, sin.sin_addr.s_addr, 0, sk->opt, flags, sin.sin_port, noblock);
	}
	return err<0?err:len;
}
Exemple #4
0
/*
 * Pick a local port number for a UDP socket (the original comment marks
 * this as shared by v4/v6).
 *
 * Starting at PROT_SOCK + 1 + (start & 1023), UDP_HTABLE_SIZE consecutive
 * candidates are examined.  A candidate whose hash chain is empty is
 * returned straight away.  Otherwise the candidate with the shortest
 * chain is remembered and then advanced in steps of UDP_HTABLE_SIZE
 * (staying in the same chain) until udp_lport_inuse() no longer reports
 * it as taken.  The chosen value is stored in the static 'start' so that
 * the next call begins from it.
 *
 * The whole selection runs between SOCKHASH_LOCK() and SOCKHASH_UNLOCK().
 */
unsigned short udp_good_socknum(void)
{
	int result;
	static int start = 0;
	int i, best, best_size_so_far;

	SOCKHASH_LOCK();

	/* Select initial not-so-random "best" */
	best = PROT_SOCK + 1 + (start & 1023);
	best_size_so_far = 32767;	/* "big" num */
	result = best;
	for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
		struct sock *sk;
		int size;

		sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];

		/* No clashes - take it */
		if (!sk)
			goto out;

		/* Is this one better than our best so far? */
		size = 0;
		do {
			/* Stop counting once this chain cannot beat the best. */
			if(++size >= best_size_so_far)
				goto next;
		} while((sk = sk->next) != NULL);
		best_size_so_far = size;
		best = result;
next:
		;	/* a label must be followed by a statement */
	}

	/* Same chain, next port: step until the number itself is free. */
	while (udp_lport_inuse(best))
		best += UDP_HTABLE_SIZE;
	result = best;
out:
	start = result;
	SOCKHASH_UNLOCK();
	return result;
}

/*
 * Insert sk at the head of the udp_hash chain selected by its port
 * number and remember the chosen slot in sk->hashent.
 */
static void udp_v4_hash(struct sock *sk)
{
	const int slot = (int)sk->num & (UDP_HTABLE_SIZE - 1);

	SOCKHASH_LOCK();
	sk->next = udp_hash[slot];
	udp_hash[slot] = sk;
	sk->hashent = slot;
	SOCKHASH_UNLOCK();
}

/*
 * Unlink sk from the udp_hash chain selected by its port number.
 * Nothing happens if sk is not on that chain.
 */
static void udp_v4_unhash(struct sock *sk)
{
	const int slot = (int)sk->num & (UDP_HTABLE_SIZE - 1);
	struct sock **link;

	SOCKHASH_LOCK();
	/* 'link' always points at the pointer that leads to the current node. */
	for (link = &udp_hash[slot]; *link != NULL; link = &(*link)->next) {
		if (*link != sk)
			continue;
		*link = sk->next;
		break;
	}
	SOCKHASH_UNLOCK();
}

/*
 * Move sk from the chain recorded in sk->hashent to the head of the
 * chain selected by its current port number, and update sk->hashent.
 */
static void udp_v4_rehash(struct sock *sk)
{
	const int new_slot = (int)sk->num & (UDP_HTABLE_SIZE - 1);
	const int old_slot = sk->hashent;
	struct sock **link;

	SOCKHASH_LOCK();

	/* Unlink from the old chain, if it is there at all. */
	for (link = &udp_hash[old_slot]; *link != NULL; link = &(*link)->next) {
		if (*link == sk) {
			*link = sk->next;
			break;
		}
	}

	/* Push onto the head of the new chain. */
	sk->next = udp_hash[new_slot];
	udp_hash[new_slot] = sk;
	sk->hashent = new_slot;

	SOCKHASH_UNLOCK();
}

/*
 * Find the best socket for a datagram by walking the hash chain of the
 * destination port.
 *
 * A socket must have the right port number and must not be both dead and
 * closed.  Each of local address, remote address, remote port and bound
 * device is a wildcard when unset; when set it must match, and each match
 * adds one to the score.  A score of 4 ends the search immediately;
 * otherwise the first socket with the highest score wins.  Returns NULL
 * when nothing matches.
 *
 * UDP sockets are mostly wildcards, so (per the original author, DaveM)
 * nothing more elaborate is attempted.
 */
__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
				      struct device *dev)
{
	unsigned short hnum = ntohs(dport);
	struct sock *best = NULL;
	struct sock *sk;
	int best_score = -1;

	for (sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
		int score;

		if (sk->num != hnum)
			continue;
		if (sk->dead && (sk->state == TCP_CLOSE))
			continue;

		score = 0;

		/* Local address. */
		if (sk->rcv_saddr) {
			if (sk->rcv_saddr != daddr)
				continue;
			++score;
		}

		/* Remote address. */
		if (sk->daddr) {
			if (sk->daddr != saddr)
				continue;
			++score;
		}

		/* Remote port. */
		if (sk->dummy_th.dest) {
			if (sk->dummy_th.dest != sport)
				continue;
			++score;
		}

		/* A socket bound to an interface only takes packets from it. */
		if (sk->bound_device) {
			if (sk->bound_device != dev)
				continue;
			++score;
		}

		if (score == 4) {
			best = sk;
			break;
		}
		if (score > best_score) {
			best = sk;
			best_score = score;
		}
	}

	return best;
}

#ifdef CONFIG_IP_TRANSPARENT_PROXY
/*
 * Transparent-proxy variant of the UDP socket lookup.
 *
 * Two hash chains are walked in turn: the one for the packet's
 * destination port (dport) and the one for the redirect port (rport).
 * A socket is a candidate if its port number equals either of them and
 * it is not both dead and closed.  A bound local address must be paddr
 * (for a socket on the redirect port) or daddr (for a socket on the
 * destination port).  Remote address, remote port and bound device are
 * wildcards when unset and must match when set.  Every matching field
 * adds one to the score.
 *
 * A socket on the destination port with a score of 4 is a perfect match
 * and ends the whole search.  Otherwise the highest-scoring socket that
 * is either on the redirect port or has a bound local address wins.
 * Returns NULL when nothing qualifies.
 *
 * The search runs between SOCKHASH_LOCK() and SOCKHASH_UNLOCK().
 */
struct sock *udp_v4_proxy_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, u32 paddr, u16 rport,
				 struct device *dev)
{
	struct sock *hh[2], *sk, *result = NULL;
	int i;
	int badness = -1;
	unsigned short hnum = ntohs(dport);
	unsigned short hpnum = ntohs(rport);

	SOCKHASH_LOCK();
	hh[0] = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)];
	hh[1] = udp_hash[hpnum & (UDP_HTABLE_SIZE - 1)];
	for (i = 0; i < 2; i++) {
		for(sk = hh[i]; sk != NULL; sk = sk->next) {
			if(sk->num == hnum || sk->num == hpnum) {
				int score = 0;
				if(sk->dead && (sk->state == TCP_CLOSE))
					continue;
				if(sk->rcv_saddr) {
					if((sk->num != hpnum || sk->rcv_saddr != paddr) &&
					   (sk->num != hnum || sk->rcv_saddr != daddr))
						continue;
					score++;
				}
				if(sk->daddr) {
					if(sk->daddr != saddr)
						continue;
					score++;
				}
				if(sk->dummy_th.dest) {
					if(sk->dummy_th.dest != sport)
						continue;
					score++;
				}
				/* If this socket is bound to a particular interface,
				 * did the packet come in on it? */
				if(sk->bound_device) {
					if (sk->bound_device != dev)
						continue;
					score++;
				}
				if(score == 4 && sk->num == hnum) {
					/*
					 * Perfect match.  Leave both loops: a plain
					 * 'break' would only end this chain, and the
					 * scan of the other chain could then replace
					 * the result with a weaker candidate because
					 * 'badness' is not raised on this path.
					 */
					result = sk;
					goto out;
				} else if(score > badness && (sk->num == hpnum || sk->rcv_saddr)) {
					result = sk;
					badness = score;
				}
			}
		}
	}
out:
	SOCKHASH_UNLOCK();
	return result;
}
#endif

/*
 * Starting at sk, walk the chain and return the first socket that
 * should receive a datagram with local port num, local address laddr,
 * remote address raddr and remote port rnum arriving on dev.  The walk
 * yields a null pointer when the chain is exhausted.
 */
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
					     unsigned short num,
					     unsigned long raddr,
					     unsigned short rnum,
					     unsigned long laddr,
					     struct device *dev)
{
	unsigned short hnum = ntohs(num);
	struct sock *cur;

	for (cur = sk; cur; cur = cur->next) {
		/* Wrong local port. */
		if (cur->num != hnum)
			continue;
		/* Dead and closed sockets receive nothing. */
		if (cur->dead && (cur->state == TCP_CLOSE))
			continue;
		/* Remote address set, but to someone else. */
		if (cur->daddr && cur->daddr != raddr)
			continue;
		/* Remote port set, but to a different one. */
		if (cur->dummy_th.dest != rnum && cur->dummy_th.dest != 0)
			continue;
		/* Bound to another interface. */
		if ((cur->bound_device) && (cur->bound_device != dev))
			continue;
		/* Bound to another local address. */
		if (cur->rcv_saddr && cur->rcv_saddr != laddr)
			continue;
		break;
	}

	return cur;
}

/*
 * Smaller of a and b.  This is a plain macro: the argument that is
 * selected is evaluated twice, so do not pass expressions with side
 * effects.
 */
#define min(a,b)	((a)<(b)?(a):(b))


/*
 * ICMP error handler for UDP.
 *
 * 'header' is read as the UDP header quoted inside the ICMP error (the
 * caller is expected to have stepped past the quoted IP header already)
 * and 'len' is the number of bytes available there; anything shorter
 * than a UDP header is ignored.  The ports and addresses from that header
 * are used to look up the socket that sent the offending datagram.
 *
 *   - ICMP_SOURCE_QUENCH halves the socket's congestion window (when it
 *     is above 1) and reports nothing.
 *   - ICMP_PARAMETERPROB is reported to the socket as EPROTO.
 *   - Otherwise, if 'code' indexes an entry of icmp_err_convert[] that is
 *     marked fatal, that entry's error number is reported.
 */

void udp_err(int type, int code, unsigned char *header, __u32 daddr,
	__u32 saddr, struct inet_protocol *protocol, int len)
{
	struct udphdr *uh = (struct udphdr *)header;
	struct sock *sk;

	/* Need the full 8-byte UDP header to identify the socket. */
	if (len < sizeof(struct udphdr))
		return;

	/*
	 * The quoted header is from a datagram we sent, so its destination
	 * is the remote end and its source is our local port.
	 */
	sk = udp_v4_lookup(daddr, uh->dest, saddr, uh->source, NULL);
	if (!sk)
		return;		/* No socket for error */

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Slow down! */
		if (sk->cong_window > 1)
			sk->cong_window /= 2;
		return;

	case ICMP_PARAMETERPROB:
		sk->err = EPROTO;
		sk->error_report(sk);
		return;

	default:
		break;
	}

	/*
	 * RFC1122 4.1.3.3: ICMP errors are passed back to the application.
	 * Strict BSD UDP semantics were tried and backed out again because
	 * they delay the reaction to dead or unreachable servers.
	 */
	if (code > NR_ICMP_UNREACH || !icmp_err_convert[code].fatal)
		return;

	/*
	 * 4.x BSD compatibility: a socket that asked for BSD behaviour only
	 * sees the error when it is connected, RFC1122 notwithstanding.
	 */
	if (sk->bsdism && sk->state != TCP_ESTABLISHED)
		return;

	sk->err = icmp_err_convert[code].errno;
	sk->error_report(sk);
}


/*
 * Fold the UDP pseudo-header (saddr, daddr, len, IPPROTO_UDP) into the
 * partial checksum 'base' via csum_tcpudp_magic().  'uh' is not used.
 */
static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
{
	unsigned short folded = csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);

	return folded;
}

/*
 * Per-datagram state that udp_send() hands to ip_build_xmit() and that
 * comes back as the opaque first argument of the udp_getfrag*() callbacks.
 * The UDP header must stay the first member: the callbacks copy
 * sizeof(struct udphdr) bytes from the start of this structure into the
 * packet.
 */
struct udpfakehdr 
{
	struct udphdr uh;	/* UDP header to place in the first fragment */
	__u32 daddr;		/* destination address, used for the pseudo-header checksum */
	__u32 other;		/* set by udp_send() to (htons(ulen) << 16) + IPPROTO_UDP*256 */
	const char *from;	/* start of the caller's payload (read with *_fromuser/_fromfs copies) */
	__u32 wcheck;		/* running checksum accumulated across fragments */
};

/*
 *	Fragment callback that copies and checksums UDP payload from user
 *	space into 'to'.
 *
 *	'offset' and 'fraglen' count bytes of the UDP datagram including
 *	its header.  For a non-zero offset the fragment is pure payload.
 *	For offset zero the payload is placed behind room for the UDP
 *	header, then the header is checksummed, the final checksum is
 *	computed with the pseudo-header and the header is copied in.
 *
 *	Direct user-to-board transfers would need another callback mode in
 *	ip_build_xmit; that is still to be planned.
 */
 
static void udp_getfrag(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) 
{
	struct udpfakehdr *hdr = (struct udpfakehdr *)p;
	const int first_frag = (offset == 0);
	const char *payload;
	char *out;
	unsigned int nbytes;

	if (first_frag) {
		/* The header occupies the start of this fragment. */
		nbytes = fraglen - sizeof(struct udphdr);
		payload = hdr->from;
		out = to + sizeof(struct udphdr);
	} else {
		/* 'offset' includes the header, the user buffer does not. */
		nbytes = fraglen;
		payload = hdr->from + (offset - sizeof(struct udphdr));
		out = to;
	}

	hdr->wcheck = csum_partial_copy_fromuser(payload, out, nbytes, hdr->wcheck);

	if (!first_frag)
		return;

	/* Add the header itself, then fold in the pseudo-header. */
	hdr->wcheck = csum_partial((char *)hdr, sizeof(struct udphdr),
				   hdr->wcheck);
	hdr->uh.check = csum_tcpudp_magic(saddr, hdr->daddr,
					  ntohs(hdr->uh.len),
					  IPPROTO_UDP, hdr->wcheck);

	/* A computed checksum of zero is sent as all ones. */
	if (hdr->uh.check == 0)
		hdr->uh.check = -1;

	memcpy(to, hdr, sizeof(struct udphdr));
}

/*
 *	Fragment callback for sockets with checksumming turned off: same
 *	layout as udp_getfrag(), but the payload is only copied and the
 *	header goes out with whatever checksum field it already holds.
 *
 *	A separate routine is kept for speed, since unchecksummed UDP
 *	matters for things like ATM video conferencing.  Whether that is
 *	worth it still needs timing; a CONFIG_FAST_NET style option for
 *	>10Mb/second boards might be the better home for it.
 */
 
static void udp_getfrag_nosum(const void *p, __u32 saddr, char * to, unsigned int offset, unsigned int fraglen) 
{
	struct udpfakehdr *hdr = (struct udpfakehdr *)p;
	const int first_frag = (offset == 0);
	const char *payload;
	char *out;
	unsigned int nbytes;

	if (first_frag) {
		/* Leave room for the UDP header at the front. */
		nbytes = fraglen - sizeof(struct udphdr);
		payload = hdr->from;
		out = to + sizeof(struct udphdr);
	} else {
		/* 'offset' includes the header, the user buffer does not. */
		nbytes = fraglen;
		payload = hdr->from + (offset - sizeof(struct udphdr));
		out = to;
	}

	memcpy_fromfs(out, payload, nbytes);

	if (first_frag)
		memcpy(to, hdr, sizeof(struct udphdr));
}


/*
 *	Send one UDP datagram of 'len' payload bytes from 'from' to 'sin'.
 *
 *	'rt' carries the send flags passed on to ip_build_xmit().  Returns
 *	len on success, -EMSGSIZE if the datagram would not fit in an IP
 *	packet, -EPERM/-EINVAL for a rejected proxy request, or the error
 *	reported by ip_build_xmit().
 */

static int udp_send(struct sock *sk, struct sockaddr_in *sin,
		      const unsigned char *from, int len, int rt,
		    __u32 saddr, int noblock) 
{
	void (*getfrag)(const void *, __u32, char *, unsigned int, unsigned int);
	struct udpfakehdr ufh;
	int ulen = len + sizeof(struct udphdr);
	int sent;

	if (ulen > 65535 - sizeof(struct iphdr))
		return -EMSGSIZE;

	/* State for the fragment callback. */
	ufh.from = from;
	ufh.wcheck = 0;
	ufh.daddr = sin->sin_addr.s_addr;
	ufh.other = (htons(ulen) << 16) + IPPROTO_UDP*256;

	/* The UDP header proper; the checksum is filled in later. */
	ufh.uh.check = 0;
	ufh.uh.len = htons(ulen);
	ufh.uh.dest = sin->sin_port;
	ufh.uh.source = sk->dummy_th.source;

#ifdef CONFIG_IP_TRANSPARENT_PROXY
	if (rt & MSG_PROXY) {
		/*
		 * The source to fake is passed as the first 8 bytes of a
		 * second sockaddr_in overlaid on the 8 unused sin_zero
		 * bytes of the destination.  Ugly, but it needs no new
		 * system call.
		 */
		struct sockaddr_in *sinfrom =
			(struct sockaddr_in *) sin->sin_zero;

		if (!suser())
			return -EPERM;
		if (sinfrom->sin_family != 0 && sinfrom->sin_family != AF_INET)
			return -EINVAL;
		if (sinfrom->sin_port == 0)
			return -EINVAL;

		ufh.uh.source = sinfrom->sin_port;
		saddr = sinfrom->sin_addr.s_addr;
	}
#endif

	/*
	 * RFC1122 4.1.3.4: checksumming is provided (MUST), the application
	 * can switch it off via setsockopt() (MAY) and it defaults to on
	 * (MUST). -- MS
	 */
	getfrag = sk->no_check ? udp_getfrag_nosum : udp_getfrag;

	sent = ip_build_xmit(sk, getfrag, &ufh, ulen,
			     sin->sin_addr.s_addr, saddr, sk->opt, rt,
			     IPPROTO_UDP, noblock);
	if (sent < 0)
		return sent;

	udp_statistics.UdpOutDatagrams++;
	return len;
}
Exemple #5
0
/*
 *	Build and transmit an ICMP message of the given type and code in
 *	response to the packet in skb_in.  'info' is stored in the
 *	un.gateway word of the ICMP header.
 *
 *	Nothing is sent when the packet was not addressed to this host at
 *	link level, has no route attached, arrived via a broadcast or
 *	multicast route, is not the first fragment, or when an ICMP error
 *	would be answering another ICMP error (or an unknown ICMP type).
 *	The message is also dropped if routing fails, the IP options
 *	cannot be echoed, or the rate limiter refuses it.
 *
 *	The quoted original data is truncated so the whole message stays
 *	within min(path MTU, 576) bytes.
 */
void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info)
{
	struct iphdr *iph;
	struct icmphdr *icmph;
	int room;
	struct icmp_bxm icmp_param;
	struct rtable *rt = (struct rtable*)skb_in->dst;
	struct ipcm_cookie ipc;
	u32 saddr;
	u8  tos;
	
	/*
	 *	Find the original header
	 */
	 
	iph = skb_in->nh.iph;
	
	/*
	 *	No replies to physical multicast/broadcast
	 */
	 
	if (skb_in->pkt_type!=PACKET_HOST)
		return;
		
	/*
	 *	Now check at the protocol level
	 */
	if (!rt) {
		if (sysctl_ip_always_defrag == 0 &&
		    net_ratelimit())
			printk(KERN_DEBUG "icmp_send: destinationless packet\n");
		return;
	}
	if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
		return;
	 
		
	/*
	 *	Only reply to fragment 0. We byte re-order the constant
	 *	mask for efficiency.
	 */
	 
	if (iph->frag_off&htons(IP_OFFSET))
		return;
		
	/* 
	 *	If we send an ICMP error to an ICMP error a mess would result..
	 */
	 
	if (icmp_pointers[type].error) {
		/*
		 *	We are an error, check if we are replying to an ICMP error
		 */
		 
		if (iph->protocol==IPPROTO_ICMP) {
			/* The ICMP header follows the IP header, which is ihl 32-bit words long. */
			icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
			/*
			 *	Assume any unknown ICMP type is an error. This isn't
			 *	specified by the RFC, but think about it..
			 */
			if (icmph->type>NR_ICMP_TYPES || icmp_pointers[icmph->type].error)
				return;
		}
	}


	/*
	 *	Construct source address and options.
	 */

#ifdef CONFIG_IP_ROUTE_NAT	
	/*
	 *	Restore original addresses if packet has been translated.
	 */
	if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) {
		iph->daddr = rt->key.dst;
		iph->saddr = rt->key.src;
	}
#endif
#ifdef CONFIG_IP_MASQUERADE
	if (type==ICMP_DEST_UNREACH && IPCB(skb_in)->flags&IPSKB_MASQUERADED) {
			ip_fw_unmasq_icmp(skb_in);
	}
#endif

	/*
	 *	Answer from the address the packet was sent to, but only if
	 *	the incoming route is flagged local; otherwise pass 0 as the
	 *	source to the route lookup.
	 */
	saddr = iph->daddr;
	if (!(rt->rt_flags & RTCF_LOCAL))
		saddr = 0;

	/*
	 *	Errors keep the TOS bits of the original packet but are sent
	 *	with internet-control precedence; other messages copy the
	 *	original TOS byte unchanged.
	 */
	tos = icmp_pointers[type].error ?
		((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) :
			iph->tos;

	/* XXX: use a more aggressive expire for routes created by 
	 * this call (not longer than the rate limit timeout). 
	 * It could be also worthwhile to not put them into ipv4
	 * fast routing cache at first. Otherwise an attacker can
	 * grow the routing table.
	 */
	/* From here on 'rt' is the output route, which must be released on every exit. */
	if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
		return;
	
	if (ip_options_echo(&icmp_param.replyopts, skb_in)) 
		goto ende;


	/*
	 *	Prepare data for ICMP header.
	 */

	icmp_param.icmph.type=type;
	icmp_param.icmph.code=code;
	icmp_param.icmph.un.gateway = info;
	icmp_param.icmph.checksum=0;
	icmp_param.csum=0;
	icmp_param.data_ptr=iph;
	icmp_out_count(icmp_param.icmph.type);
	icmp_socket->sk->ip_tos = tos;
	ipc.addr = iph->saddr;
	ipc.opt = &icmp_param.replyopts;
	if (icmp_param.replyopts.srr) {
		/*
		 *	The echoed options contain a source route: drop the
		 *	route found above and look one up towards faddr.
		 *	On failure no route is held, hence the plain return.
		 */
		ip_rt_put(rt);
		if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0))
			return;
	}

	if (!icmpv4_xrlim_allow(rt, type, code))
		goto ende;

	/* RFC says return as much as we can without exceeding 576 bytes. */

	room = rt->u.dst.pmtu;
	if (room > 576)
		room = 576;
	/* Subtract our own IP header, the echoed options and the ICMP header. */
	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
	room -= sizeof(struct icmphdr);

	/* Quote the original IP header plus skb_in->len bytes, capped at 'room'. */
	icmp_param.data_len=(iph->ihl<<2)+skb_in->len;
	if (icmp_param.data_len > room)
		icmp_param.data_len = room;
	
	ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, 
		icmp_param.data_len+sizeof(struct icmphdr),
		&ipc, rt, MSG_DONTWAIT);

	/* Common exit: release the output route. */
ende:
	ip_rt_put(rt);
}
/*
 *	Send the message described by msg (len bytes) on raw socket sk.
 *
 *	The destination is msg->msg_name when given, otherwise the address
 *	the socket is connected to.  Control messages are parsed with
 *	ip_cmsg_send() and may supply IP options; otherwise the socket's
 *	own options are used.
 *
 *	Returns len on success or a negative error: -EMSGSIZE (len out of
 *	range), -EOPNOTSUPP (MSG_OOB), -EINVAL (bad address, or options
 *	combined with hdrincl, or a source route without destination),
 *	-EDESTADDRREQ (no address and not connected), -EACCES (broadcast
 *	route without the socket's broadcast flag), or whatever
 *	ip_cmsg_send(), ip_route_output() or ip_build_xmit() report.
 *
 *	Exit labels: 'out' just returns; 'done' first frees options that
 *	came from ip_cmsg_send() and releases the route.
 */
static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
{
	struct ipcm_cookie ipc;
	struct rawfakehdr rfh;
	struct rtable *rt = NULL;
	int free = 0;	/* set when ipc.opt came from ip_cmsg_send() and must be kfree()d */
	u32 daddr;
	u8  tos;
	int err;

	/* This check is ONLY to check for arithmetic overflow
	   on integer(!) len. Not more! Real check will be made
	   in ip_build_xmit --ANK

	   BTW socket.c -> af_*.c -> ... make multiple
	   invalid conversions size_t -> int. We MUST repair it f.e.
	   by replacing all of them with size_t and revise all
	   the places sort of len += sizeof(struct iphdr)
	   If len was ULONG_MAX-10 it would be a catastrophe  --ANK
	 */

	err = -EMSGSIZE;
	if (len < 0 || len > 0xFFFF)
		goto out;

	/*
	 *	Check the flags.
	 */

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB)	/* Mirror BSD error message */
		goto out;               /* compatibility */
			 
	/*
	 *	Get and verify the address. 
	 */

	if (msg->msg_namelen) {
		struct sockaddr_in *usin = (struct sockaddr_in*)msg->msg_name;
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(*usin))
			goto out;
		if (usin->sin_family != AF_INET) {
			/* Warn once; a zero family is still accepted below. */
			static int complained;
			if (!complained++)
				printk(KERN_INFO "%s forgot to set AF_INET in "
						 "raw sendmsg. Fix it!\n",
						 current->comm);
			err = -EINVAL;
			if (usin->sin_family)
				goto out;
		}
		daddr = usin->sin_addr.s_addr;
		/* ANK: I did not forget to get protocol from port field.
		 * I just do not know, who uses this weirdness.
		 * IP_HDRINCL is much more convenient.
		 */
	} else {
		err = -EDESTADDRREQ;
		if (sk->state != TCP_ESTABLISHED) 
			goto out;
		daddr = sk->daddr;
	}

	/* Defaults that ip_cmsg_send() may override from control messages. */
	ipc.addr = sk->saddr;
	ipc.opt = NULL;
	ipc.oif = sk->bound_dev_if;

	if (msg->msg_controllen) {
		err = ip_cmsg_send(msg, &ipc);
		if (err)
			goto out;
		if (ipc.opt)
			free = 1;
	}

	/*
	 *	ipc.addr held the source address up to here; move it into
	 *	rfh.saddr and reuse ipc.addr for the destination.
	 */
	rfh.saddr = ipc.addr;
	ipc.addr = daddr;

	if (!ipc.opt)
		ipc.opt = sk->protinfo.af_inet.opt;

	if (ipc.opt) {
		err = -EINVAL;
		/* Linux does not mangle headers on raw sockets,
		 * so that IP options + IP_HDRINCL is non-sense.
		 */
		if (sk->protinfo.af_inet.hdrincl)
			goto done;
		if (ipc.opt->srr) {
			if (!daddr)
				goto done;
			/* With a source route, route towards the option's faddr. */
			daddr = ipc.opt->faddr;
		}
	}
	tos = RT_TOS(sk->protinfo.af_inet.tos) | sk->localroute;
	if (msg->msg_flags & MSG_DONTROUTE)
		tos |= RTO_ONLINK;

	if (MULTICAST(daddr)) {
		/* Fall back to the socket's multicast interface and source. */
		if (!ipc.oif)
			ipc.oif = sk->protinfo.af_inet.mc_index;
		if (!rfh.saddr)
			rfh.saddr = sk->protinfo.af_inet.mc_addr;
	}

	err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);

	if (err)
		goto done;

	err = -EACCES;
	if (rt->rt_flags & RTCF_BROADCAST && !sk->broadcast)
		goto done;

	/* do_confirm jumps back here unless the message is an empty MSG_PROBE. */
	if (msg->msg_flags & MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	rfh.iov		= msg->msg_iov;
	rfh.saddr	= rt->rt_src;
	rfh.dst		= &rt->u.dst;
	if (!ipc.addr)
		ipc.addr = rt->rt_dst;
	err = ip_build_xmit(sk, sk->protinfo.af_inet.hdrincl ? raw_getrawfrag :
		       	    raw_getfrag, &rfh, len, &ipc, rt, msg->msg_flags);

	/* Reached with rt still NULL when the option checks or the route lookup failed. */
done:
	if (free)
		kfree(ipc.opt);
	ip_rt_put(rt);

out:	return err < 0 ? err : len;

	/* MSG_CONFIRM: confirm the route's destination entry, then send unless this is a zero-length MSG_PROBE. */
do_confirm:
	dst_confirm(&rt->u.dst);
	if (!(msg->msg_flags & MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto done;
}