Beispiel #1
0
/** send an udp packet over an IP_HDRINCL raw socket.
 * If needed, send several fragments.
 *
 * The IP and UDP headers are built locally (mk_ip_hdr() / mk_udp_hdr()) and
 * sent together with the payload through a 2-element iovec (headers first,
 * payload second), so the payload is never copied.
 *
 * @param rsock - raw socket
 * @param buf - data
 * @param len - data len
 * @param from - source address:port (_must_ be non-null, but the ip address
 *                can be 0, in which case it will be filled by the kernel).
 * @param to - destination address:port
 * @param mtu - maximum datagram size (including the ip header, excluding
 *              link layer headers). It must be greater than 28
 *               (sizeof(ip_header + udp_header)) to have any effect: if it
 *               is lower or equal, it is ignored (the packet will be sent
 *               un-fragmented).
 *              0 can be used to disable fragmentation.
 *              Not used when RAW_IPHDR_INC_AUTO_FRAG is defined (the packet
 *              is always handed over in one piece in that case).
 * @return  <0 on error (-2: datagram too big, i.e. len + headers > 65535,
 *                       -1: check errno),
 *          number of bytes sent on success
 *          (including the ip & udp headers =>
 *               on success len + udpheader + ipheader size).
 *          Note: when the datagram is fragmented by this function, the
 *          value returned is the result of the last sendmsg() call (the
 *          last fragment), not the sum over all the fragments.
 */
int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len,
						union sockaddr_union* from,
						union sockaddr_union* to, unsigned short mtu)
{
	struct msghdr snd_msg;
	struct iovec iov[2];
	struct ip_udp_hdr {
		struct ip ip;
		struct udphdr udp;
	} hdr;
	unsigned int totlen;
#ifndef RAW_IPHDR_INC_AUTO_FRAG
	unsigned int ip_frag_size; /* fragment size */
	unsigned int last_frag_extra; /* extra bytes possible in the last frag */
	unsigned int ip_payload;
	unsigned int last_frag_offs;
	void* last_frag_start;
	int frg_no;
#endif /* RAW_IPHDR_INC_AUTO_FRAG */
	int ret;

	/* the whole datagram must fit in the 16 bit IP total length field.
	   The test is done on len, before adding the headers size, so that a
	   huge len cannot wrap around totlen and slip through the check */
	if (unlikely(len > 65535 - sizeof(hdr)))
		return -2;
	totlen = len + sizeof(hdr);
	memset(&snd_msg, 0, sizeof(snd_msg));
	snd_msg.msg_name=&to->sin;
	snd_msg.msg_namelen=sockaddru_len(*to);
	snd_msg.msg_iov=&iov[0];
	/* prepare the udp & ip headers */
	mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1);
	mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr,
				len + sizeof(hdr.udp), IPPROTO_UDP);
	/* iov[0]: headers (ip + udp for the first/only packet),
	   iov[1]: payload slice */
	iov[0].iov_base=(char*)&hdr;
	iov[0].iov_len=sizeof(hdr);
	snd_msg.msg_iovlen=2;
	snd_msg.msg_control=0;
	snd_msg.msg_controllen=0;
	snd_msg.msg_flags=0;
	/* this part changes for different fragments */
	/* packets are fragmented if mtu has a valid value (more than an
	   IP header + UDP header fit in it) and if the total length is greater
	   than the mtu */
#ifndef RAW_IPHDR_INC_AUTO_FRAG
	if (likely(totlen <= mtu || mtu <= sizeof(hdr))) {
#endif /* RAW_IPHDR_INC_AUTO_FRAG */
		/* single packet: headers + the whole payload */
		iov[1].iov_base=buf;
		iov[1].iov_len=len;
		ret=sendmsg(rsock, &snd_msg, 0);
#ifndef RAW_IPHDR_INC_AUTO_FRAG
	} else {
		/* from the IP point of view the payload is udp header + data */
		ip_payload = len + sizeof(hdr.udp);
		/* a fragment offset must be a multiple of 8 => its size must
		   also be a multiple of 8, except for the last fragment */
		ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7);
		last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */
		/* the last fragment may carry up to last_frag_extra more bytes
		   than the others, so an extra fragment is needed only if the
		   remainder does not fit in that slack */
		frg_no = ip_payload / ip_frag_size +
				 ((ip_payload % ip_frag_size) > last_frag_extra);
		/*ip_last_frag_size = ip_payload % frag_size +
							((ip_payload % frag_size) <= last_frag_extra) *
							ip_frag_size; */
		last_frag_offs = (frg_no - 1) * ip_frag_size;
		/* if we are here mtu > sizeof(ip_h+udp_h) && payload > mtu
		   => last_frag_offs >= sizeof(hdr.udp) */
		/* last_frag_offs is relative to the start of the udp header,
		   while buf points to the data after it => subtract its size */
		last_frag_start = buf + last_frag_offs - sizeof(hdr.udp);
		hdr.ip.ip_id = fastrand_max(65534) + 1; /* random id, should be != 0
											  (if 0 the kernel will fill it) */
		/* send the first fragment */
		iov[1].iov_base=buf;
		/* ip_frag_size >= sizeof(hdr.udp) because we are here only
		   if mtu > sizeof(hdr.ip) + sizeof(hdr.udp) */
		iov[1].iov_len=ip_frag_size - sizeof(hdr.udp);
		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip));
		hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */
		ret=sendmsg(rsock, &snd_msg, 0);
		if (unlikely(ret < 0))
			goto end;
		/* all the other fragments, include only the ip header */
		iov[0].iov_len = sizeof(hdr.ip);
		iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
		/* fragments between the first and the last */
		while(unlikely(iov[1].iov_base < last_frag_start)) {
			iov[1].iov_len = ip_frag_size;
			hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
			/* offset in 8 byte units (counted from the udp header start)
			   with MF set */
			hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short)
									(((char*)iov[1].iov_base - (char*)buf +
										sizeof(hdr.udp)) / 8) | 0x2000 );
			ret=sendmsg(rsock, &snd_msg, 0);
			if (unlikely(ret < 0))
				goto end;
			iov[1].iov_base =  (char*)iov[1].iov_base + iov[1].iov_len;
		}
		/* last fragment: whatever is left in buf */
		iov[1].iov_len = buf + len - (char*)iov[1].iov_base;
		hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip));
		/* don't set MF (last fragment) */
		hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short)
									(((char*)iov[1].iov_base - (char*)buf +
										sizeof(hdr.udp)) / 8) );
		ret=sendmsg(rsock, &snd_msg, 0);
		if (unlikely(ret < 0))
			goto end;
	}
end:
#endif /* RAW_IPHDR_INC_AUTO_FRAG */
	return ret;
}
Beispiel #2
0
/* loadbalance_by_weight() randomly selects a server from a weight group,
 * taking the relative weight of each server into account.
 *
 * The approach is loosely inspired by:
 * http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
 *
 * The weight stored in the head node of the group is used as the combined
 * weight of the whole list (it is maintained by insert_server_group()), so
 * no extra pass over the list is needed to compute the total.
 *
 * Weighted case (head weight != 0): a random number below the total weight
 * is drawn, then the list is walked while the weights of the visited nodes
 * are accumulated. The walk ends as soon as the accumulated weight exceeds
 * the random number AND a usable server has been seen; the most recently
 * seen usable server is the result.
 *
 * Unweighted case (head weight == 0): all the servers are treated equally.
 * The list size is computed first, a random position inside it is drawn and
 * the list is walked up to that position (and further, if no usable server
 * was found so far); again the most recently seen usable server wins.
 *
 * A server is "usable" when it is connected, it is not in the tried list
 * and it has either no high water mark (hwm <= 0) or a request count below
 * the high water mark.
 *
 * On return *s is the selected server, or NULL if the group is NULL, is not
 * a weight group, is empty, or no usable server was found.
 * */
void loadbalance_by_weight(jsonrpc_server_t** s,
		jsonrpc_server_group_t* grp, server_list_t* tried)
{
	jsonrpc_server_group_t* node;
	unsigned int target;

	*s = NULL;

	if(grp == NULL) {
		ERR("Trying to pick from an empty group\n");
		return;
	}

	if(grp->type != WEIGHT_GROUP) {
		ERR("Trying to pick from a non weight group\n");
		return;
	}

	if(grp->weight != 0) {
		/* weighted pick: accumulate the weights until the random target
		 * is exceeded and at least one usable server was found */
		unsigned int acc = 0;

		target = fastrand_max(grp->weight - 1);

		for(node = grp; node != NULL; node = node->next) {
			if(node->server->status == JSONRPC_SERVER_CONNECTED
					&& !server_tried(node->server, tried)
					&& (node->server->hwm <= 0
						|| node->server->req_count < node->server->hwm))
			{
				*s = node->server;
			}
			acc += node->server->weight;
			if(acc > target && *s != NULL)
				break;
		}
		return;
	}

	/* all the weights are 0: pick uniformly by position */
	{
		unsigned int count;
		unsigned int idx;

		count = server_group_size(grp);
		if(count == 0)
			return;

		target = fastrand_max(count - 1);

		for(idx = 0, node = grp; node != NULL; idx++, node = node->next) {
			/* stop once past the drawn position, but only if a usable
			 * server was already found */
			if(idx > target && *s != NULL)
				break;
			if(node->server->status == JSONRPC_SERVER_CONNECTED
					&& !server_tried(node->server, tried)
					&& (node->server->hwm <= 0
						|| node->server->req_count < node->server->hwm))
			{
				*s = node->server;
			}
		}
	}
}