/** send an udp packet over an IP_HDRINCL raw socket. * If needed, send several fragments. * @param rsock - raw socket * @param buf - data * @param len - data len * @param from - source address:port (_must_ be non-null, but the ip address * can be 0, in which case it will be filled by the kernel). * @param to - destination address:port * @param mtu - maximum datagram size (including the ip header, excluding * link layer headers). Minimum allowed size is 28 * (sizeof(ip_header + udp_header)). If mtu is lower, it will * be ignored (the packet will be sent un-fragmented). * 0 can be used to disable fragmentation. * @return <0 on error (-2: datagram too big, -1: check errno), * number of bytes sent on success * (including the ip & udp headers => * on success len + udpheader + ipheader size). */ int raw_iphdr_udp4_send(int rsock, char* buf, unsigned int len, union sockaddr_union* from, union sockaddr_union* to, unsigned short mtu) { struct msghdr snd_msg; struct iovec iov[2]; struct ip_udp_hdr { struct ip ip; struct udphdr udp; } hdr; unsigned int totlen; #ifndef RAW_IPHDR_INC_AUTO_FRAG unsigned int ip_frag_size; /* fragment size */ unsigned int last_frag_extra; /* extra bytes possible in the last frag */ unsigned int ip_payload; unsigned int last_frag_offs; void* last_frag_start; int frg_no; #endif /* RAW_IPHDR_INC_AUTO_FRAG */ int ret; totlen = len + sizeof(hdr); if (unlikely(totlen) > 65535) return -2; memset(&snd_msg, 0, sizeof(snd_msg)); snd_msg.msg_name=&to->sin; snd_msg.msg_namelen=sockaddru_len(*to); snd_msg.msg_iov=&iov[0]; /* prepare the udp & ip headers */ mk_udp_hdr(&hdr.udp, &from->sin, &to->sin, (unsigned char*)buf, len, 1); mk_ip_hdr(&hdr.ip, &from->sin.sin_addr, &to->sin.sin_addr, len + sizeof(hdr.udp), IPPROTO_UDP); iov[0].iov_base=(char*)&hdr; iov[0].iov_len=sizeof(hdr); snd_msg.msg_iovlen=2; snd_msg.msg_control=0; snd_msg.msg_controllen=0; snd_msg.msg_flags=0; /* this part changes for different fragments */ /* packets are fragmented if mtu has a valid 
value (at least an IP header + UDP header fit in it) and if the total length is greater then the mtu */ #ifndef RAW_IPHDR_INC_AUTO_FRAG if (likely(totlen <= mtu || mtu <= sizeof(hdr))) { #endif /* RAW_IPHDR_INC_AUTO_FRAG */ iov[1].iov_base=buf; iov[1].iov_len=len; ret=sendmsg(rsock, &snd_msg, 0); #ifndef RAW_IPHDR_INC_AUTO_FRAG } else { ip_payload = len + sizeof(hdr.udp); /* a fragment offset must be a multiple of 8 => its size must also be a multiple of 8, except for the last fragment */ ip_frag_size = (mtu -sizeof(hdr.ip)) & (~7); last_frag_extra = (mtu - sizeof(hdr.ip)) & 7; /* rest */ frg_no = ip_payload / ip_frag_size + ((ip_payload % ip_frag_size) > last_frag_extra); /*ip_last_frag_size = ip_payload % frag_size + ((ip_payload % frag_size) <= last_frag_extra) * ip_frag_size; */ last_frag_offs = (frg_no - 1) * ip_frag_size; /* if we are here mtu => sizeof(ip_h+udp_h) && payload > mtu => last_frag_offs >= sizeof(hdr.udp) */ last_frag_start = buf + last_frag_offs - sizeof(hdr.udp); hdr.ip.ip_id = fastrand_max(65534) + 1; /* random id, should be != 0 (if 0 the kernel will fill it) */ /* send the first fragment */ iov[1].iov_base=buf; /* ip_frag_size >= sizeof(hdr.udp) because we are here only if mtu >= sizeof(hdr.ip) + sizeof(hdr.udp) */ iov[1].iov_len=ip_frag_size - sizeof(hdr.udp); hdr.ip.ip_len = RAW_IPHDR_IP_LEN(ip_frag_size + sizeof(hdr.ip)); hdr.ip.ip_off = RAW_IPHDR_IP_OFF(0x2000); /* set MF */ ret=sendmsg(rsock, &snd_msg, 0); if (unlikely(ret < 0)) goto end; /* all the other fragments, include only the ip header */ iov[0].iov_len = sizeof(hdr.ip); iov[1].iov_base = (char*)iov[1].iov_base + iov[1].iov_len; /* fragments between the first and the last */ while(unlikely(iov[1].iov_base < last_frag_start)) { iov[1].iov_len = ip_frag_size; hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip)); /* set MF */ hdr.ip.ip_off = RAW_IPHDR_IP_OFF( (unsigned short) (((char*)iov[1].iov_base - (char*)buf + sizeof(hdr.udp)) / 8) | 0x2000 ); ret=sendmsg(rsock, 
&snd_msg, 0); if (unlikely(ret < 0)) goto end; iov[1].iov_base = (char*)iov[1].iov_base + iov[1].iov_len; } /* last fragment */ iov[1].iov_len = buf + len - (char*)iov[1].iov_base; hdr.ip.ip_len = RAW_IPHDR_IP_LEN(iov[1].iov_len + sizeof(hdr.ip)); /* don't set MF (last fragment) */ hdr.ip.ip_off = RAW_IPHDR_IP_OFF((unsigned short) (((char*)iov[1].iov_base - (char*)buf + sizeof(hdr.udp)) / 8) ); ret=sendmsg(rsock, &snd_msg, 0); if (unlikely(ret < 0)) goto end; } end: #endif /* RAW_IPHDR_INC_AUTO_FRAG */ return ret; }
/* loadbalance_by_weight() randomly selects one server from a weight group,
 * with the odds of each server proportional to its weight.
 *
 * The approach is loosely based on:
 * http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/
 *
 * insert_server_group() keeps the combined weight of all the servers on the
 * head of the list, so the selection needs at most one pass over the list
 * (O(n) worst case, O(1) best case).
 *
 * A random number below the total weight is drawn. The list is walked while
 * accumulating the weight of every node; as soon as the accumulated weight
 * exceeds the random number, the last usable server seen so far is returned.
 * A server is usable if it is connected, was not already tried and is below
 * its high water mark (a hwm <= 0 means no limit).
 *
 * A server with a weight of 0 will almost never be chosen, except maybe when
 * all the other servers are offline.
 *
 * Special case: when the combined weight of the group is 0 (every server has
 * weight 0) the load is spread evenly over the servers. This requires
 * computing the size of the list first.
 *
 * The result is stored in *s; *s is NULL if no server could be selected.
 */
void loadbalance_by_weight(jsonrpc_server_t** s,
		jsonrpc_server_group_t* grp, server_list_t* tried)
{
	jsonrpc_server_group_t* node;
	jsonrpc_server_t* candidate;
	unsigned int target;
	unsigned int running;
	unsigned int visited;
	unsigned int count;

	*s = NULL;

	if(grp == NULL) {
		ERR("Trying to pick from an empty group\n");
		return;
	}

	if(grp->type != WEIGHT_GROUP) {
		ERR("Trying to pick from a non weight group\n");
		return;
	}

	if(grp->weight != 0) {
		/* weighted selection: the head carries the combined weight */
		target = fastrand_max(grp->weight - 1);
		running = 0;
		for(node = grp; node != NULL; node = node->next) {
			candidate = node->server;
			if(candidate->status == JSONRPC_SERVER_CONNECTED
					&& !server_tried(candidate, tried)
					&& (candidate->hwm <= 0
						|| candidate->req_count < candidate->hwm)) {
				*s = candidate;
			}
			running += candidate->weight;
			/* stop once the drawn number is covered and there is
			 * at least one usable server to hand back */
			if(running > target && *s != NULL)
				break;
		}
		return;
	}

	/* combined weight is 0: pick a position uniformly instead */
	count = server_group_size(grp);
	if(count == 0)
		return;

	target = fastrand_max(count - 1);
	/* walk up to the drawn position, remembering the last usable server;
	 * keep going past it only while nothing usable was found yet */
	for(node = grp, visited = 0;
			node != NULL && (visited <= target || *s == NULL);
			node = node->next, visited++) {
		candidate = node->server;
		if(candidate->status == JSONRPC_SERVER_CONNECTED
				&& !server_tried(candidate, tried)
				&& (candidate->hwm <= 0
					|| candidate->req_count < candidate->hwm)) {
			*s = candidate;
		}
	}
}