Exemplo n.º 1
0
/**
 * Copy the data of one buffer into another.
 * The number of bytes taken from the start of from is its limit, capped
 * at the capacity of result, so an oversized source is truncated.
 * The result buffer is cleared before the write and flipped afterwards.
 * @param result: destination buffer; previous contents are discarded.
 * @param from: source buffer; it is only read.
 */
void
sldns_buffer_copy(sldns_buffer* result, sldns_buffer* from)
{
	size_t nbytes = sldns_buffer_limit(from);
	size_t room = sldns_buffer_capacity(result);

	/* never write more than the destination can hold */
	if(nbytes > room) {
		nbytes = room;
	}
	sldns_buffer_clear(result);
	sldns_buffer_write(result, sldns_buffer_begin(from), nbytes);
	sldns_buffer_flip(result);
}
Exemplo n.º 2
0
/**
 * Encode a reply into a packet buffer: work out the header flags, encode
 * the message with reply_info_encode() and append the EDNS record if
 * room was reserved for it.
 * @param qinf: query info; its local_alias member influences AA/AD.
 * @param rep: reply to encode; its flags and authoritative members are read.
 * @param id: passed on to reply_info_encode().
 * @param qflags: flags of the query; RD and CD are copied from it, and AD
 *	is consulted when deciding to set AD in the reply.
 * @param pkt: buffer that receives the encoded packet.
 * @param timenow: passed on to reply_info_encode().
 * @param cached: if nonzero and the reply is not authoritative, the AA bit
 *	is removed.
 * @param region: passed on to reply_info_encode().
 * @param udpsize: maximum size; clamped to the capacity of pkt.
 * @param edns: EDNS data, used for its size and attached at the end.
 * @param dnssec: passed on to reply_info_encode(); also enables AD.
 * @param secure: if nonzero the AD bit may be set.
 * @return 0 if udpsize is smaller than LDNS_HEADER_SIZE or if
 *	reply_info_encode() fails, 1 otherwise.
 */
int
reply_info_answer_encode(struct query_info* qinf, struct reply_info* rep,
	uint16_t id, uint16_t qflags, sldns_buffer* pkt, time_t timenow,
	int cached, struct regional* region, uint16_t udpsize,
	struct edns_data* edns, int dnssec, int secure)
{
	unsigned int edns_room = 0;
	uint16_t flags = rep->flags;

	/* a cached answer that is not authoritative loses its AA bit;
	 * in all other cases the original flags are kept */
	if(cached && !rep->authoritative)
		flags &= ~BIT_AA;
	/* RD and CD bits are always copied from the query */
	flags |= (qflags & (BIT_RD|BIT_CD));

	if(secure && (dnssec || (qflags&BIT_AD)))
		flags |= BIT_AD;

	/* with a local alias and a NOERROR or NXDOMAIN rcode the response
	 * can be authoritative: put AA back and drop AD, because the local
	 * data is the primary answer */
	if(qinf->local_alias) {
		if(FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NOERROR ||
			FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN) {
			flags |= BIT_AA;
			flags &= ~BIT_AD;
		}
	}
	log_assert(flags & BIT_QR); /* QR bit must be on in our replies */

	if(udpsize < LDNS_HEADER_SIZE)
		return 0;
	if(sldns_buffer_capacity(pkt) < udpsize)
		udpsize = sldns_buffer_capacity(pkt);

	/* reserve space for the edns record, but only if the packet is
	 * large enough to contain it; otherwise it is omitted */
	if(udpsize >= LDNS_HEADER_SIZE + calc_edns_field_size(edns)) {
		edns_room = (unsigned int)calc_edns_field_size(edns);
		udpsize -= edns_room;
	}

	if(!reply_info_encode(qinf, rep, id, flags, pkt, timenow, region,
		udpsize, dnssec)) {
		log_err("reply encode: out of memory");
		return 0;
	}
	if(edns_room && sldns_buffer_capacity(pkt) >=
		sldns_buffer_limit(pkt)+edns_room) {
		attach_edns_record(pkt, edns);
	}
	return 1;
}
Exemplo n.º 3
0
/**
 * Look up a key in the testframe backend.
 * The key is compared with the stored_key of the backend data while the
 * backend lock is held; on a match the stored data is copied into the
 * result buffer.
 * @param env: unused.
 * @param cachedb_env: its backend_data holds the testframe state.
 * @param key: key string to look for.
 * @param result_buffer: receives the stored data on success.
 * @return 1 if the key matched and the data fit in result_buffer, 0 if
 *	there is no match or the stored data exceeds the buffer capacity.
 */
static int
testframe_lookup(struct module_env* env, struct cachedb_env* cachedb_env,
	char* key, struct sldns_buffer* result_buffer)
{
	struct testframe_moddata* d = (struct testframe_moddata*)
		cachedb_env->backend_data;
	int found = 0;
	(void)env;
	verbose(VERB_ALGO, "testframe_lookup of %s", key);

	lock_basic_lock(&d->lock);
	/* a hit needs a matching key and data that is not too large */
	if(d->stored_key && strcmp(d->stored_key, key) == 0 &&
		d->stored_datalen <= sldns_buffer_capacity(result_buffer)) {
		verbose(VERB_ALGO, "testframe_lookup found %d bytes",
			(int)d->stored_datalen);
		sldns_buffer_clear(result_buffer);
		sldns_buffer_write(result_buffer, d->stored_data,
			d->stored_datalen);
		sldns_buffer_flip(result_buffer);
		found = 1;
	}
	lock_basic_unlock(&d->lock);
	return found;
}
Exemplo n.º 4
0
/**
 * Read a file with trust anchors.
 * Resource records are parsed one at a time into the start of the parsing
 * buffer; DS and DNSKEY records are stored, other record types are skipped.
 * @param anchors: anchor storage.
 * @param buffer: parsing buffer, used as scratch space for one record.
 * @param fname: name of the file to read.
 * @param onlyone: if nonzero, all anchors in the file must resolve to the
 *	same trust anchor point.
 * @return NULL on error (the file cannot be opened, a parse error, a
 *	failure to store a record, or several anchor points when onlyone is
 *	set).  Otherwise the trust anchor point of the last stored record.
 *	If no record was stored: NULL when onlyone is set, else the
 *	non-NULL marker value (struct trust_anchor*)1, which must not be
 *	dereferenced.
 */
static struct trust_anchor*
anchor_read_file(struct val_anchors* anchors, sldns_buffer* buffer,
	const char* fname, int onlyone)
{
	struct trust_anchor* last = NULL;
	struct trust_anchor* stored;
	struct sldns_file_parse_state pst;
	size_t len, dname_len;
	int status;
	uint8_t* rr = sldns_buffer_begin(buffer);
	FILE* fp = fopen(fname, "r");

	if(!fp) {
		log_err("error opening file %s: %s", fname, strerror(errno));
		return NULL;
	}
	memset(&pst, 0, sizeof(pst));
	pst.default_ttl = 3600;
	pst.lineno = 1;

	while(!feof(fp)) {
		len = sldns_buffer_capacity(buffer);
		dname_len = 0;
		status = sldns_fp2wire_rr_buf(fp, rr, &len, &dname_len, &pst);
		if(len == 0) {
			/* empty, $TTL, $ORIGIN */
			continue;
		}
		if(status != 0) {
			log_err("parse error in %s:%d:%d : %s", fname,
				pst.lineno, LDNS_WIREPARSE_OFFSET(status),
				sldns_get_errorstr_parse(status));
			fclose(fp);
			return NULL;
		}
		/* only DS and DNSKEY records are trust anchors */
		if(!(sldns_wirerr_get_type(rr, len, dname_len) ==
			LDNS_RR_TYPE_DS || sldns_wirerr_get_type(rr, len,
			dname_len) == LDNS_RR_TYPE_DNSKEY)) {
			continue;
		}
		stored = anchor_store_new_rr(anchors, rr, len, dname_len);
		if(!stored) {
			log_err("mem error at %s line %d", fname, pst.lineno);
			fclose(fp);
			return NULL;
		}
		if(onlyone && last && last != stored) {
			log_err("error at %s line %d: no multiple anchor "
				"domains allowed (you can have multiple "
				"keys, but they must have the same name).", 
				fname, pst.lineno);
			fclose(fp);
			return NULL;
		}
		last = stored;
	}
	fclose(fp);

	/* empty file is OK when multiple anchors are allowed */
	if(!last && !onlyone)
		return (struct trust_anchor*)1;
	return last;
}
Exemplo n.º 5
0
/**
 * Set up the query info and EDNS data for a context query.
 * The query name of the result is converted to wire format and stored in
 * qinfo, together with the type and class.  EDNS is marked present with
 * version 0 and the DO bit; the advertised UDP size is the capacity of the
 * worker's UDP buffer, capped at 65535.
 * @param w: worker; w->back->udp_buff determines the UDP size.
 * @param q: query whose result (q->res) supplies name, type and class.
 * @param qinfo: filled in.
 * @param edns: filled in (only when the name conversion succeeds).
 * @return 0 if the name could not be converted, 1 on success.
 */
static int
setup_qinfo_edns(struct libworker* w, struct ctx_query* q,
	struct query_info* qinfo, struct edns_data* edns)
{
	size_t cap;

	qinfo->qtype = (uint16_t)q->res->qtype;
	qinfo->qclass = (uint16_t)q->res->qclass;
	qinfo->qname = sldns_str2wire_dname(q->res->qname, &qinfo->qname_len);
	if(qinfo->qname == NULL)
		return 0;

	edns->edns_present = 1;
	edns->ext_rcode = 0;
	edns->edns_version = 0;
	edns->bits = EDNS_DO;
	/* the size field is 16 bits wide, so cap a larger buffer */
	cap = sldns_buffer_capacity(w->back->udp_buff);
	if(cap < 65535) {
		edns->udp_size = (uint16_t)cap;
	} else {
		edns->udp_size = 65535;
	}
	return 1;
}
Exemplo n.º 6
0
/**
 * Sum the memory in use by a listening structure.
 * Counts the structure itself, its base, the UDP buffer (structure and
 * capacity) and, for every element of the cps list, the list element plus
 * what comm_point_get_mem() reports for its comm point.
 * @param listen: listening structure; must not be NULL.
 * @return size in bytes.
 */
size_t listen_get_mem(struct listen_dnsport* listen)
{
	struct listen_list* node = listen->cps;
	size_t total = sizeof(*listen) + sizeof(*listen->base) +
		sizeof(*listen->udp_buff) +
		sldns_buffer_capacity(listen->udp_buff);

	while(node) {
		total += sizeof(*node);
		total += comm_point_get_mem(node->com);
		node = node->next;
	}
	return total;
}
Exemplo n.º 7
0
/**
 * Register an outgoing TCP query as a fake pending entry.
 * No packet is sent here: a copy of the packet is stored in a newly
 * allocated fake_pending, which is put at the front of the runtime's
 * pending list.  If the current replay moment is a matching back-query
 * event, the replay is advanced first.
 * Allocation failure terminates the program with fatal_exit().
 * @param outnet: outside network; its base is used as the replay runtime.
 * @param packet: query packet; bytes from its start up to its limit are
 *	copied.
 * @param addr: destination address, copied into the pending entry.
 * @param addrlen: length of addr in bytes.
 * @param timeout: stored in the pending entry.
 * @param callback: stored in the pending entry.
 * @param callback_arg: stored in the pending entry.
 * @param ssl_upstream: unused.
 * @return the new fake pending entry, cast to struct waiting_tcp*.
 */
struct waiting_tcp* 
pending_tcp_query(struct outside_network* outnet, sldns_buffer* packet,
	struct sockaddr_storage* addr, socklen_t addrlen, int timeout,
	comm_point_callback_t* callback, void* callback_arg,
	int ATTR_UNUSED(ssl_upstream))
{
	struct replay_runtime* runtime = (struct replay_runtime*)outnet->base;
	struct fake_pending* pend = (struct fake_pending*)calloc(1,
		sizeof(struct fake_pending));
	/* check allocations explicitly, in the same way as the memdup
	 * below, so that the check does not rely on log_assert being
	 * compiled in */
	if(!pend) fatal_exit("out of memory");
	pend->buffer = sldns_buffer_new(sldns_buffer_capacity(packet));
	if(!pend->buffer) fatal_exit("out of memory");
	sldns_buffer_write(pend->buffer, sldns_buffer_begin(packet),
		sldns_buffer_limit(packet));
	sldns_buffer_flip(pend->buffer);
	memcpy(&pend->addr, addr, addrlen);
	pend->addrlen = addrlen;
	pend->callback = callback;
	pend->cb_arg = callback_arg;
	pend->timeout = timeout;
	pend->transport = transport_tcp;
	pend->zone = NULL;
	pend->runtime = runtime;
	pend->serviced = 0;
	/* keep a flat copy of the packet as well, for matching and logging */
	pend->pkt_len = sldns_buffer_limit(packet);
	pend->pkt = memdup(sldns_buffer_begin(packet), pend->pkt_len);
	if(!pend->pkt) fatal_exit("out of memory");
	log_pkt("pending tcp pkt: ", pend->pkt, pend->pkt_len);

	/* see if it matches the current moment */
	if(runtime->now && runtime->now->evt_type == repevt_back_query &&
		(runtime->now->addrlen == 0 || sockaddr_cmp(
			&runtime->now->addr, runtime->now->addrlen,
			&pend->addr, pend->addrlen) == 0) &&
		find_match(runtime->now->match, pend->pkt, pend->pkt_len,
			pend->transport)) {
		log_info("testbound: matched pending to event. "
			"advance time between events.");
		log_info("testbound: do STEP %d %s", runtime->now->time_step,
			repevt_string(runtime->now->evt_type));
		advance_moment(runtime);
		/* still create the pending, because we need it to callback */
	} 
	log_info("testbound: created fake pending");
	/* add to list */
	pend->next = runtime->pending_list;
	runtime->pending_list = pend;
	return (struct waiting_tcp*)pend;
}
Exemplo n.º 8
0
/**
 * Put a domain name, given as text, into a buffer in wire format.
 * The buffer is cleared, the name is converted into the start of the
 * buffer (bounded by its capacity), and the buffer is then flipped so that
 * it holds exactly the converted name.  A conversion error is fatal.
 * @param b: buffer to fill.
 * @param str: domain name string.
 * @return b.
 */
static sldns_buffer*
dname_to_buf(sldns_buffer* b, const char* str)
{
	size_t wirelen = sldns_buffer_capacity(b);
	int err;

	sldns_buffer_clear(b);
	err = sldns_str2wire_dname_buf(str, sldns_buffer_begin(b), &wirelen);
	if(err != 0) {
		fatal_exit("%s ldns: %s", __func__, 
			sldns_get_errorstr_parse(err));
	}
	/* position at the end of the name, so that flip sets the limit */
	sldns_buffer_set_position(b, wirelen);
	sldns_buffer_flip(b);
	return b;
}
Exemplo n.º 9
0
/**
 * Convert the return_msg of a query state into the data buffer.
 * The reply is encoded as a packet and two 64-bit big-endian values are
 * appended after it: the current time and the expiry time (current time
 * plus the reply TTL).
 * @param qstate: query state; return_msg, query_flags and env are read.
 * @param buf: buffer that receives packet, timestamp and expiry.
 * @return 0 if there is no reply, if the reply has TTL 0 while
 *	serve_expired is off, if encoding fails, or if the two trailing
 *	values do not fit; 1 on success.
 */
static int
prep_data(struct module_qstate* qstate, struct sldns_buffer* buf)
{
	struct edns_data edns;
	uint64_t timestamp, expiry;
	size_t pktlen;
	const size_t trailer = sizeof(timestamp)+sizeof(expiry);

	memset(&edns, 0, sizeof(edns));
	edns.edns_present = 1;
	edns.bits = EDNS_DO;
	edns.ext_rcode = 0;
	edns.edns_version = EDNS_ADVERTISED_VERSION;
	edns.udp_size = EDNS_ADVERTISED_SIZE;

	if(!qstate->return_msg || !qstate->return_msg->rep)
		return 0;
	/* A reply with TTL 0 is not stored unless serve-expired is enabled:
	 * it would not be reusable and only waste space in the backend.
	 * This is also compatible with the default behavior of
	 * dns_cache_store_msg(). */
	if(qstate->return_msg->rep->ttl == 0 &&
		!qstate->env->cfg->serve_expired)
		return 0;
	if(verbosity >= VERB_ALGO) {
		log_dns_msg("cachedb encoding", &qstate->return_msg->qinfo,
			qstate->return_msg->rep);
	}
	if(!reply_info_answer_encode(&qstate->return_msg->qinfo,
		qstate->return_msg->rep, 0, qstate->query_flags,
		buf, 0, 1, qstate->env->scratch, 65535, &edns, 1, 0))
		return 0;

	/* TTLs in the return_msg are relative to time(0), so that time is
	 * stored; the packet expiry time is that time plus the smallest ttl
	 * in the packet, which is what rep->ttl contains */
	timestamp = (uint64_t)*qstate->env->now;
	expiry = timestamp + (uint64_t)qstate->return_msg->rep->ttl;
	timestamp = htobe64(timestamp);
	expiry = htobe64(expiry);

	pktlen = sldns_buffer_limit(buf);
	if(pktlen + trailer >= sldns_buffer_capacity(buf))
		return 0; /* doesn't fit. */
	sldns_buffer_set_limit(buf, pktlen + trailer);
	sldns_buffer_write_at(buf, pktlen, &timestamp, sizeof(timestamp));
	sldns_buffer_write_at(buf, pktlen+sizeof(timestamp), &expiry,
		sizeof(expiry));
	return 1;
}
Exemplo n.º 10
0
/**
 * Store one trust anchor that is given as a resource record string.
 * The string is parsed into wire format at the start of the buffer and the
 * resulting record is handed to anchor_store_new_rr().
 * @param anchors: anchor storage.
 * @param buffer: parsing buffer; its capacity bounds the record size.
 * @param str: resource record in text form.
 * @return the trust anchor returned by anchor_store_new_rr(), or NULL
 *	on a parse error or when storing fails (both are logged).
 */
struct trust_anchor*
anchor_store_str(struct val_anchors* anchors, sldns_buffer* buffer,
	const char* str)
{
	struct trust_anchor* anchor;
	uint8_t* wire = sldns_buffer_begin(buffer);
	size_t wirelen = sldns_buffer_capacity(buffer);
	size_t dname_len = 0;
	int err;

	err = sldns_str2wire_rr_buf(str, wire, &wirelen, &dname_len,
		0, NULL, 0, NULL, 0);
	if(err != 0) {
		log_err("error parsing trust anchor %s: at %d: %s", 
			str, LDNS_WIREPARSE_OFFSET(err),
			sldns_get_errorstr_parse(err));
		return NULL;
	}
	anchor = anchor_store_new_rr(anchors, wire, wirelen, dname_len);
	if(!anchor) {
		log_err("out of memory");
		return NULL;
	}
	return anchor;
}
Exemplo n.º 11
0
/**
 * Handle a reply that is ready on one of the io channels.
 * The reply is received into info->buf; on success the counter for its
 * rcode and the total receive counter are incremented.  A receive error
 * is logged.  In both cases the next query is sent with perfsend().
 * Note that the rcode is read from the buffer without looking at the
 * number of bytes that were received.
 * @param info: perf state with the io array, buffer and counters.
 * @param n: index of the io channel that has the reply.
 * @param now: current time, passed on to perfsend().
 */
static void
perfreply(struct perfinfo* info, size_t n, struct timeval* now)
{
	ssize_t got = recv(info->io[n].fd,
		(void*)sldns_buffer_begin(info->buf),
		sldns_buffer_capacity(info->buf), 0);

	if(got != -1) {
		info->by_rcode[LDNS_RCODE_WIRE(sldns_buffer_begin(
			info->buf))]++;
		info->numrecv++;
	} else {
#ifndef USE_WINSOCK
		log_err("recv: %s", strerror(errno));
#else
		log_err("recv: %s", wsa_strerror(WSAGetLastError()));
#endif
	}
	perfsend(info, n, now);
}
Exemplo n.º 12
0
/**
 * Receive new waiting packets from clients.
 * At most TRIES_PER_SELECT datagrams are read.  For each one the proxy
 * element for its source address is found or created, and the packet is
 * added to the ring.  The function returns early when the socket has
 * nothing more to read (EAGAIN/EINTR, or the winsock equivalents); any
 * other receive error, and a failure to get a proxy, is fatal.
 * @param s: socket to receive on.
 * @param ring: ring buffer that the packets are added to.
 * @param pkt: buffer used to receive each datagram.
 * @param rorig: passed on to find_create_proxy().
 * @param max: passed on to find_create_proxy().
 * @param proxies: passed on to find_create_proxy().
 * @param srv_addr: server address; used to tell if it is IPv6.
 * @param srv_len: length of srv_addr.
 * @param now: current time; recorded as last use of the proxy.
 * @param delay: passed on to ring_add().
 * @param reuse: passed on to find_create_proxy().
 */
static void
service_recv(int s, struct ringbuf* ring, sldns_buffer* pkt, 
	fd_set* rorig, int* max, struct proxy** proxies,
	struct sockaddr_storage* srv_addr, socklen_t srv_len, 
	struct timeval* now, struct timeval* delay, struct timeval* reuse)
{
	int attempt;
	for(attempt=0; attempt<TRIES_PER_SELECT; attempt++) {
		struct sockaddr_storage from;
		socklen_t from_len = (socklen_t)sizeof(from);
		struct proxy* client;
		ssize_t got = recvfrom(s, (void*)sldns_buffer_begin(pkt),
			sldns_buffer_capacity(pkt), 0,
			(struct sockaddr*)&from, &from_len);
		if(got < 0) {
#ifndef USE_WINSOCK
			if(errno == EAGAIN || errno == EINTR)
				return;
			fatal_exit("recvfrom: %s", strerror(errno));
#else
			if(WSAGetLastError() == WSAEWOULDBLOCK || 
				WSAGetLastError() == WSAEINPROGRESS)
				return;
			fatal_exit("recvfrom: %s", 
				wsa_strerror(WSAGetLastError()));
#endif
		}
		sldns_buffer_set_limit(pkt, (size_t)got);

		/* find its proxy element */
		client = find_create_proxy(&from, from_len, rorig, max,
			proxies, addr_is_ip6(srv_addr, srv_len), now, reuse);
		if(!client)
			fatal_exit("error: cannot find or create proxy");
		client->lastuse = *now;
		ring_add(ring, pkt, now, delay, client);
		client->numwait++;
		log_addr(1, "recv from client", &client->addr,
			client->addr_len);
	}
}
Exemplo n.º 13
0
/**
 * Do the proxy work for one readable client: relay replies back.
 * At most TRIES_PER_SELECT packets are read from the proxy socket and
 * each is sent to the client address of the proxy over retsock.
 * A receive error ends the function (it is logged unless it only means
 * that there is nothing to read); a send error is logged and the loop
 * continues.
 * @param p: proxy element; p->s is read, p->addr is the destination.
 * @param retsock: socket used to send to the client.
 * @param pkt: buffer used for the packets.
 */
static void
do_proxy(struct proxy* p, int retsock, sldns_buffer* pkt)
{
	int attempt;
	for(attempt=0; attempt<TRIES_PER_SELECT; attempt++) {
		ssize_t sent;
		ssize_t got = recv(p->s, (void*)sldns_buffer_begin(pkt), 
			sldns_buffer_capacity(pkt), 0);
		if(got == -1) {
#ifndef USE_WINSOCK
			if(errno == EAGAIN || errno == EINTR)
				return;
			log_err("recv: %s", strerror(errno));
#else
			if(WSAGetLastError() == WSAEINPROGRESS ||
				WSAGetLastError() == WSAEWOULDBLOCK)
				return;
			log_err("recv: %s", wsa_strerror(WSAGetLastError()));
#endif
			return;
		}
		sldns_buffer_set_limit(pkt, (size_t)got);
		log_addr(1, "return reply to client", &p->addr, p->addr_len);

		/* send reply back to the real client */
		p->numreturn++;
		sent = sendto(retsock, (void*)sldns_buffer_begin(pkt),
			(size_t)got, 0, (struct sockaddr*)&p->addr,
			p->addr_len);
		if(sent == -1) {
#ifndef USE_WINSOCK
			log_err("sendto: %s", strerror(errno));
#else
			log_err("sendto: %s", wsa_strerror(WSAGetLastError()));
#endif
		}
	}
}
Exemplo n.º 14
0
/**
 * Read a line from ssl into a buffer.
 * The line is stored from the start of the buffer and may use up to the
 * buffer capacity; the work is done by ssl_read_line().
 * @param ssl: connection to read from.
 * @param buf: buffer that receives the line.
 * @return the return value of ssl_read_line().
 */
static int
ssl_read_buf(SSL* ssl, sldns_buffer* buf)
{
	char* line = (char*)sldns_buffer_begin(buf);

	return ssl_read_line(ssl, line, sldns_buffer_capacity(buf));
}
Exemplo n.º 15
0
/**
 * Report on memory usage by this thread and global.
 * With UNBOUND_ALLOC_STATS (and verbosity of at least VERB_ALGO) the
 * memory of the worker parts is collected and logged, and the total is
 * given to debug_total_mem().  The anchors figure is logged but is not
 * part of that total.
 * Without UNBOUND_ALLOC_STATS (and verbosity of at least VERB_QUERY) only
 * the message cache, rrset cache, infra cache and validator module memory
 * are logged.
 * @param worker: the worker to report on.
 * @param cur_serv: if not NULL, its memory is added to the worker figure
 *	(only used with UNBOUND_ALLOC_STATS).
 */
static void
worker_mem_report(struct worker* ATTR_UNUSED(worker), 
	struct serviced_query* ATTR_UNUSED(cur_serv))
{
#ifdef UNBOUND_ALLOC_STATS
	/* debug func in validator module */
	size_t total, front, back, mesh, msg, rrset, infra, ac, superac;
	size_t me, iter, val, anch;
	int i;
	if(verbosity < VERB_ALGO) 
		return;
	front = listen_get_mem(worker->front);
	back = outnet_get_mem(worker->back);
	msg = slabhash_get_mem(worker->env.msg_cache);
	rrset = slabhash_get_mem(&worker->env.rrset_cache->table);
	infra = infra_get_mem(worker->env.infra_cache);
	mesh = mesh_get_mem(worker->env.mesh);
	ac = alloc_get_mem(&worker->alloc);
	superac = alloc_get_mem(&worker->daemon->superalloc);
	anch = anchors_get_mem(worker->env.anchors);

	/* module memory: the validator is counted apart from the rest */
	iter = 0;
	val = 0;
	for(i=0; i<worker->env.mesh->mods.num; i++) {
		size_t used;
		int is_validator;
		fptr_ok(fptr_whitelist_mod_get_mem(worker->env.mesh->
			mods.mod[i]->get_mem));
		is_validator = (strcmp(worker->env.mesh->mods.mod[i]->name,
			"validator") == 0);
		used = (*worker->env.mesh->mods.mod[i]->get_mem)
			(&worker->env, i);
		if(is_validator)
			val += used;
		else	iter += used;
	}

	/* the worker itself and the items it owns directly */
	me = sizeof(*worker) + sizeof(*worker->base) + sizeof(*worker->comsig);
	me += comm_point_get_mem(worker->cmd_com);
	me += sizeof(worker->rndstate);
	me += regional_get_mem(worker->scratchpad);
	me += sizeof(*worker->env.scratch_buffer);
	me += sldns_buffer_capacity(worker->env.scratch_buffer);
	me += forwards_get_mem(worker->env.fwds);
	me += hints_get_mem(worker->env.hints);
	if(worker->thread_num == 0)
		me += acl_list_get_mem(worker->daemon->acl);
	if(cur_serv)
		me += serviced_get_mem(cur_serv);

	total = front+back+mesh+msg+rrset+infra+iter+val+ac+superac+me;
	log_info("Memory conditions: %u front=%u back=%u mesh=%u msg=%u "
		"rrset=%u infra=%u iter=%u val=%u anchors=%u "
		"alloccache=%u globalalloccache=%u me=%u",
		(unsigned)total, (unsigned)front, (unsigned)back, 
		(unsigned)mesh, (unsigned)msg, (unsigned)rrset, 
		(unsigned)infra, (unsigned)iter, (unsigned)val, (unsigned)anch,
		(unsigned)ac, (unsigned)superac, (unsigned)me);
	debug_total_mem(total);
#else /* no UNBOUND_ALLOC_STATS */
	size_t val = 0;
	int i;
	if(verbosity < VERB_QUERY)
		return;
	for(i=0; i<worker->env.mesh->mods.num; i++) {
		fptr_ok(fptr_whitelist_mod_get_mem(worker->env.mesh->
			mods.mod[i]->get_mem));
		/* only the validator module is counted here */
		if(strcmp(worker->env.mesh->mods.mod[i]->name, "validator")!=0)
			continue;
		val += (*worker->env.mesh->mods.mod[i]->get_mem)
			(&worker->env, i);
	}
	verbose(VERB_QUERY, "cache memory msg=%u rrset=%u infra=%u val=%u",
		(unsigned)slabhash_get_mem(worker->env.msg_cache),
		(unsigned)slabhash_get_mem(&worker->env.rrset_cache->table),
		(unsigned)infra_get_mem(worker->env.infra_cache),
		(unsigned)val);
#endif /* UNBOUND_ALLOC_STATS */
}
Exemplo n.º 16
0
/**
 * Receive one DNS message and print it.
 * For a stream (udp is zero) the message is preceded by a two-byte length
 * in network byte order, read from ssl if that is not NULL and from fd
 * otherwise.  For UDP a single datagram is read from fd.
 * Every receive failure terminates the program.
 * @param fd: socket to read from (not used for reading when ssl is set
 *	and udp is zero).
 * @param udp: nonzero to read one datagram, zero for a length-prefixed
 *	message on a stream.
 * @param ssl: SSL connection to read the stream from, or NULL.
 * @param buf: buffer that receives the message; its limit is set to the
 *	message length.
 */
static void
recv_one(int fd, int udp, SSL* ssl, sldns_buffer* buf)
{
	char* pktstr;
	/* full message length; not truncated to 16 bits for UDP */
	size_t pktlen;
	if(!udp) {
		uint16_t len;
		if(ssl) {
			int r = SSL_read(ssl, (void*)&len, (int)sizeof(len));
			if(r <= 0) {
				log_crypto_err("could not SSL_read");
				exit(1);
			}
			/* a short read would leave part of len unset */
			if(r != (int)sizeof(len))
				fatal_exit("ssl_read %d of %d", r,
					(int)sizeof(len));
		} else {
			if(recv(fd, (void*)&len, sizeof(len), 0) <
				(ssize_t)sizeof(len)) {
#ifndef USE_WINSOCK
				perror("read() len failed");
#else
				printf("read len: %s\n", 
					wsa_strerror(WSAGetLastError()));
#endif
				exit(1);
			}
		}
		len = ntohs(len);
		/* the length is supplied by the peer; refuse one that does
		 * not fit, so the reads below stay inside the buffer */
		if((size_t)len > sldns_buffer_capacity(buf))
			fatal_exit("message length %d exceeds buffer "
				"capacity %d", (int)len,
				(int)sldns_buffer_capacity(buf));
		sldns_buffer_clear(buf);
		sldns_buffer_set_limit(buf, len);
		if(ssl) {
			int r = SSL_read(ssl, (void*)sldns_buffer_begin(buf),
				(int)len);
			if(r <= 0) {
				log_crypto_err("could not SSL_read");
				exit(1);
			}
			if(r != (int)len)
				fatal_exit("ssl_read %d of %d", r, len);
		} else {
			if(recv(fd, (void*)sldns_buffer_begin(buf), len, 0) < 
				(ssize_t)len) {
#ifndef USE_WINSOCK
				perror("read() data failed");
#else
				printf("read data: %s\n", 
					wsa_strerror(WSAGetLastError()));
#endif
				exit(1);
			}
		}
		pktlen = (size_t)len;
	} else {
		ssize_t l;
		sldns_buffer_clear(buf);
		if((l=recv(fd, (void*)sldns_buffer_begin(buf), 
			sldns_buffer_capacity(buf), 0)) < 0) {
#ifndef USE_WINSOCK
			perror("read() data failed");
#else
			printf("read data: %s\n", 
				wsa_strerror(WSAGetLastError()));
#endif
			exit(1);
		}
		sldns_buffer_set_limit(buf, (size_t)l);
		pktlen = (size_t)l;
	}
	printf("\nnext received packet\n");
	log_buf(0, "data", buf);

	pktstr = sldns_wire2str_pkt(sldns_buffer_begin(buf), pktlen);
	/* passing NULL for %s is undefined behavior, so check first */
	if(!pktstr)
		fatal_exit("could not convert packet to string");
	printf("%s", pktstr);
	free(pktstr);
}
Exemplo n.º 17
0
/**
 * Accept a new TCP connection and set it up as a proxy.
 * A connection is accepted on the listening socket, a new TCP socket is
 * created and a nonblocking connect to the server is started.  The new
 * tcp_proxy element gets its timeout set to now plus tcp_timeout, both
 * sockets are added to the read set and the element is put at the front
 * of the proxy list.
 * If accept has nothing to return, the function returns without changes.
 * If the connect fails right away, both sockets are closed and the element
 * is dropped.  Other failures are fatal.
 * @param s: listening socket.
 * @param rorig: fd set that the client and server sockets are added to.
 * @param max: raised if needed so that it exceeds the new sockets.
 * @param proxies: list of TCP proxies; the new element is prepended.
 * @param srv_addr: server address to connect to.
 * @param srv_len: length of srv_addr.
 * @param now: current time.
 * @param tcp_timeout: added to now for the timeout of the new element.
 */
static void
service_tcp_listen(int s, fd_set* rorig, int* max, struct tcp_proxy** proxies,
	struct sockaddr_storage* srv_addr, socklen_t srv_len, 
	struct timeval* now, struct timeval* tcp_timeout)
{
	int newfd;
	struct sockaddr_storage addr;
	struct tcp_proxy* p;
	socklen_t addr_len;
	/* accept() reads the length on entry as the size of the address
	 * buffer, so it must be set before the call */
	addr_len = (socklen_t)sizeof(addr);
	newfd = accept(s, (struct sockaddr*)&addr, &addr_len);
	if(newfd == -1) {
#ifndef USE_WINSOCK
		if(errno == EAGAIN || errno == EINTR)
			return;
		fatal_exit("accept: %s", strerror(errno));
#else
		if(WSAGetLastError() == WSAEWOULDBLOCK || 
			WSAGetLastError() == WSAEINPROGRESS ||
			WSAGetLastError() == WSAECONNRESET)
			return;
		fatal_exit("accept: %s", wsa_strerror(WSAGetLastError()));
#endif
	}
	p = (struct tcp_proxy*)calloc(1, sizeof(*p));
	if(!p) fatal_exit("out of memory");
	memmove(&p->addr, &addr, addr_len);
	p->addr_len = addr_len;
	log_addr(1, "new tcp proxy", &p->addr, p->addr_len);
	p->client_s = newfd;
	p->server_s = socket(addr_is_ip6(srv_addr, srv_len)?AF_INET6:AF_INET,
		SOCK_STREAM, 0);
	if(p->server_s == -1) {
#ifndef USE_WINSOCK
		fatal_exit("tcp socket: %s", strerror(errno));
#else
		fatal_exit("tcp socket: %s", wsa_strerror(WSAGetLastError()));
#endif
	}
	fd_set_nonblock(p->client_s);
	fd_set_nonblock(p->server_s);
	/* the socket is nonblocking: a connect that is still in progress
	 * is not an error */
	if(connect(p->server_s, (struct sockaddr*)srv_addr, srv_len) == -1) {
#ifndef USE_WINSOCK
		if(errno != EINPROGRESS) {
			log_err("tcp connect: %s", strerror(errno));
			close(p->server_s);
			close(p->client_s);
#else
		if(WSAGetLastError() != WSAEWOULDBLOCK &&
			WSAGetLastError() != WSAEINPROGRESS) {
			log_err("tcp connect: %s", 
				wsa_strerror(WSAGetLastError()));
			closesocket(p->server_s);
			closesocket(p->client_s);
#endif
			free(p);
			return;
		}
	}
	p->timeout = *now;
	dl_tv_add(&p->timeout, tcp_timeout);

	/* listen to client and server */
	FD_SET(FD_SET_T p->client_s, rorig);
	FD_SET(FD_SET_T p->server_s, rorig);
	if(p->client_s+1 > *max)
		*max = p->client_s+1;
	if(p->server_s+1 > *max)
		*max = p->server_s+1;

	/* add into proxy list */
	p->next = *proxies;
	*proxies = p;
}

/**
 * Relay TCP, read a part.
 * One recv is done on the socket.  The received bytes are copied into a
 * new send-list item, which becomes ready at now plus delay and is
 * appended to the end of the list.
 * @param s: socket to read from.
 * @param first: start of the send list; set if the list was empty.
 * @param last: end of the send list; set to the new item.
 * @param now: current time.
 * @param delay: added to now for the wait time of the item.
 * @param pkt: buffer used to receive into.
 * @return 1 if data was queued or if there was nothing to read right
 *	now; 0 if the connection was closed, on a read error or when out
 *	of memory.
 */
static int
tcp_relay_read(int s, struct tcp_send_list** first, 
	struct tcp_send_list** last, struct timeval* now, 
	struct timeval* delay, sldns_buffer* pkt)
{
	struct tcp_send_list* entry;
	ssize_t got = recv(s, (void*)sldns_buffer_begin(pkt), 
		sldns_buffer_capacity(pkt), 0);

	if(got == 0) {
		/* connection closed */
		return 0;
	}
	if(got == -1) {
#ifndef USE_WINSOCK
		if(errno == EINTR || errno == EAGAIN)
			return 1;
		log_err("tcp read: %s", strerror(errno));
#else
		if(WSAGetLastError() == WSAEINPROGRESS || 
			WSAGetLastError() == WSAEWOULDBLOCK)
			return 1;
		log_err("tcp read: %s", wsa_strerror(WSAGetLastError()));
#endif
		return 0;
	}

	entry = (struct tcp_send_list*)malloc(sizeof(*entry));
	if(entry == NULL) {
		log_err("out of memory");
		return 0;
	}
	verbose(1, "read item len %d", (int)got);
	entry->len = (size_t)got;
	entry->item = memdup(sldns_buffer_begin(pkt), entry->len);
	if(entry->item == NULL) {
		free(entry);
		log_err("out of memory");
		return 0;
	}
	entry->done = 0;
	entry->next = NULL;
	entry->wait = *now;
	dl_tv_add(&entry->wait, delay);

	/* link in at the end */
	if(!*first)
		*first = entry;
	else	(*last)->next = entry;
	*last = entry;
	return 1;
}

/**
 * Relay TCP, write a part.
 * Items are sent from the front of the send list for as long as the
 * front item is ready (dl_tv_smaller() of its wait time and now) and is
 * written out completely.  A fully written item is removed from the list
 * and freed.
 * @param s: socket to write to.
 * @param first: start of the send list; advanced as items are completed.
 * @param last: end of the send list; set to NULL when the list empties.
 * @param now: current time.
 * @return 1 if the list is empty, the front item is not ready yet, the
 *	socket cannot take more now, or a write was partial; 0 on a write
 *	error or when send returns 0.
 */
static int
tcp_relay_write(int s, struct tcp_send_list** first, 
	struct tcp_send_list** last, struct timeval* now)
{
	struct tcp_send_list* head;
	ssize_t sent;

	while((head = *first) != NULL) {
		/* is the item ready? */
		if(!dl_tv_smaller(&head->wait, now))
			return 1;

		/* write the part that has not been sent yet */
		sent = send(s, (void*)(head->item + head->done),
			head->len - head->done, 0);
		if(sent == -1) {
#ifndef USE_WINSOCK
			if(errno == EAGAIN || errno == EINTR)
				return 1;
			log_err("tcp write: %s", strerror(errno));
#else
			if(WSAGetLastError() == WSAEWOULDBLOCK || 
				WSAGetLastError() == WSAEINPROGRESS)
				return 1;
			log_err("tcp write: %s", 
				wsa_strerror(WSAGetLastError()));
#endif
			return 0;
		}
		if(sent == 0) {
			/* closed */
			return 0;
		}

		/* account it */
		head->done += (size_t)sent;
		verbose(1, "write item %d of %d", (int)head->done,
			(int)head->len);
		if(head->done < head->len) {
			/* partial write */
			return 1;
		}

		/* item complete: unlink and release it */
		free(head->item);
		*first = head->next;
		if(!*first)
			*last = NULL;
		free(head);
	}
	return 1;
}