Beispiel #1
0
/*
 * Emulated pipe2(): creates a DT_PIPE descriptor on the current node and
 * returns its handle together with the handle of its linked peer.
 *
 * pipefds: receives the reader handle in [0] and the writer handle in [1].
 * flags:   must contain O_NONBLOCK; only non-blocking pipes are supported.
 *          O_CLOEXEC may be OR'ed in as well, but it carries no information
 *          about the blocking mode and therefore does not satisfy the
 *          O_NONBLOCK requirement on its own.
 *
 * Returns 0 on success. Returns -1 with errno set to EFAULT when pipefds is
 * NULL, or to EINVAL when O_NONBLOCK was not requested.
 */
gint system_pipe2(gint pipefds[2], gint flags) {
	/* nowhere to store the resulting handles */
	if(pipefds == NULL) {
		errno = EFAULT;
		return -1;
	}

	/* we only support non-blocking pipes, and require O_NONBLOCK to be set
	 * immediately. only O_NONBLOCK decides this: treating O_CLOEXEC as
	 * "non-blocking" would silently hand a non-blocking pipe to a caller
	 * that asked for a blocking one. */
	gboolean isBlocking = (flags & O_NONBLOCK) ? FALSE : TRUE;

	/* check inputs for what we support */
	if(isBlocking) {
		warning("we only support non-blocking pipes: please bitwise OR 'O_NONBLOCK' with flags");
		errno = EINVAL;
		return -1;
	}

	Node* node = _system_switchInShadowContext();

	/* the new descriptor is looked up as a Channel to find its linked peer */
	gint handle = node_createDescriptor(node, DT_PIPE);

	Channel* channel = (Channel*) node_lookupDescriptor(node, handle);
	gint linkedHandle = channel_getLinkedHandle(channel);

	_system_switchOutShadowContext(node);

	pipefds[0] = handle; /* reader */
	pipefds[1] = linkedHandle; /* writer */
	return 0;
}
Beispiel #2
0
/*
 * Emulated epoll_create(): allocates a DT_EPOLL descriptor on the current
 * node and returns its handle.
 *
 * size: must be positive; beyond that check the value is not used.
 *
 * Returns the value produced by node_createDescriptor(), or -1 with errno set
 * to EINVAL when size is not positive.
 */
gint system_epollCreate(gint size) {
	/* reject non-positive sizes; the actual value is irrelevant otherwise */
	if(size <= 0) {
		errno = EINVAL;
		return -1;
	}

	/* the descriptor has to be created while inside the shadow context */
	Node* currentNode = _system_switchInShadowContext();
	gint epollHandle = node_createDescriptor(currentNode, DT_EPOLL);
	_system_switchOutShadowContext(currentNode);

	return epollHandle;
}
Beispiel #3
0
/*
 * Emulated socketpair(): creates a pair of connected sockets, i.e. a
 * bi-directional pipe, backed by a DT_SOCKETPAIR descriptor and its linked
 * peer.
 *
 * domain:   must be AF_UNIX.
 * type:     must be SOCK_STREAM, optionally OR'ed with SOCK_NONBLOCK and/or
 *           SOCK_CLOEXEC.
 * protocol: not inspected.
 * fds:      receives the two handles.
 *
 * Returns 0 on success. Returns -1 with errno set to EAFNOSUPPORT (wrong
 * domain), EPROTONOSUPPORT (wrong type) or EFAULT (fds is NULL).
 *
 * NOTE(review): the previous version carried an isBlocking flag that was
 * initialised to FALSE and never set to TRUE, so its "only non-blocking
 * sockets" rejection could never trigger. That dead branch is removed here
 * and the effective behaviour (SOCK_NONBLOCK accepted but not required) is
 * kept. If the requirement is meant to be enforced, it needs an explicit
 * check on (type & SOCK_NONBLOCK).
 */
gint system_socketPair(gint domain, gint type, gint protocol, gint fds[2]) {
	if(domain != AF_UNIX) {
		errno = EAFNOSUPPORT;
		return -1;
	}

	/* strip the modifier flags to get the true socket type */
	gint realType = type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC);

	if(realType != SOCK_STREAM) {
		errno = EPROTONOSUPPORT;
		return -1;
	}

	/* nowhere to store the resulting handles */
	if(fds == NULL) {
		errno = EFAULT;
		return -1;
	}

	Node* node = _system_switchInShadowContext();

	/* the new descriptor is looked up as a Channel to find its linked peer */
	gint handle = node_createDescriptor(node, DT_SOCKETPAIR);

	Channel* channel = (Channel*) node_lookupDescriptor(node, handle);
	gint linkedHandle = channel_getLinkedHandle(channel);

	fds[0] = handle;
	fds[1] = linkedHandle;

	_system_switchOutShadowContext(node);
	return 0;
}
Beispiel #4
0
/*
 * Emulated socket(): creates a TCP or UDP socket descriptor on the current
 * node.
 *
 * domain:   must be AF_INET.
 * type:     SOCK_STREAM or SOCK_DGRAM, optionally OR'ed with SOCK_NONBLOCK
 *           and/or SOCK_CLOEXEC.
 * protocol: not inspected.
 *
 * Returns the value produced by node_createDescriptor() on success. Returns
 * -1 with errno set to EPROTONOSUPPORT (unsupported type) or EAFNOSUPPORT
 * (unsupported domain).
 *
 * NOTE(review): the previous version carried an isBlocking flag that was
 * initialised to FALSE and never set to TRUE, so its "only non-blocking
 * sockets" rejection could never trigger. That dead branch is removed here
 * and the effective behaviour (SOCK_NONBLOCK accepted but not required) is
 * kept. If the requirement is meant to be enforced, it needs an explicit
 * check on (type & SOCK_NONBLOCK).
 */
gint system_socket(gint domain, gint type, gint protocol) {
	/* strip the modifier flags to get the true socket type */
	type = type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC);

	gint result = 0;
	Node* node = _system_switchInShadowContext();

	/* check inputs for what we support */
	if(type != SOCK_STREAM && type != SOCK_DGRAM) {
		warning("unsupported socket type \"%i\", we only support SOCK_STREAM and SOCK_DGRAM", type);
		errno = EPROTONOSUPPORT;
		result = -1;
	} else if(domain != AF_INET) {
		warning("trying to create socket with domain \"%i\", we only support PF_INET", domain);
		errno = EAFNOSUPPORT;
		result = -1;
	}

	if(result == 0) {
		/* we are all set to create the socket */
		DescriptorType dtype = type == SOCK_STREAM ? DT_TCPSOCKET : DT_UDPSOCKET;
		result = node_createDescriptor(node, dtype);
	}

	_system_switchOutShadowContext(node);
	return result;
}
Beispiel #5
0
/*
 * Process one incoming packet for the given TCP socket.
 *
 * Steps, in order:
 *  1. read the TCP header and payload length from the packet;
 *  2. re-resolve the socket with _tcp_getSourceTCP() using the packet's
 *     source IP/port (a server socket may hand the packet to a child);
 *  3. handle RST packets and return early;
 *  4. run the connection state machine, collecting the control flags that
 *     should be sent in response;
 *  5. apply any acknowledgement to the send side and release acked packets
 *     from the retransmit queue;
 *  6. buffer the payload if it lies inside the receive window;
 *  7. update the congestion window, flush, and send a control packet if one
 *     is due (an RST if nothing above recognised the packet).
 *
 * Returns TRUE if the packet should be retransmitted (payload was beyond the
 * receive window, or was inside it but could not be buffered), FALSE
 * otherwise.
 *
 * The packet is unref'd inside this function on the early-return paths (RST,
 * LASTACK -> CLOSED, CLOSED) and at the end when no retransmit is requested
 * and the payload length is zero.
 */
gboolean tcp_processPacket(TCP* tcp, Packet* packet) {
	MAGIC_ASSERT(tcp);

	/* fetch the TCP info from the packet */
	PacketTCPHeader header;
	packet_getTCPHeader(packet, &header);
	guint packetLength = packet_getPayloadLength(packet);

	/* if we run a server, the packet could be for an existing child */
	tcp = _tcp_getSourceTCP(tcp, header.sourceIP, header.sourcePort);

	/* now we have the true TCP for the packet */
	MAGIC_ASSERT(tcp);

	/* print packet info for debugging */
	debug("%s <-> %s: processing packet# %u length %u",
			tcp->super.boundString, tcp->super.peerString, header.sequence, packetLength);

	/* if packet is reset, don't process */
	if(header.flags & PTCP_RST) {
		/* @todo: not sure if this is handled correctly */
		debug("received RESET packet");

		/* NOTE(review): tcp->state is tested with a bitwise AND here but is
		 * compared by equality in the switch below. this is only correct if
		 * the TCPS_* values are distinct bit flags -- confirm against the
		 * enum definition. */
		if(!(tcp->state & TCPS_LISTEN) && !(tcp->error & TCPE_CONNECTION_RESET)) {
			tcp->error |= TCPE_CONNECTION_RESET;
			tcp->flags |= TCPF_REMOTE_CLOSED;

			_tcp_setState(tcp, TCPS_TIMEWAIT);

			/* it will send no more user data after what we have now */
			tcp->receive.end = tcp->receive.next;
		}

		/* reset packets are consumed here and never retransmitted */
		packet_unref(packet);
		return FALSE;
	}

	/* if we are a server, we have to remember who we got this from so we can
	 * respond back to them. this is because we could be bound to several
	 * interfaces and otherwise cant decide which to send on.
	 */
	if(tcp->server) {
		tcp->server->lastPeerIP = header.sourceIP;
		tcp->server->lastPeerPort = header.sourcePort;
		tcp->server->lastIP = header.destinationIP;
	}

	/* go through the state machine, tracking processing and response.
	 * wasProcessed stays FALSE only if no branch below recognises the packet,
	 * in which case a reset is sent further down. */
	gboolean wasProcessed = FALSE;
	enum ProtocolTCPFlags responseFlags = PTCP_NONE;

	switch(tcp->state) {
		case TCPS_LISTEN: {
			/* receive SYN, send SYNACK, move to SYNRECEIVED */
			if(header.flags & PTCP_SYN) {
				MAGIC_ASSERT(tcp->server);
				wasProcessed = TRUE;

				/* we need to multiplex a new child */
				Node* node = worker_getPrivate()->cached_node;
				gint multiplexedHandle = node_createDescriptor(node, DT_TCPSOCKET);
				TCP* multiplexed = (TCP*) node_lookupDescriptor(node, multiplexedHandle);

				/* register the child with the server, keyed by the child's key;
				 * the assert requires that no child with this key exists yet */
				multiplexed->child = _tcpchild_new(multiplexed, tcp, header.sourceIP, header.sourcePort);
				g_assert(g_hash_table_lookup(tcp->server->children, &(multiplexed->child->key)) == NULL);
				g_hash_table_replace(tcp->server->children, &(multiplexed->child->key), multiplexed->child);

				/* the child's receive sequence starts at the sequence of the SYN */
				multiplexed->receive.start = header.sequence;
				multiplexed->receive.next = multiplexed->receive.start + 1;

				debug("%s <-> %s: server multiplexed child socket %s <-> %s",
						tcp->super.boundString, tcp->super.peerString,
						multiplexed->super.boundString, multiplexed->super.peerString);

				/* only the child changes state; the listening parent stays as it is */
				_tcp_setState(multiplexed, TCPS_SYNRECEIVED);

				/* parent will send response */
				responseFlags = PTCP_SYN|PTCP_ACK;
			}
			break;
		}

		case TCPS_SYNSENT: {
			/* receive SYNACK, send ACK, move to ESTABLISHED */
			if((header.flags & PTCP_SYN) && (header.flags & PTCP_ACK)) {
				wasProcessed = TRUE;
				tcp->receive.start = header.sequence;
				tcp->receive.next = tcp->receive.start + 1;

				responseFlags |= PTCP_ACK;
				_tcp_setState(tcp, TCPS_ESTABLISHED);
			}
			/* receive SYN, send ACK, move to SYNRECEIVED (simultaneous open) */
			else if(header.flags & PTCP_SYN) {
				wasProcessed = TRUE;
				tcp->receive.start = header.sequence;
				tcp->receive.next = tcp->receive.start + 1;

				responseFlags |= PTCP_ACK;
				_tcp_setState(tcp, TCPS_SYNRECEIVED);
			}

			break;
		}

		case TCPS_SYNRECEIVED: {
			/* receive ACK, move to ESTABLISHED */
			if(header.flags & PTCP_ACK) {
				wasProcessed = TRUE;
				_tcp_setState(tcp, TCPS_ESTABLISHED);

				/* if this is a child, mark it accordingly */
				if(tcp->child) {
					tcp->child->state = TCPCS_PENDING;
					g_queue_push_tail(tcp->child->parent->server->pending, tcp->child);
					/* user should accept new child from parent */
					descriptor_adjustStatus(&(tcp->child->parent->super.super.super), DS_READABLE, TRUE);
				}
			}
			break;
		}

		case TCPS_ESTABLISHED: {
			/* receive FIN, send FINACK, move to CLOSEWAIT */
			if(header.flags & PTCP_FIN) {
				wasProcessed = TRUE;

				/* other side of connections closed */
				tcp->flags |= TCPF_REMOTE_CLOSED;
				responseFlags |= (PTCP_FIN|PTCP_ACK);
				_tcp_setState(tcp, TCPS_CLOSEWAIT);

				/* remote will send us no more user data after this sequence */
				tcp->receive.end = header.sequence;
			}
			break;
		}

		case TCPS_FINWAIT1: {
			/* receive FINACK, move to FINWAIT2 */
			if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
				wasProcessed = TRUE;
				_tcp_setState(tcp, TCPS_FINWAIT2);
			}
			/* receive FIN, send FINACK, move to CLOSING (simultaneous close) */
			else if(header.flags & PTCP_FIN) {
				wasProcessed = TRUE;
				responseFlags |= (PTCP_FIN|PTCP_ACK);
				tcp->flags |= TCPF_REMOTE_CLOSED;
				_tcp_setState(tcp, TCPS_CLOSING);

				/* it will send no more user data after this sequence */
				tcp->receive.end = header.sequence;
			}
			break;
		}

		case TCPS_FINWAIT2: {
			/* receive FIN, send FINACK, move to TIMEWAIT */
			if(header.flags & PTCP_FIN) {
				wasProcessed = TRUE;
				responseFlags |= (PTCP_FIN|PTCP_ACK);
				tcp->flags |= TCPF_REMOTE_CLOSED;
				_tcp_setState(tcp, TCPS_TIMEWAIT);

				/* it will send no more user data after this sequence */
				tcp->receive.end = header.sequence;
			}
			break;
		}

		case TCPS_CLOSING: {
			/* receive FINACK, move to TIMEWAIT */
			if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
				wasProcessed = TRUE;
				_tcp_setState(tcp, TCPS_TIMEWAIT);
			}
			break;
		}

		case TCPS_TIMEWAIT: {
			/* no state transition is triggered by packets in this state */
			break;
		}

		case TCPS_CLOSEWAIT: {
			/* no state transition is triggered by packets in this state */
			break;
		}

		case TCPS_LASTACK: {
			/* receive FINACK, move to CLOSED */
			if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
				wasProcessed = TRUE;
				_tcp_setState(tcp, TCPS_CLOSED);
				/* we closed, cant use tcp anymore, no retransmit */
				packet_unref(packet);
				return FALSE;
			}
			break;
		}

		case TCPS_CLOSED: {
			/* stray packet, drop without retransmit */
			packet_unref(packet);
			return FALSE;
			/* not reached: the return above always leaves the function */
			break;
		}

		default: {
			break;
		}

	}

	/* number of sequence slots newly acknowledged by this packet; stays 0
	 * unless the acknowledgement below advances send.unacked */
	gint nPacketsAcked = 0;

	/* check if we can update some TCP control info */
	if(header.flags & PTCP_ACK) {
		/* any packet carrying an ACK counts as processed, even if the
		 * acknowledgement number is outside the range checked below */
		wasProcessed = TRUE;
		if((header.acknowledgement > tcp->send.unacked) && (header.acknowledgement <= tcp->send.next)) {
			/* some data we sent got acknowledged */
			nPacketsAcked = header.acknowledgement - tcp->send.unacked;

			/* the packets just acked are 'released' from retransmit queue */
			for(guint i = tcp->send.unacked; i < header.acknowledgement; i++) {
				_tcp_removeRetransmit(tcp, i);
			}

			tcp->send.unacked = header.acknowledgement;

			/* update congestion window and keep track of when it was updated */
			tcp->congestion.lastWindow = header.window;
			tcp->congestion.lastSequence = header.sequence;
			tcp->congestion.lastAcknowledgement = header.acknowledgement;
		}
	}

	gboolean doRetransmitData = FALSE;

	/* check if the packet carries user data for us */
	if(packetLength > 0) {
		/* it has data, check if its in the correct range */
		if(header.sequence >= (tcp->receive.next + tcp->receive.window)) {
			/* its too far ahead to accept now, but they should re-send it */
			wasProcessed = TRUE;
			doRetransmitData = TRUE;

		} else if(header.sequence >= tcp->receive.next) {
			/* its in our window, so we can accept the data */
			wasProcessed = TRUE;

			/*
			 * if this is THE next packet, we MUST accept it to avoid
			 * deadlocks (unless we are blocked b/c user should read)
			 */
			gboolean isNextPacket = (header.sequence == tcp->receive.next) ? TRUE : FALSE;
			/* NOTE(review): waitingUserRead is TRUE when the input buffer
			 * reports space > 0, which reads like the opposite of "blocked
			 * until the user reads" -- confirm the meaning of
			 * socket_getInputBufferSpace() before relying on this. */
			gboolean waitingUserRead = (socket_getInputBufferSpace(&(tcp->super)) > 0) ? TRUE : FALSE;
			gboolean packetFits = (packetLength <= _tcp_getBufferSpaceIn(tcp)) ? TRUE : FALSE;

			if((isNextPacket && !waitingUserRead) || (packetFits)) {
				/* make sure its in order */
				_tcp_bufferPacketIn(tcp, packet);
			} else {
				debug("no space for packet even though its in our window");
				doRetransmitData = TRUE;
			}
		}
		/* payload with a sequence below receive.next matches neither branch:
		 * it is not buffered and no retransmit is requested */
	}

	/* if it is a spurious packet, send a reset */
	if(!wasProcessed) {
		/* every branch that sets responseFlags also sets wasProcessed */
		g_assert(responseFlags == PTCP_NONE);
		responseFlags = PTCP_RST;
	}

	/* try to update congestion window based on potentially new info */
	_tcp_updateCongestionWindow(tcp, nPacketsAcked);

	/* now flush as many packets as we can to socket */
	_tcp_flush(tcp);

	/* send ack if they need updates but we didn't send any yet (selective acks) */
	if((tcp->receive.next > tcp->send.lastAcknowledgement) ||
			(tcp->receive.window != tcp->send.lastWindow))
	{
		responseFlags |= PTCP_ACK;
	}

	/* send control packet if we have one */
	if(responseFlags != PTCP_NONE) {
		debug("%s <-> %s: sending response control packet",
				tcp->super.boundString, tcp->super.peerString);
		/* control packets carry no payload (NULL data, length 0) */
		Packet* response = _tcp_createPacket(tcp, responseFlags, NULL, 0);
		_tcp_bufferPacketOut(tcp, response);
		_tcp_flush(tcp);
	}

	/* we should free packets that are done but were not buffered.
	 * packetLength is unsigned, so "<= 0" is equivalent to "== 0".
	 * NOTE(review): a packet with payload that was neither buffered nor
	 * flagged for retransmit (sequence below receive.next) is not unref'd
	 * on this path -- confirm who owns the packet in that case. */
	if(!doRetransmitData && packetLength <= 0) {
		packet_unref(packet);
	}
	return doRetransmitData;
}