/*
 * Emulate pipe2(2): create a simulated pipe on the current node and store its
 * two descriptor handles in pipefds (index 0 is the reader, index 1 the
 * writer).
 *
 * Only non-blocking pipes are supported, so O_NONBLOCK must be set in flags.
 * O_CLOEXEC is accepted but has no effect here.
 *
 * Returns 0 on success. On failure returns -1 and sets errno:
 *   EINVAL - O_NONBLOCK was not requested
 *   EFAULT - pipefds is NULL
 */
gint system_pipe2(gint pipefds[2], gint flags) {
    /* O_CLOEXEC says nothing about the blocking mode, so it must not satisfy
     * the non-blocking requirement on its own; only O_NONBLOCK does */
    if(!(flags & O_NONBLOCK)) {
        warning("we only support non-blocking pipes: please bitwise OR 'O_NONBLOCK' with flags");
        errno = EINVAL;
        return -1;
    }

    /* refuse to write the handles through a NULL pointer */
    if(!pipefds) {
        errno = EFAULT;
        return -1;
    }

    Node* node = _system_switchInShadowContext();

    /* creating the pipe descriptor yields one handle; the handle of the
     * other end is obtained from the channel */
    gint handle = node_createDescriptor(node, DT_PIPE);
    Channel* channel = (Channel*) node_lookupDescriptor(node, handle);
    gint linkedHandle = channel_getLinkedHandle(channel);

    _system_switchOutShadowContext(node);

    pipefds[0] = handle; /* reader */
    pipefds[1] = linkedHandle; /* writer */

    return 0;
}
/*
 * Emulate socketpair(2): create a pair of connected descriptors, i.e. a
 * bi-directional pipe, and store their handles in fds.
 *
 * Only domain AF_UNIX with base type SOCK_STREAM is supported. The
 * SOCK_NONBLOCK and SOCK_CLOEXEC bits may be OR'ed into type. The protocol
 * argument is not examined.
 *
 * Returns 0 on success. On failure returns -1 and sets errno:
 *   EAFNOSUPPORT    - domain is not AF_UNIX
 *   EPROTONOSUPPORT - base type is not SOCK_STREAM
 *   EFAULT          - fds is NULL
 */
gint system_socketPair(gint domain, gint type, gint protocol, gint fds[2]) {
    if(domain != AF_UNIX) {
        errno = EAFNOSUPPORT;
        return -1;
    }

    /* strip the modifier bits to get the true socket type */
    gint realType = type & ~(SOCK_NONBLOCK | SOCK_CLOEXEC);
    if(realType != SOCK_STREAM) {
        errno = EPROTONOSUPPORT;
        return -1;
    }

    /* refuse to write the handles through a NULL pointer */
    if(!fds) {
        errno = EFAULT;
        return -1;
    }

    /*
     * SOCK_NONBLOCK is not required: a request without it is accepted and
     * the pair is created anyway.
     * NOTE(review): system_pipe2 rejects the equivalent request for pipes;
     * confirm that the leniency here is intentional.
     */
    Node* node = _system_switchInShadowContext();

    gint handle = node_createDescriptor(node, DT_SOCKETPAIR);
    Channel* channel = (Channel*) node_lookupDescriptor(node, handle);
    gint linkedHandle = channel_getLinkedHandle(channel);

    fds[0] = handle;
    fds[1] = linkedHandle;

    _system_switchOutShadowContext(node);

    return 0;
}
gint system_ioctl(int fd, unsigned long int request, va_list farg) { /* check if this is a socket */ if(fd < MIN_DESCRIPTOR){ errno = EBADF; return -1; } gint result = 0; /* normally, the type of farg depends on the request */ Node* node = _system_switchInShadowContext(); Descriptor* descriptor = node_lookupDescriptor(node, fd); if(descriptor) { DescriptorType t = descriptor_getType(descriptor); if(t == DT_TCPSOCKET || t == DT_UDPSOCKET) { Socket* socket = (Socket*) descriptor; if(request == SIOCINQ || request == FIONREAD) { gsize bufferLength = socket_getInputBufferLength(socket); gint* lengthOut = va_arg(farg, int*); *lengthOut = (gint)bufferLength; } else if (request == SIOCOUTQ || request == TIOCOUTQ) {
/*
 * Process one incoming TCP packet for this socket.
 *
 * The packet is first re-targeted with _tcp_getSourceTCP() (for a server it
 * could belong to an existing child), then handled in this order:
 *   1. RST: mark the connection as reset if applicable, release the packet
 *      and stop.
 *   2. connection state machine (switch on tcp->state), which may change
 *      state and select control flags for a response.
 *   3. ACK bookkeeping: advance send.unacked and remove the acknowledged
 *      sequence numbers from the retransmit queue.
 *   4. payload: buffer data that is inside the receive window, or request
 *      that it be sent again.
 *   5. update the congestion window, flush, and send a control response if
 *      one is needed.
 *
 * Returns TRUE if the packet should be retransmitted (its data was beyond
 * the receive window, or did not fit), FALSE otherwise.
 *
 * The packet is unref'd in this function on the RST path, on the LASTACK
 * FIN+ACK path, in the CLOSED state, and at the end when it carries no
 * payload. Packets handed to _tcp_bufferPacketIn() are not unref'd here.
 */
gboolean tcp_processPacket(TCP* tcp, Packet* packet) {
    MAGIC_ASSERT(tcp);

    /* fetch the TCP info from the packet */
    PacketTCPHeader header;
    packet_getTCPHeader(packet, &header);
    guint packetLength = packet_getPayloadLength(packet);

    /* if we run a server, the packet could be for an existing child */
    tcp = _tcp_getSourceTCP(tcp, header.sourceIP, header.sourcePort);

    /* now we have the true TCP for the packet */
    MAGIC_ASSERT(tcp);

    /* print packet info for debugging */
    debug("%s <-> %s: processing packet# %u length %u",
            tcp->super.boundString, tcp->super.peerString, header.sequence, packetLength);

    /* if packet is reset, don't process */
    if(header.flags & PTCP_RST) {
        /* @todo: not sure if this is handled correctly */
        debug("received RESET packet");

        /* NOTE(review): the state is tested with a bitwise AND here but
         * compared as a discrete value in the switch below; this assumes the
         * TCPS_* values are distinct bits - confirm */
        if(!(tcp->state & TCPS_LISTEN) && !(tcp->error & TCPE_CONNECTION_RESET)) {
            tcp->error |= TCPE_CONNECTION_RESET;
            tcp->flags |= TCPF_REMOTE_CLOSED;

            _tcp_setState(tcp, TCPS_TIMEWAIT);

            /* it will send no more user data after what we have now */
            tcp->receive.end = tcp->receive.next;
        }

        packet_unref(packet);
        return FALSE;
    }

    /* if we are a server, we have to remember who we got this from so we can
     * respond back to them. this is because we could be bound to several
     * interfaces and otherwise cant decide which to send on.
     */
    if(tcp->server) {
        tcp->server->lastPeerIP = header.sourceIP;
        tcp->server->lastPeerPort = header.sourcePort;
        tcp->server->lastIP = header.destinationIP;
    }

    /* go through the state machine, tracking processing and response */
    gboolean wasProcessed = FALSE;
    enum ProtocolTCPFlags responseFlags = PTCP_NONE;

    switch(tcp->state) {
        case TCPS_LISTEN: {
            /* receive SYN, send SYNACK, move to SYNRECEIVED */
            if(header.flags & PTCP_SYN) {
                MAGIC_ASSERT(tcp->server);
                wasProcessed = TRUE;

                /* we need to multiplex a new child */
                Node* node = worker_getPrivate()->cached_node;
                gint multiplexedHandle = node_createDescriptor(node, DT_TCPSOCKET);
                TCP* multiplexed = (TCP*) node_lookupDescriptor(node, multiplexedHandle);

                /* register the child with the server, keyed by the child's
                 * key; the assert requires that no entry with that key
                 * exists yet */
                multiplexed->child = _tcpchild_new(multiplexed, tcp, header.sourceIP, header.sourcePort);
                g_assert(g_hash_table_lookup(tcp->server->children, &(multiplexed->child->key)) == NULL);
                g_hash_table_replace(tcp->server->children, &(multiplexed->child->key), multiplexed->child);

                /* the SYN occupies one sequence number */
                multiplexed->receive.start = header.sequence;
                multiplexed->receive.next = multiplexed->receive.start + 1;

                debug("%s <-> %s: server multiplexed child socket %s <-> %s",
                        tcp->super.boundString, tcp->super.peerString,
                        multiplexed->super.boundString, multiplexed->super.peerString);

                /* only the child changes state; tcp still refers to the
                 * listening socket for the rest of this function */
                _tcp_setState(multiplexed, TCPS_SYNRECEIVED);

                /* parent will send response */
                responseFlags = PTCP_SYN|PTCP_ACK;
            }
            break;
        }

        case TCPS_SYNSENT: {
            /* receive SYNACK, send ACK, move to ESTABLISHED */
            if((header.flags & PTCP_SYN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                tcp->receive.start = header.sequence;
                tcp->receive.next = tcp->receive.start + 1;

                responseFlags |= PTCP_ACK;
                _tcp_setState(tcp, TCPS_ESTABLISHED);
            }
            /* receive SYN, send ACK, move to SYNRECEIVED (simultaneous open) */
            else if(header.flags & PTCP_SYN) {
                wasProcessed = TRUE;
                tcp->receive.start = header.sequence;
                tcp->receive.next = tcp->receive.start + 1;

                responseFlags |= PTCP_ACK;
                _tcp_setState(tcp, TCPS_SYNRECEIVED);
            }
            break;
        }

        case TCPS_SYNRECEIVED: {
            /* receive ACK, move to ESTABLISHED */
            if(header.flags & PTCP_ACK) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_ESTABLISHED);

                /* if this is a child, mark it accordingly */
                if(tcp->child) {
                    tcp->child->state = TCPCS_PENDING;
                    g_queue_push_tail(tcp->child->parent->server->pending, tcp->child);
                    /* user should accept new child from parent */
                    descriptor_adjustStatus(&(tcp->child->parent->super.super.super), DS_READABLE, TRUE);
                }
            }
            break;
        }

        case TCPS_ESTABLISHED: {
            /* receive FIN, send FINACK, move to CLOSEWAIT */
            if(header.flags & PTCP_FIN) {
                wasProcessed = TRUE;

                /* other side of connections closed */
                tcp->flags |= TCPF_REMOTE_CLOSED;
                responseFlags |= (PTCP_FIN|PTCP_ACK);
                _tcp_setState(tcp, TCPS_CLOSEWAIT);

                /* remote will send us no more user data after this sequence */
                tcp->receive.end = header.sequence;
            }
            break;
        }

        case TCPS_FINWAIT1: {
            /* receive FINACK, move to FINWAIT2 */
            if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_FINWAIT2);
            }
            /* receive FIN, send FINACK, move to CLOSING (simultaneous close) */
            else if(header.flags & PTCP_FIN) {
                wasProcessed = TRUE;
                responseFlags |= (PTCP_FIN|PTCP_ACK);
                tcp->flags |= TCPF_REMOTE_CLOSED;
                _tcp_setState(tcp, TCPS_CLOSING);

                /* it will send no more user data after this sequence */
                tcp->receive.end = header.sequence;
            }
            break;
        }

        case TCPS_FINWAIT2: {
            /* receive FIN, send FINACK, move to TIMEWAIT */
            if(header.flags & PTCP_FIN) {
                wasProcessed = TRUE;
                responseFlags |= (PTCP_FIN|PTCP_ACK);
                tcp->flags |= TCPF_REMOTE_CLOSED;
                _tcp_setState(tcp, TCPS_TIMEWAIT);

                /* it will send no more user data after this sequence */
                tcp->receive.end = header.sequence;
            }
            break;
        }

        case TCPS_CLOSING: {
            /* receive FINACK, move to TIMEWAIT */
            if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_TIMEWAIT);
            }
            break;
        }

        /* no state transitions are driven by incoming packets in these two
         * states; ACK and payload handling below still runs */
        case TCPS_TIMEWAIT: {
            break;
        }

        case TCPS_CLOSEWAIT: {
            break;
        }

        case TCPS_LASTACK: {
            /* receive FINACK, move to CLOSED */
            if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_CLOSED);
                /* we closed, cant use tcp anymore, no retransmit */
                packet_unref(packet);
                return FALSE;
            }
            break;
        }

        case TCPS_CLOSED: {
            /* stray packet, drop without retransmit */
            packet_unref(packet);
            return FALSE;
            break;
        }

        default: {
            break;
        }
    }

    gint nPacketsAcked = 0;

    /* check if we can update some TCP control info */
    if(header.flags & PTCP_ACK) {
        wasProcessed = TRUE;
        /* only an acknowledgement inside (send.unacked, send.next] is new */
        if((header.acknowledgement > tcp->send.unacked) && (header.acknowledgement <= tcp->send.next)) {
            /* some data we sent got acknowledged */
            nPacketsAcked = header.acknowledgement - tcp->send.unacked;

            /* the packets just acked are 'released' from retransmit queue */
            for(guint i = tcp->send.unacked; i < header.acknowledgement; i++) {
                _tcp_removeRetransmit(tcp, i);
            }

            tcp->send.unacked = header.acknowledgement;

            /* update congestion window and keep track of when it was updated */
            tcp->congestion.lastWindow = header.window;
            tcp->congestion.lastSequence = header.sequence;
            tcp->congestion.lastAcknowledgement = header.acknowledgement;
        }
    }

    gboolean doRetransmitData = FALSE;

    /* check if the packet carries user data for us */
    /* NOTE(review): a data packet whose sequence is below receive.next takes
     * neither branch below; it is not buffered, not marked for retransmit,
     * and (because packetLength > 0) not unref'd at the end of this function
     * - confirm who releases it */
    if(packetLength > 0) {
        /* it has data, check if its in the correct range */
        if(header.sequence >= (tcp->receive.next + tcp->receive.window)) {
            /* its too far ahead to accept now, but they should re-send it */
            wasProcessed = TRUE;
            doRetransmitData = TRUE;
        } else if(header.sequence >= tcp->receive.next) {
            /* its in our window, so we can accept the data */
            wasProcessed = TRUE;

            /*
             * if this is THE next packet, we MUST accept it to avoid
             * deadlocks (unless we are blocked b/c user should read)
             */
            gboolean isNextPacket = (header.sequence == tcp->receive.next) ? TRUE : FALSE;
            /* NOTE(review): this flag is TRUE when the input buffer still
             * has free space, which looks like the opposite of "waiting for
             * the user to read" - confirm the intended polarity */
            gboolean waitingUserRead = (socket_getInputBufferSpace(&(tcp->super)) > 0) ? TRUE : FALSE;
            gboolean packetFits = (packetLength <= _tcp_getBufferSpaceIn(tcp)) ? TRUE : FALSE;

            if((isNextPacket && !waitingUserRead) || (packetFits)) {
                /* make sure its in order */
                _tcp_bufferPacketIn(tcp, packet);
            } else {
                debug("no space for packet even though its in our window");
                doRetransmitData = TRUE;
            }
        }
    }

    /* if it is a spurious packet, send a reset */
    if(!wasProcessed) {
        g_assert(responseFlags == PTCP_NONE);
        responseFlags = PTCP_RST;
    }

    /* try to update congestion window based on potentially new info */
    _tcp_updateCongestionWindow(tcp, nPacketsAcked);

    /* now flush as many packets as we can to socket */
    _tcp_flush(tcp);

    /* send ack if they need updates but we didn't send any yet (selective acks) */
    if((tcp->receive.next > tcp->send.lastAcknowledgement) ||
            (tcp->receive.window != tcp->send.lastWindow))
    {
        responseFlags |= PTCP_ACK;
    }

    /* send control packet if we have one */
    if(responseFlags != PTCP_NONE) {
        debug("%s <-> %s: sending response control packet",
                tcp->super.boundString, tcp->super.peerString);
        Packet* response = _tcp_createPacket(tcp, responseFlags, NULL, 0);
        _tcp_bufferPacketOut(tcp, response);
        _tcp_flush(tcp);
    }

    /* we should free packets that are done but were not buffered */
    /* packetLength is unsigned, so "<= 0" means "no payload" */
    if(!doRetransmitData && packetLength <= 0) {
        packet_unref(packet);
    }

    return doRetransmitData;
}
/*
 * Look up the channel linked to the given one: the descriptor registered
 * under channel->linkedHandle on the worker's cached node. The result of the
 * lookup is returned as-is, cast to a Channel.
 */
static Channel* channel_getLinkedChannel(Channel* channel) {
    MAGIC_ASSERT(channel);

    Node* currentNode = worker_getPrivate()->cached_node;
    Descriptor* linked = node_lookupDescriptor(currentNode, channel->linkedHandle);

    return (Channel*) linked;
}
/*
 * Emulate setsockopt(2) for simulated descriptors.
 *
 * Only level SOL_SOCKET is handled, with these options:
 *   SO_SNDBUF / SO_RCVBUF - TCP and UDP sockets only; the buffer size is set
 *                           to twice the requested value
 *   SO_REUSEADDR          - accepted, but currently a no-op
 *
 * Returns 0 on success. On failure returns -1 and sets errno:
 *   EFAULT      - optval is NULL
 *   EBADF       - no descriptor is registered for fd
 *   EINVAL      - optlen is smaller than an int for SO_SNDBUF/SO_RCVBUF
 *   ENOPROTOOPT - SO_SNDBUF/SO_RCVBUF on a descriptor that is not TCP or UDP
 *   ENOSYS      - unsupported level or option name
 */
gint system_setSockOpt(gint fd, gint level, gint optname, const gpointer optval, socklen_t optlen) {
    if(!optval) {
        errno = EFAULT;
        return -1;
    }

    gint status = 0;

    Node* node = _system_switchInShadowContext();
    Descriptor* descriptor = node_lookupDescriptor(node, fd);

    /* TODO: implement socket options */
    if(!descriptor) {
        errno = EBADF;
        status = -1;
    } else if(level != SOL_SOCKET) {
        warning("setsockopt level %i not implemented", level);
        errno = ENOSYS;
        status = -1;
    } else {
        DescriptorType type = descriptor_getType(descriptor);
        gboolean isTcpOrUdp = (type == DT_TCPSOCKET || type == DT_UDPSOCKET) ? TRUE : FALSE;

        if(optname == SO_SNDBUF) {
            if(optlen < sizeof(gint)) {
                warning("called setsockopt with SO_SNDBUF with optlen < %i", (gint)(sizeof(gint)));
                errno = EINVAL;
                status = -1;
            } else if(!isTcpOrUdp) {
                warning("called setsockopt with SO_SNDBUF on non-socket");
                errno = ENOPROTOOPT;
                status = -1;
            } else {
                /* NOTE(review): a negative request becomes a very large size
                 * after the conversion to gsize - confirm callers never pass one */
                gint requested = *((gint*) optval);
                gsize doubled = ((gsize) requested) * 2;
                socket_setOutputBufferSize((Socket*) descriptor, doubled);
            }
        } else if(optname == SO_RCVBUF) {
            if(optlen < sizeof(gint)) {
                warning("called setsockopt with SO_RCVBUF with optlen < %i", (gint)(sizeof(gint)));
                errno = EINVAL;
                status = -1;
            } else if(!isTcpOrUdp) {
                warning("called setsockopt with SO_RCVBUF on non-socket");
                errno = ENOPROTOOPT;
                status = -1;
            } else {
                gint requested = *((gint*) optval);
                gsize doubled = ((gsize) requested) * 2;
                socket_setInputBufferSize((Socket*) descriptor, doubled);
            }
        } else if(optname == SO_REUSEADDR) {
            /* TODO implement this! */
            /* XXX Tor actually uses this option!! */
            debug("setsockopt SO_REUSEADDR not yet implemented");
        } else {
            warning("setsockopt optname %i not implemented", optname);
            errno = ENOSYS;
            status = -1;
        }
    }

    _system_switchOutShadowContext(node);

    return status;
}
gint system_getSockOpt(gint fd, gint level, gint optname, gpointer optval, socklen_t* optlen) { if(!optlen) { errno = EFAULT; return -1; } Node* node = _system_switchInShadowContext(); Descriptor* descriptor = node_lookupDescriptor(node, fd); gint result = 0; /* TODO: implement socket options */ if(descriptor) { if(level == SOL_SOCKET || level == SOL_IP || level == SOL_TCP) { DescriptorType t = descriptor_getType(descriptor); switch (optname) { case TCP_INFO: { if(t == DT_TCPSOCKET) { if(optval) { TCP* tcp = (TCP*)descriptor; tcp_getInfo(tcp, (struct tcp_info *)optval); } *optlen = sizeof(struct tcp_info); result = 0; } else { warning("called getsockopt with TCP_INFO on non-TCP socket"); errno = ENOPROTOOPT; result = -1; } break; } case SO_SNDBUF: { if(*optlen < sizeof(gint)) { warning("called getsockopt with SO_SNDBUF with optlen < %i", (gint)(sizeof(gint))); errno = EINVAL; result = -1; } else if (t != DT_TCPSOCKET && t != DT_UDPSOCKET) { warning("called getsockopt with SO_SNDBUF on non-socket"); errno = ENOPROTOOPT; result = -1; } else { if(optval) { *((gint*) optval) = (gint) socket_getOutputBufferSize((Socket*)descriptor); } *optlen = sizeof(gint); } break; } case SO_RCVBUF: { if(*optlen < sizeof(gint)) { warning("called getsockopt with SO_RCVBUF with optlen < %i", (gint)(sizeof(gint))); errno = EINVAL; result = -1; } else if (t != DT_TCPSOCKET && t != DT_UDPSOCKET) { warning("called getsockopt with SO_RCVBUF on non-socket"); errno = ENOPROTOOPT; result = -1; } else { if(optval) { *((gint*) optval) = (gint) socket_getInputBufferSize((Socket*)descriptor); } *optlen = sizeof(gint); } break; } case SO_ERROR: { if(optval) { *((gint*)optval) = 0; } *optlen = sizeof(gint); result = 0; break; } default: { warning("getsockopt optname %i not implemented", optname); errno = ENOSYS; result = -1; break; } } } else { warning("getsockopt level %i not implemented", level); errno = ENOSYS; result = -1; } } else { errno = EBADF; result = -1; } 
_system_switchOutShadowContext(node); return result; }