/*
 * Queue an outgoing packet on the socket's output buffer.
 *
 * Returns FALSE (leaving everything untouched) when the packet payload is
 * larger than the space currently left in the output buffer, TRUE once the
 * packet has been queued. On success the buffer length is increased by the
 * payload size, the packet is tagged PDS_SND_SOCKET_BUFFERED, the tracker is
 * told about the new output-buffer occupancy, DS_WRITABLE is cleared if no
 * space remains, and the interface matching the packet's source IP is asked
 * to include this socket when sending.
 */
gboolean socket_addToOutputBuffer(Socket* socket, Packet* packet) {
    MAGIC_ASSERT(socket);

    /* reject the packet if its payload exceeds the remaining space */
    guint payloadBytes = packet_getPayloadLength(packet);
    if(payloadBytes > socket_getOutputBufferSpace(socket)) {
        return FALSE;
    }

    /* append to the tail of the queue and account for the bytes */
    g_queue_push_tail(socket->outputBuffer, packet);
    socket->outputBufferLength += payloadBytes;
    packet_addDeliveryStatus(packet, PDS_SND_SOCKET_BUFFERED);

    /* report the new output buffer occupancy to the tracker */
    Tracker* hostTracker = host_getTracker(worker_getCurrentHost());
    Descriptor* self = (Descriptor*)socket;
    tracker_updateSocketOutputBuffer(hostTracker, self->handle,
            socket->outputBufferLength, socket->outputBufferSize);

    /* the packet we just added may have filled the buffer */
    if(socket_getOutputBufferSpace(socket) <= 0) {
        descriptor_adjustStatus(self, DS_WRITABLE, FALSE);
    }

    /* the interface that owns the source IP must know we have data to send */
    in_addr_t sourceIP = packet_getSourceIP(packet);
    NetworkInterface* iface = host_lookupInterface(worker_getCurrentHost(), sourceIP);
    networkinterface_wantsSend(iface, socket);

    return TRUE;
}
/*
 * Place an outgoing packet into the TCP throttled-output queue, keeping the
 * queue sorted with packet_compareTCPSequence, and add the packet's payload
 * length to the throttled-output byte count.
 */
static void _tcp_bufferPacketOut(TCP* tcp, Packet* packet) {
    MAGIC_ASSERT(tcp);

    /* TCP holds packets back here so it can avoid congestion */
    GCompareDataFunc bySequence = (GCompareDataFunc)packet_compareTCPSequence;
    g_queue_insert_sorted(tcp->throttledOutput, packet, bySequence, NULL);

    tcp->throttledOutputLength += packet_getPayloadLength(packet);
}
/*
 * Account for an outgoing packet in the tracker's byte counters.
 *
 * Nothing is recorded unless the tracker has TRACKER_FLAGS_NODE and/or
 * TRACKER_FLAGS_SOCKET set. A packet whose source IP is the loopback address
 * is counted in the 'local' counters, anything else in the 'remote' ones.
 * Per-socket counters are only updated when stats exist for 'handle'.
 */
void tracker_addOutputBytes(Tracker* tracker, Packet* packet, gint handle) {
    MAGIC_ASSERT(tracker);

    TrackerFlags flags = _tracker_getFlags(tracker);
    gboolean tracksNode = (flags & TRACKER_FLAGS_NODE) ? TRUE : FALSE;
    gboolean tracksSocket = (flags & TRACKER_FLAGS_SOCKET) ? TRUE : FALSE;
    if(!tracksNode && !tracksSocket) {
        /* neither kind of statistic is enabled */
        return;
    }

    gboolean fromLoopback = packet_getSourceIP(packet) == htonl(INADDR_LOOPBACK);
    gsize headerBytes = (gsize)packet_getHeaderSize(packet);
    gsize payloadBytes = (gsize)packet_getPayloadLength(packet);
    PacketDeliveryStatusFlags deliveryStatus = packet_getDeliveryStatus(packet);

    /* host-wide counters */
    if(tracksNode) {
        _tracker_updateCounters(
                fromLoopback ? &tracker->local.outCounters : &tracker->remote.outCounters,
                headerBytes, payloadBytes, deliveryStatus);
    }

    /* per-socket counters, if this handle is being tracked */
    if(tracksSocket) {
        SocketStats* stats = g_hash_table_lookup(tracker->socketStats, &handle);
        if(stats) {
            _tracker_updateCounters(
                    fromLoopback ? &stats->local.outCounters : &stats->remote.outCounters,
                    headerBytes, payloadBytes, deliveryStatus);
        }
    }
}
/*
 * Pop the packet at the head of the socket's output buffer.
 *
 * Returns NULL when the buffer is empty. Otherwise the buffer length is
 * reduced by the packet's payload size, a pending buffer resize is applied,
 * the tracker is told about the new output-buffer occupancy, and DS_WRITABLE
 * is set if there is now free space.
 */
Packet* socket_removeFromOutputBuffer(Socket* socket) {
    MAGIC_ASSERT(socket);

    Packet* head = g_queue_pop_head(socket->outputBuffer);
    if(!head) {
        /* nothing queued */
        return NULL;
    }

    /* the packet's payload no longer occupies buffer space */
    guint payloadBytes = packet_getPayloadLength(head);
    socket->outputBufferLength -= payloadBytes;

    /* apply a buffer size change that was waiting for room */
    if(socket->outputBufferSizePending > 0) {
        socket_setOutputBufferSize(socket, socket->outputBufferSizePending);
    }

    /* report the new output buffer occupancy to the tracker */
    Tracker* hostTracker = host_getTracker(worker_getCurrentHost());
    Descriptor* self = (Descriptor*)socket;
    tracker_updateSocketOutputBuffer(hostTracker, self->handle,
            socket->outputBufferLength, socket->outputBufferSize);

    /* with free space available the socket can be written again */
    if(socket_getOutputBufferSpace(socket) > 0) {
        descriptor_adjustStatus(self, DS_WRITABLE, TRUE);
    }

    return head;
}
/*
 * Place an incoming packet into the TCP unordered-input queue, keeping the
 * queue sorted with packet_compareTCPSequence, and add the packet's payload
 * length to the unordered-input byte count.
 */
static void _tcp_bufferPacketIn(TCP* tcp, Packet* packet) {
    MAGIC_ASSERT(tcp);

    /* data must reach the user in order, so keep the queue sorted */
    GCompareDataFunc bySequence = (GCompareDataFunc)packet_compareTCPSequence;
    g_queue_insert_sorted(tcp->unorderedInput, packet, bySequence, NULL);

    tcp->unorderedInputLength += packet_getPayloadLength(packet);
}
/*
 * Queue an incoming packet on the socket's input buffer.
 *
 * Returns FALSE (leaving everything untouched) when the packet payload is
 * larger than the space currently left in the input buffer, TRUE once the
 * packet has been queued. On success the buffer takes its own reference to
 * the packet, the buffer length grows by the payload size, the packet is
 * tagged PDS_RCV_SOCKET_BUFFERED, the tracker is told about the new
 * input-buffer occupancy, and DS_READABLE is set if the buffer is non-empty.
 */
gboolean socket_addToInputBuffer(Socket* socket, Packet* packet) {
    MAGIC_ASSERT(socket);

    /* reject the packet if its payload exceeds the remaining space */
    guint payloadBytes = packet_getPayloadLength(packet);
    if(payloadBytes > socket_getInputBufferSpace(socket)) {
        return FALSE;
    }

    /* append to the tail of the queue; the queue holds a reference */
    g_queue_push_tail(socket->inputBuffer, packet);
    packet_ref(packet);
    socket->inputBufferLength += payloadBytes;
    packet_addDeliveryStatus(packet, PDS_RCV_SOCKET_BUFFERED);

    /* report the new input buffer occupancy to the tracker */
    Tracker* hostTracker = host_getTracker(worker_getCurrentHost());
    Descriptor* self = (Descriptor*)socket;
    tracker_updateSocketInputBuffer(hostTracker, self->handle,
            socket->inputBufferLength, socket->inputBufferSize);

    /* buffered bytes mean the user has something to read */
    if(socket->inputBufferLength > 0) {
        descriptor_adjustStatus(self, DS_READABLE, TRUE);
    }

    return TRUE;
}
/*
 * Process a UDP packet that arrived for this socket.
 *
 * A packet carrying payload is offered to the socket's input buffer right
 * away; a packet without payload is ignored.
 *
 * Returns TRUE if the packet should be retransmitted, i.e. it carried
 * payload but the input buffer could not take it. Returns FALSE when the
 * packet was buffered or had no payload.
 */
gboolean udp_processPacket(UDP* udp, Packet* packet) {
    MAGIC_ASSERT(udp);

    /* UDP packet contains data for user and can be buffered immediately */
    if(packet_getPayloadLength(packet) > 0) {
        /*
         * socket_addToInputBuffer reports TRUE when the packet WAS buffered,
         * which is the opposite of "needs retransmit", so negate it.
         */
        gboolean wasBuffered = socket_addToInputBuffer((Socket*)udp, packet);
        return wasBuffered ? FALSE : TRUE;
    }

    return FALSE;
}
/*
 * Process a UDP packet that arrived for this socket.
 *
 * A packet carrying payload is offered to the socket's input buffer; if the
 * buffer refuses it, the packet is tagged PDS_RCV_SOCKET_DROPPED. A packet
 * without payload is ignored.
 */
void udp_processPacket(UDP* udp, Packet* packet) {
    MAGIC_ASSERT(udp);

    /* nothing to deliver to the user */
    if(packet_getPayloadLength(packet) <= 0) {
        return;
    }

    /* UDP data can go straight to the user's input buffer */
    gboolean wasBuffered = socket_addToInputBuffer((Socket*)udp, packet);
    if(!wasBuffered) {
        packet_addDeliveryStatus(packet, PDS_RCV_SOCKET_DROPPED);
    }
}
/*
 * Receive queued inbound packets in a batch and schedule a callback for when
 * the consumed receive time has elapsed.
 *
 * Packets are taken from the head of the interface's inbound queue until it
 * is empty or the consumed receive time exceeds the configured batch time.
 * Each packet is logged to pcap, charged against the downstream bandwidth,
 * and pushed to the socket bound to its destination association key (or
 * tagged PDS_RCV_INTERFACE_DROPPED when no socket is bound). The interface
 * then releases its reference to the packet.
 */
static void _networkinterface_scheduleNextReceive(NetworkInterface* interface) {
    /* upper bound on the receive time consumed by one batch */
    SimulationTime batchLimit = worker_getConfig()->interfaceBatchTime;

    for(;;) {
        /* stop when nothing is queued or the batch budget is exceeded */
        if(g_queue_is_empty(interface->inBuffer) ||
                !(interface->receiveNanosecondsConsumed <= batchLimit)) {
            break;
        }

        Packet* packet = g_queue_pop_head(interface->inBuffer);
        utility_assert(packet);

        /* the interface has now received it */
        packet_addDeliveryStatus(packet, PDS_RCV_INTERFACE_RECEIVED);
        _networkinterface_pcapWritePacket(interface, packet);

        /* header plus payload no longer occupy the interface buffer */
        guint wireBytes = packet_getPayloadLength(packet) + packet_getHeaderSize(packet);
        interface->inBufferLength -= wireBytes;

        /* charge the time it took to 'receive' these bytes */
        interface->receiveNanosecondsConsumed += (wireBytes * interface->timePerByteDown);

        /* find the socket that should get this packet */
        gint assocKey = packet_getDestinationAssociationKey(packet);
        Socket* boundSocket = g_hash_table_lookup(interface->boundSockets, GINT_TO_POINTER(assocKey));

        gint boundHandle = -1;
        if(!boundSocket) {
            /* the socket closed, so the packet is dropped */
            packet_addDeliveryStatus(packet, PDS_RCV_INTERFACE_DROPPED);
        } else {
            boundHandle = *descriptor_getHandleReference((Descriptor*)boundSocket);
            socket_pushInPacket(boundSocket, packet);
        }

        packet_unref(packet);

        /* bandwidth usage by interface, and by socket handle if known */
        tracker_addInputBytes(host_getTracker(worker_getCurrentHost()),(guint64)wireBytes, boundHandle);
    }

    /*
     * call back and try to receive more even if the batch time was not used
     * up, because more packets may have arrived by then.
     */
    SimulationTime callbackDelay = (SimulationTime) floor(interface->receiveNanosecondsConsumed);
    if(callbackDelay < SIMTIME_ONE_NANOSECOND) {
        return;
    }

    /* mark that we are 'receiving' the packets */
    interface->flags |= NIF_RECEIVING;

    /* fire on our own node once the packets count as 'received' */
    InterfaceReceivedEvent* receivedEvent = interfacereceived_new(interface);
    worker_scheduleEvent((Event*)receivedEvent, callbackDelay, 0);
}
/*
 * Receive queued inbound packets in a batch and schedule a callback for when
 * the consumed receive time has elapsed.
 *
 * Packets are taken from the head of the interface's inbound queue until it
 * is empty or the consumed receive time exceeds the configured batch time.
 * Each packet is logged, written to pcap, and pushed to the socket bound to
 * its destination association key. If that socket reports that the packet
 * needs a retransmit, the packet is dropped on the inbound side. A packet
 * with no bound socket is only accounted for.
 */
static void _networkinterface_scheduleNextReceive(NetworkInterface* interface) {
    /* upper bound on the receive time consumed by one batch */
    SimulationTime batchLimit = worker_getConfig()->interfaceBatchTime;

    for(;;) {
        /* stop when nothing is queued or the batch budget is exceeded */
        if(g_queue_is_empty(interface->inBuffer) ||
                !(interface->receiveNanosecondsConsumed <= batchLimit)) {
            break;
        }

        Packet* packet = g_queue_pop_head(interface->inBuffer);
        g_assert(packet);

        /* header plus payload no longer occupy the interface buffer */
        guint wireBytes = packet_getPayloadLength(packet) + packet_getHeaderSize(packet);
        interface->inBufferLength -= wireBytes;

        /* find the socket that should get this packet */
        gint assocKey = packet_getDestinationAssociationKey(packet);
        Socket* boundSocket = g_hash_table_lookup(interface->boundSockets, GINT_TO_POINTER(assocKey));

        gchar* description = packet_getString(packet);
        debug("packet in: %s", description);
        g_free(description);

        _networkinterface_pcapWritePacket(interface, packet);

        /* without a bound socket the handle stays -1 and nothing is pushed */
        gint boundHandle = -1;
        if(boundSocket) {
            boundHandle = *descriptor_getHandleReference((Descriptor*)boundSocket);
            if(socket_pushInPacket(boundSocket, packet)) {
                /* the socket can not take it now, so drop it */
                _networkinterface_dropInboundPacket(interface, packet);
            }
        }

        /* charge the time it took to 'receive' these bytes */
        interface->receiveNanosecondsConsumed += (wireBytes * interface->timePerByteDown);
        tracker_addInputBytes(node_getTracker(worker_getPrivate()->cached_node),(guint64)wireBytes, boundHandle);
    }

    /*
     * call back and try to receive more even if the batch time was not used
     * up, because more packets may have arrived by then.
     */
    SimulationTime callbackDelay = (SimulationTime) floor(interface->receiveNanosecondsConsumed);
    if(callbackDelay < SIMTIME_ONE_NANOSECOND) {
        return;
    }

    /* mark that we are 'receiving' the packets */
    interface->flags |= NIF_RECEIVING;

    /* fire on our own node once the packets count as 'received' */
    InterfaceReceivedEvent* receivedEvent = interfacereceived_new(interface);
    worker_scheduleEvent((Event*)receivedEvent, callbackDelay, 0);
}
/*
 * Send a packet across the simulated network.
 *
 * Does nothing once the simulation has been killed. The source and
 * destination IPs are resolved to addresses; if either is unknown an error
 * is logged and the packet is not scheduled. A random draw against the path
 * reliability decides whether the packet is 'dropped' (tagged
 * PDS_INET_DROPPED); packets with an empty payload are never dropped.
 * Surviving packets are scheduled to arrive at the destination after the
 * path latency and tagged PDS_INET_SENT.
 */
void worker_schedulePacket(Packet* packet) {
    /* thread-private worker state */
    Worker* self = _worker_getPrivate();
    if(slave_isKilled(self->slave)) {
        /* the simulation is over, nothing left to deliver */
        return;
    }

    in_addr_t sourceIP = packet_getSourceIP(packet);
    in_addr_t destinationIP = packet_getDestinationIP(packet);

    Address* srcAddress = dns_resolveIPToAddress(worker_getDNS(), (guint32) sourceIP);
    Address* dstAddress = dns_resolveIPToAddress(worker_getDNS(), (guint32) destinationIP);
    if(srcAddress == NULL || dstAddress == NULL) {
        error("unable to schedule packet because of null addresses");
        return;
    }

    /* draw against the path reliability to decide on a 'drop' */
    gdouble reliability = topology_getReliability(worker_getTopology(), srcAddress, dstAddress);
    Random* hostRandom = host_getRandom(worker_getCurrentHost());
    gdouble draw = random_nextDouble(hostRandom);

    /*
     * control packets with length 0 are never dropped, otherwise congestion
     * control has problems responding to packet loss
     */
    if(!(draw <= reliability || packet_getPayloadLength(packet) == 0)) {
        packet_addDeliveryStatus(packet, PDS_INET_DROPPED);
        return;
    }

    /* the packet makes it through: deliver it after the path latency */
    gdouble latency = topology_getLatency(worker_getTopology(), srcAddress, dstAddress);
    SimulationTime arrivalDelay = (SimulationTime) ceil(latency * SIMTIME_ONE_MILLISECOND);

    PacketArrivedEvent* arrival = packetarrived_new(packet);
    worker_scheduleEvent((Event*)arrival, arrivalDelay, (GQuark)address_getID(dstAddress));

    packet_addDeliveryStatus(packet, PDS_INET_SENT);
}
/*
 * Handle a packet that has arrived at this interface.
 *
 * If header plus payload fit into the free inbound buffer space, the packet
 * is queued and, unless a receive is already in progress, the next receive
 * is scheduled. Otherwise the packet is dropped on the inbound side.
 */
void networkinterface_packetArrived(NetworkInterface* interface, Packet* packet) {
    MAGIC_ASSERT(interface);

    /* compare the bytes needed against what is still free */
    guint wireBytes = packet_getPayloadLength(packet) + packet_getHeaderSize(packet);
    gssize space = interface->inBufferSize - interface->inBufferLength;
    g_assert(space >= 0);

    if(wireBytes > space) {
        /* buffers are full, drop packet */
        _networkinterface_dropInboundPacket(interface, packet);
        return;
    }

    /* there is room: buffer it */
    g_queue_push_tail(interface->inBuffer, packet);
    interface->inBufferLength += wireBytes;

    /* a receive that is already running will pick the packet up */
    if(interface->flags & NIF_RECEIVING) {
        return;
    }
    _networkinterface_scheduleNextReceive(interface);
}
/*
 * Deliver one buffered UDP datagram to the application.
 *
 * Removes the next packet from the socket's input buffer and copies up to
 * nBytes of its payload into 'buffer'; payload bytes beyond nBytes are
 * discarded along with the packet. If 'ip' / 'port' are non-NULL they
 * receive the packet's source IP and source port.
 *
 * Returns the number of bytes copied, or -1 when no packet is buffered.
 */
gssize udp_receiveUserData(UDP* udp, gpointer buffer, gsize nBytes, in_addr_t* ip, in_port_t* port) {
    MAGIC_ASSERT(udp);

    Socket* socket = (Socket*)udp;

    Packet* datagram = socket_removeFromInputBuffer(socket);
    if(datagram == NULL) {
        return -1;
    }

    /* hand over the smaller of what was asked for and what is available */
    guint available = packet_getPayloadLength(datagram);
    gsize copyLength = MIN(nBytes, available);
    guint bytesCopied = packet_copyPayload(datagram, 0, buffer, copyLength);
    utility_assert(bytesCopied == copyLength);
    packet_addDeliveryStatus(datagram, PDS_RCV_SOCKET_DELIVERED);

    /* tell the caller who sent the datagram */
    if(ip != NULL) {
        *ip = packet_getSourceIP(datagram);
    }
    if(port != NULL) {
        *port = packet_getSourcePort(datagram);
    }

    /* release the packet; any bytes the app did not claim are lost */
    packet_unref(datagram);

    /*
     * refresh the tracker's output buffer stats for this socket.
     * NOTE(review): this read path reports OUTPUT buffer numbers; confirm
     * that this is intended rather than the input buffer.
     */
    Tracker* hostTracker = host_getTracker(worker_getCurrentHost());
    Descriptor* self = (Descriptor*)socket;
    gsize outputLength = socket_getOutputBufferLength(socket);
    gsize outputSize = socket_getOutputBufferSize(socket);
    tracker_updateSocketOutputBuffer(hostTracker, self->handle, outputLength, outputSize);

    debug("user read %u inbound UDP bytes", bytesCopied);

    return (gssize)bytesCopied;
}
/*
 * Handle a packet that has arrived at this interface.
 *
 * If header plus payload fit into the free inbound buffer space, the
 * interface takes a reference to the packet, queues it, tags it
 * PDS_RCV_INTERFACE_BUFFERED and, unless a receive is already in progress,
 * schedules the next receive. Otherwise the packet is only tagged
 * PDS_RCV_INTERFACE_DROPPED.
 */
void networkinterface_packetArrived(NetworkInterface* interface, Packet* packet) {
    MAGIC_ASSERT(interface);

    /* compare the bytes needed against what is still free */
    guint wireBytes = packet_getPayloadLength(packet) + packet_getHeaderSize(packet);
    gssize space = interface->inBufferSize - interface->inBufferLength;
    utility_assert(space >= 0);

    if(wireBytes > space) {
        /* buffers are full, drop packet */
        packet_addDeliveryStatus(packet, PDS_RCV_INTERFACE_DROPPED);
        return;
    }

    /* there is room: the queue keeps its own reference */
    packet_ref(packet);
    g_queue_push_tail(interface->inBuffer, packet);
    interface->inBufferLength += wireBytes;
    packet_addDeliveryStatus(packet, PDS_RCV_INTERFACE_BUFFERED);

    /* a receive that is already running will pick the packet up */
    if(interface->flags & NIF_RECEIVING) {
        return;
    }
    _networkinterface_scheduleNextReceive(interface);
}
/*
 * Append one packet record to the pcap file.
 *
 * Does nothing if 'pcap', its file, or 'packet' is NULL. The record consists
 * of the pcap per-packet header (timestamp taken from the current simulation
 * time, captured and original length both header size + payload length),
 * followed by a synthetic Ethernet header, an IPv4 header, a TCP header
 * built from the packet's TCP header, and finally the payload bytes.
 *
 * Multi-byte protocol fields derived from host-order integers (IP total
 * length, IP flags/fragment, TCP sequence, acknowledgement and window) are
 * converted to network byte order before being written. fwrite results are
 * not checked; the capture is best-effort.
 */
void pcapwriter_writePacket(PCapWriter* pcap, Packet* packet) {
    if(!pcap || !pcap->pcapFile || !packet) {
        return;
    }

    guint32 ts_sec;   /* timestamp seconds */
    guint32 ts_usec;  /* timestamp microseconds */
    guint32 incl_len; /* number of octets of packet saved in file */
    guint32 orig_len; /* actual length of packet */

    /* get the current time that the packet is being sent/received */
    SimulationTime now = worker_getCurrentTime();
    ts_sec = now / SIMTIME_ONE_SECOND;
    ts_usec = (now % SIMTIME_ONE_SECOND) / SIMTIME_ONE_MICROSECOND;

    /* get the header and payload lengths */
    guint headerSize = packet_getHeaderSize(packet);
    guint payloadLength = packet_getPayloadLength(packet);
    incl_len = headerSize + payloadLength;
    orig_len = headerSize + payloadLength;

    /* get the TCP header and the payload */
    PacketTCPHeader tcpHeader;
    guchar *payload = g_new0(guchar, payloadLength);
    packet_getTCPHeader(packet, &tcpHeader);
    packet_copyPayload(packet, 0, payload, payloadLength);

    /* write the PCAP packet header to the pcap file (host byte order) */
    fwrite(&ts_sec, sizeof(ts_sec), 1, pcap->pcapFile);
    fwrite(&ts_usec, sizeof(ts_usec), 1, pcap->pcapFile);
    fwrite(&incl_len, sizeof(incl_len), 1, pcap->pcapFile);
    fwrite(&orig_len, sizeof(orig_len), 1, pcap->pcapFile);

    /* write the ethernet header (fixed placeholder MACs, EtherType IPv4) */
    guint8 destinationMAC[6] = {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB};
    guint8 sourceMAC[6] = {0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6};
    guint16 type = htons(0x0800);

    fwrite(destinationMAC, 1, sizeof(destinationMAC), pcap->pcapFile);
    fwrite(sourceMAC, 1, sizeof(sourceMAC), pcap->pcapFile);
    fwrite(&type, 1, sizeof(type), pcap->pcapFile);

    /* write the IP header */
    guint8 versionAndHeaderLength = 0x45;
    guint8 fields = 0x00;
    /* the 14-byte ethernet header is not part of the IP datagram */
    guint16 totalLength = htons(orig_len - 14);
    guint16 identification = 0x0000;
    /* "don't fragment" flag; htons keeps the on-disk bytes 0x40 0x00 on any host */
    guint16 flagsAndFragment = htons(0x4000);
    guint8 timeToLive = 64;
    guint8 protocol = 6; /* TCP */
    guint16 headerChecksum = 0x0000;
    guint32 sourceIP = tcpHeader.sourceIP;
    guint32 destinationIP = tcpHeader.destinationIP;

    fwrite(&versionAndHeaderLength, 1, sizeof(versionAndHeaderLength), pcap->pcapFile);
    fwrite(&fields, 1, sizeof(fields), pcap->pcapFile);
    fwrite(&totalLength, 1, sizeof(totalLength), pcap->pcapFile);
    fwrite(&identification, 1, sizeof(identification), pcap->pcapFile);
    fwrite(&flagsAndFragment, 1, sizeof(flagsAndFragment), pcap->pcapFile);
    fwrite(&timeToLive, 1, sizeof(timeToLive), pcap->pcapFile);
    fwrite(&protocol, 1, sizeof(protocol), pcap->pcapFile);
    fwrite(&headerChecksum, 1, sizeof(headerChecksum), pcap->pcapFile);
    fwrite(&sourceIP, 1, sizeof(sourceIP), pcap->pcapFile);
    fwrite(&destinationIP, 1, sizeof(destinationIP), pcap->pcapFile);

    /* write the TCP header */
    guint16 sourcePort = tcpHeader.sourcePort;
    guint16 destinationPort = tcpHeader.destinationPort;
    /* sequence is a host-order counter, so convert it just like the ack */
    guint32 sequence = htonl(tcpHeader.sequence);
    guint32 acknowledgement = 0;
    if(tcpHeader.flags & PTCP_ACK) {
        acknowledgement = htonl(tcpHeader.acknowledgment);
    }
    /* data offset of 8 words: 20 fixed bytes plus 12 option bytes */
    guint8 headerLength = 0x80;
    guint8 tcpFlags = 0;
    if(tcpHeader.flags & PTCP_RST) tcpFlags |= 0x04;
    if(tcpHeader.flags & PTCP_SYN) tcpFlags |= 0x02;
    if(tcpHeader.flags & PTCP_ACK) tcpFlags |= 0x10;
    if(tcpHeader.flags & PTCP_FIN) tcpFlags |= 0x01;
    /* window is a host-order value as well */
    guint16 window = htons((guint16) tcpHeader.window);
    guint16 tcpChecksum = 0x0000;
    /* zeroed urgent pointer (2 bytes) followed by 12 bytes of options */
    guint8 options[14] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
                          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};

    fwrite(&sourcePort, 1, sizeof(sourcePort), pcap->pcapFile);
    fwrite(&destinationPort, 1, sizeof(destinationPort), pcap->pcapFile);
    fwrite(&sequence, 1, sizeof(sequence), pcap->pcapFile);
    fwrite(&acknowledgement, 1, sizeof(acknowledgement), pcap->pcapFile);
    fwrite(&headerLength, 1, sizeof(headerLength), pcap->pcapFile);
    fwrite(&tcpFlags, 1, sizeof(tcpFlags), pcap->pcapFile);
    fwrite(&window, 1, sizeof(window), pcap->pcapFile);
    fwrite(&tcpChecksum, 1, sizeof(tcpChecksum), pcap->pcapFile);
    fwrite(options, 1, sizeof(options), pcap->pcapFile);

    /* write payload data */
    if(payloadLength > 0) {
        fwrite(payload, 1, payloadLength, pcap->pcapFile);
    }

    g_free(payload);
}
/*
 * Send queued outbound packets in a batch and schedule a callback for when
 * the consumed send time has elapsed.
 *
 * While the consumed send time does not exceed the configured batch time, a
 * packet is chosen by the interface's queuing discipline (round robin for
 * NIQ_RR, first-in-first-out otherwise). The loop ends as soon as no packet
 * is available. A packet addressed to this interface's own IP is delivered
 * to our own node after 1 time unit; all other packets go through
 * worker_schedulePacket. Each sent packet is charged against the upstream
 * bandwidth, written to pcap, and then released.
 */
static void _networkinterface_scheduleNextSend(NetworkInterface* interface) {
    /* upper bound on the send time consumed by one batch */
    SimulationTime batchLimit = worker_getConfig()->interfaceBatchTime;

    /* keep going until the budget is spent or no socket has data */
    while(interface->sendNanosecondsConsumed <= batchLimit) {
        gint socketHandle = -1;

        /* the queuing discipline picks the next packet */
        Packet* outgoing = NULL;
        if(interface->qdisc == NIQ_RR) {
            outgoing = _networkinterface_selectRoundRobin(interface, &socketHandle);
        } else {
            /* NIQ_FIFO and anything unrecognized */
            outgoing = _networkinterface_selectFirstInFirstOut(interface, &socketHandle);
        }

        if(outgoing == NULL) {
            break;
        }

        packet_addDeliveryStatus(outgoing, PDS_SND_INTERFACE_SENT);

        /* decide where the packet goes */
        if(networkinterface_getIPAddress(interface) != packet_getDestinationIP(outgoing)) {
            /* the worker applies the appropriate network delays */
            worker_schedulePacket(outgoing);
        } else {
            /* the packet arrives on our own interface, on our own node */
            PacketArrivedEvent* arrival = packetarrived_new(outgoing);
            worker_scheduleEvent((Event*)arrival, 1, 0);
        }

        /* charge the time it took to 'send' header plus payload */
        guint wireBytes = packet_getPayloadLength(outgoing) + packet_getHeaderSize(outgoing);
        interface->sendNanosecondsConsumed += (wireBytes * interface->timePerByteUp);

        /*
         * NOTE(review): the byte count is passed as the second argument here;
         * confirm this matches the tracker_addOutputBytes signature in use.
         */
        tracker_addOutputBytes(host_getTracker(worker_getCurrentHost()),(guint64)wireBytes, socketHandle);
        _networkinterface_pcapWritePacket(interface, outgoing);

        /* the sending side no longer needs its reference */
        packet_unref(outgoing);
    }

    /*
     * call back and try to send more even if the batch time was not used up,
     * because more packets may be ready to send by then.
     */
    SimulationTime callbackDelay = (SimulationTime) floor(interface->sendNanosecondsConsumed);
    if(callbackDelay < SIMTIME_ONE_NANOSECOND) {
        return;
    }

    /* mark that we are 'sending' the packets */
    interface->flags |= NIF_SENDING;

    /* fire on our own node once the packets count as 'sent' */
    InterfaceSentEvent* sentEvent = interfacesent_new(interface);
    worker_scheduleEvent((Event*)sentEvent, callbackDelay, 0);
}
/*
 * Process a TCP packet that arrived for this socket.
 *
 * Steps, in order:
 *  1. resolve the TCP object that really owns the packet (a server socket
 *     may hand it to an existing child);
 *  2. handle RST packets and return early;
 *  3. run the connection state machine, collecting the flags of a response
 *     control packet in responseFlags;
 *  4. apply acknowledgement information to the send side;
 *  5. buffer payload that falls inside the receive window;
 *  6. update the congestion window, flush, and send a response control
 *     packet if one is needed.
 *
 * return TRUE if the packet should be retransmitted
 *
 * The packet is unreffed here on the RST, LASTACK->CLOSED and CLOSED paths,
 * and at the end when it carries no payload and needs no retransmit.
 */
gboolean tcp_processPacket(TCP* tcp, Packet* packet) {
    MAGIC_ASSERT(tcp);

    /* fetch the TCP info from the packet */
    PacketTCPHeader header;
    packet_getTCPHeader(packet, &header);
    guint packetLength = packet_getPayloadLength(packet);

    /* if we run a server, the packet could be for an existing child */
    tcp = _tcp_getSourceTCP(tcp, header.sourceIP, header.sourcePort);

    /* now we have the true TCP for the packet */
    MAGIC_ASSERT(tcp);

    /* print packet info for debugging */
    debug("%s <-> %s: processing packet# %u length %u", tcp->super.boundString, tcp->super.peerString, header.sequence, packetLength);

    /* if packet is reset, don't process */
    if(header.flags & PTCP_RST) {
        /* @todo: not sure if this is handled correctly */
        debug("received RESET packet");

        /* only react once, and never on a listening socket */
        if(!(tcp->state & TCPS_LISTEN) && !(tcp->error & TCPE_CONNECTION_RESET)) {
            tcp->error |= TCPE_CONNECTION_RESET;
            tcp->flags |= TCPF_REMOTE_CLOSED;

            _tcp_setState(tcp, TCPS_TIMEWAIT);

            /* it will send no more user data after what we have now */
            tcp->receive.end = tcp->receive.next;
        }

        packet_unref(packet);
        return FALSE;
    }

    /* if we are a server, we have to remember who we got this from so we can
     * respond back to them. this is because we could be bound to several
     * interfaces and otherwise cant decide which to send on.
     */
    if(tcp->server) {
        tcp->server->lastPeerIP = header.sourceIP;
        tcp->server->lastPeerPort = header.sourcePort;
        tcp->server->lastIP = header.destinationIP;
    }

    /* go through the state machine, tracking processing and response */
    gboolean wasProcessed = FALSE;
    enum ProtocolTCPFlags responseFlags = PTCP_NONE;

    switch(tcp->state) {
        case TCPS_LISTEN: {
            /* receive SYN, send SYNACK, move to SYNRECEIVED */
            if(header.flags & PTCP_SYN) {
                MAGIC_ASSERT(tcp->server);
                wasProcessed = TRUE;

                /* we need to multiplex a new child */
                Node* node = worker_getPrivate()->cached_node;
                gint multiplexedHandle = node_createDescriptor(node, DT_TCPSOCKET);
                TCP* multiplexed = (TCP*) node_lookupDescriptor(node, multiplexedHandle);

                multiplexed->child = _tcpchild_new(multiplexed, tcp, header.sourceIP, header.sourcePort);
                /* the child's key must not already be registered with the server */
                g_assert(g_hash_table_lookup(tcp->server->children, &(multiplexed->child->key)) == NULL);
                g_hash_table_replace(tcp->server->children, &(multiplexed->child->key), multiplexed->child);

                /* the SYN consumes one sequence number */
                multiplexed->receive.start = header.sequence;
                multiplexed->receive.next = multiplexed->receive.start + 1;

                debug("%s <-> %s: server multiplexed child socket %s <-> %s", tcp->super.boundString, tcp->super.peerString, multiplexed->super.boundString, multiplexed->super.peerString);

                _tcp_setState(multiplexed, TCPS_SYNRECEIVED);

                /* parent will send response */
                responseFlags = PTCP_SYN|PTCP_ACK;
            }
            break;
        }

        case TCPS_SYNSENT: {
            /* receive SYNACK, send ACK, move to ESTABLISHED */
            if((header.flags & PTCP_SYN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                tcp->receive.start = header.sequence;
                tcp->receive.next = tcp->receive.start + 1;

                responseFlags |= PTCP_ACK;
                _tcp_setState(tcp, TCPS_ESTABLISHED);
            }
            /* receive SYN, send ACK, move to SYNRECEIVED (simultaneous open) */
            else if(header.flags & PTCP_SYN) {
                wasProcessed = TRUE;
                tcp->receive.start = header.sequence;
                tcp->receive.next = tcp->receive.start + 1;

                responseFlags |= PTCP_ACK;
                _tcp_setState(tcp, TCPS_SYNRECEIVED);
            }
            break;
        }

        case TCPS_SYNRECEIVED: {
            /* receive ACK, move to ESTABLISHED */
            if(header.flags & PTCP_ACK) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_ESTABLISHED);

                /* if this is a child, mark it accordingly */
                if(tcp->child) {
                    tcp->child->state = TCPCS_PENDING;
                    g_queue_push_tail(tcp->child->parent->server->pending, tcp->child);
                    /* user should accept new child from parent */
                    descriptor_adjustStatus(&(tcp->child->parent->super.super.super), DS_READABLE, TRUE);
                }
            }
            break;
        }

        case TCPS_ESTABLISHED: {
            /* receive FIN, send FINACK, move to CLOSEWAIT */
            if(header.flags & PTCP_FIN) {
                wasProcessed = TRUE;

                /* other side of connections closed */
                tcp->flags |= TCPF_REMOTE_CLOSED;
                responseFlags |= (PTCP_FIN|PTCP_ACK);
                _tcp_setState(tcp, TCPS_CLOSEWAIT);

                /* remote will send us no more user data after this sequence */
                tcp->receive.end = header.sequence;
            }
            break;
        }

        case TCPS_FINWAIT1: {
            /* receive FINACK, move to FINWAIT2 */
            if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_FINWAIT2);
            }
            /* receive FIN, send FINACK, move to CLOSING (simultaneous close) */
            else if(header.flags & PTCP_FIN) {
                wasProcessed = TRUE;
                responseFlags |= (PTCP_FIN|PTCP_ACK);
                tcp->flags |= TCPF_REMOTE_CLOSED;
                _tcp_setState(tcp, TCPS_CLOSING);

                /* it will send no more user data after this sequence */
                tcp->receive.end = header.sequence;
            }
            break;
        }

        case TCPS_FINWAIT2: {
            /* receive FIN, send FINACK, move to TIMEWAIT */
            if(header.flags & PTCP_FIN) {
                wasProcessed = TRUE;
                responseFlags |= (PTCP_FIN|PTCP_ACK);
                tcp->flags |= TCPF_REMOTE_CLOSED;
                _tcp_setState(tcp, TCPS_TIMEWAIT);

                /* it will send no more user data after this sequence */
                tcp->receive.end = header.sequence;
            }
            break;
        }

        case TCPS_CLOSING: {
            /* receive FINACK, move to TIMEWAIT */
            if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_TIMEWAIT);
            }
            break;
        }

        /* no state transition is triggered by a packet in these two states */
        case TCPS_TIMEWAIT: {
            break;
        }

        case TCPS_CLOSEWAIT: {
            break;
        }

        case TCPS_LASTACK: {
            /* receive FINACK, move to CLOSED */
            if((header.flags & PTCP_FIN) && (header.flags & PTCP_ACK)) {
                wasProcessed = TRUE;
                _tcp_setState(tcp, TCPS_CLOSED);
                /* we closed, cant use tcp anymore, no retransmit */
                packet_unref(packet);
                return FALSE;
            }
            break;
        }

        case TCPS_CLOSED: {
            /* stray packet, drop without retransmit */
            packet_unref(packet);
            return FALSE;
            break;
        }

        default: {
            break;
        }
    }

    /* how far the peer's acknowledgement advanced past send.unacked */
    gint nPacketsAcked = 0;

    /* check if we can update some TCP control info */
    if(header.flags & PTCP_ACK) {
        wasProcessed = TRUE;
        /* only accept acks for data that is outstanding: (unacked, next] */
        if((header.acknowledgement > tcp->send.unacked) && (header.acknowledgement <= tcp->send.next)) {
            /* some data we sent got acknowledged */
            nPacketsAcked = header.acknowledgement - tcp->send.unacked;

            /* the packets just acked are 'released' from retransmit queue */
            for(guint i = tcp->send.unacked; i < header.acknowledgement; i++) {
                _tcp_removeRetransmit(tcp, i);
            }

            tcp->send.unacked = header.acknowledgement;

            /* update congestion window and keep track of when it was updated */
            tcp->congestion.lastWindow = header.window;
            tcp->congestion.lastSequence = header.sequence;
            tcp->congestion.lastAcknowledgement = header.acknowledgement;
        }
    }

    gboolean doRetransmitData = FALSE;

    /* check if the packet carries user data for us */
    if(packetLength > 0) {
        /* it has data, check if its in the correct range */
        if(header.sequence >= (tcp->receive.next + tcp->receive.window)) {
            /* its too far ahead to accept now, but they should re-send it */
            wasProcessed = TRUE;
            doRetransmitData = TRUE;
        } else if(header.sequence >= tcp->receive.next) {
            /* its in our window, so we can accept the data */
            wasProcessed = TRUE;

            /*
             * if this is THE next packet, we MUST accept it to avoid
             * deadlocks (unless we are blocked b/c user should read)
             */
            gboolean isNextPacket = (header.sequence == tcp->receive.next) ? TRUE : FALSE;
            /*
             * NOTE(review): waitingUserRead is TRUE when the input buffer
             * still HAS space, which reads as the opposite of its name;
             * confirm the intended condition.
             */
            gboolean waitingUserRead = (socket_getInputBufferSpace(&(tcp->super)) > 0) ? TRUE : FALSE;
            gboolean packetFits = (packetLength <= _tcp_getBufferSpaceIn(tcp)) ? TRUE : FALSE;

            if((isNextPacket && !waitingUserRead) || (packetFits)) {
                /* make sure its in order */
                _tcp_bufferPacketIn(tcp, packet);
            } else {
                debug("no space for packet even though its in our window");
                doRetransmitData = TRUE;
            }
        }
        /*
         * NOTE(review): a data packet with sequence below receive.next is
         * neither buffered nor unreffed in this function; confirm who owns it.
         */
    }

    /* if it is a spurious packet, send a reset */
    if(!wasProcessed) {
        g_assert(responseFlags == PTCP_NONE);
        responseFlags = PTCP_RST;
    }

    /* try to update congestion window based on potentially new info */
    _tcp_updateCongestionWindow(tcp, nPacketsAcked);

    /* now flush as many packets as we can to socket */
    _tcp_flush(tcp);

    /* send ack if they need updates but we didn't send any yet (selective acks) */
    if((tcp->receive.next > tcp->send.lastAcknowledgement) ||
            (tcp->receive.window != tcp->send.lastWindow))
    {
        responseFlags |= PTCP_ACK;
    }

    /* send control packet if we have one */
    if(responseFlags != PTCP_NONE) {
        debug("%s <-> %s: sending response control packet",
                tcp->super.boundString, tcp->super.peerString);
        /* a control packet carries no payload */
        Packet* response = _tcp_createPacket(tcp, responseFlags, NULL, 0);
        _tcp_bufferPacketOut(tcp, response);
        _tcp_flush(tcp);
    }

    /* we should free packets that are done but were not buffered */
    if(!doRetransmitData && packetLength <= 0) {
        packet_unref(packet);
    }

    return doRetransmitData;
}
gssize tcp_receiveUserData(TCP* tcp, gpointer buffer, gsize nBytes, in_addr_t* ip, in_port_t* port) { MAGIC_ASSERT(tcp); /* * TODO * We call descriptor_adjustStatus too many times here, to handle the readable * state of the socket at times when we have a partially read packet. Consider * adding a required hook for socket subclasses so the socket layer can * query TCP for readability status. */ /* make sure we pull in all readable user data */ _tcp_flush(tcp); gsize remaining = nBytes; gsize bytesCopied = 0; gsize totalCopied = 0; gsize offset = 0; gsize copyLength = 0; /* check if we have a partial packet waiting to get finished */ if(remaining > 0 && tcp->partialUserDataPacket) { guint partialLength = packet_getPayloadLength(tcp->partialUserDataPacket); guint partialBytes = partialLength - tcp->partialOffset; g_assert(partialBytes > 0); copyLength = MIN(partialBytes, remaining); bytesCopied = packet_copyPayload(tcp->partialUserDataPacket, tcp->partialOffset, buffer, copyLength); totalCopied += bytesCopied; remaining -= bytesCopied; offset += bytesCopied; if(bytesCopied >= partialBytes) { /* we finished off the partial packet */ packet_unref(tcp->partialUserDataPacket); tcp->partialUserDataPacket = NULL; tcp->partialOffset = 0; } else { /* still more partial bytes left */ tcp->partialOffset += bytesCopied; g_assert(remaining == 0); } } while(remaining > 0) { /* if we get here, we should have read the partial packet above, or * broken out below */ g_assert(tcp->partialUserDataPacket == NULL); g_assert(tcp->partialOffset == 0); /* get the next buffered packet - we'll always need it. 
* this could mark the socket as unreadable if this is its last packet.*/ Packet* packet = socket_removeFromInputBuffer((Socket*)tcp); if(!packet) { /* no more packets or partial packets */ break; } guint packetLength = packet_getPayloadLength(packet); copyLength = MIN(packetLength, remaining); bytesCopied = packet_copyPayload(packet, 0, buffer + offset, copyLength); totalCopied += bytesCopied; remaining -= bytesCopied; offset += bytesCopied; if(bytesCopied < packetLength) { /* we were only able to read part of this packet */ tcp->partialUserDataPacket = packet; tcp->partialOffset = bytesCopied; break; } /* we read the entire packet, and are now finished with it */ packet_unref(packet); } /* now we update readability of the socket */ if((tcp->super.inputBufferLength > 0) || (tcp->partialUserDataPacket != NULL)) { /* we still have readable data */ descriptor_adjustStatus(&(tcp->super.super.super), DS_READABLE, TRUE); } else { /* all of our ordered user data has been read */ if((tcp->unorderedInputLength == 0) && (tcp->error & TCPE_RECEIVE_EOF)) { /* there is no more unordered data either, and we need to signal EOF */ if(totalCopied > 0) { /* we just received bytes, so we can't EOF until the next call. * make sure we stay readable so we DO actually EOF the socket */ descriptor_adjustStatus(&(tcp->super.super.super), DS_READABLE, TRUE); } else { /* OK, no more data and nothing just received. */ if(tcp->flags & TCPF_EOF_SIGNALED) { /* we already signaled close, now its an error */ return -2; } else { /* we have not signaled close, do that now and close out the socket */ _tcp_endOfFileSignalled(tcp); return 0; } } } else { /* our socket still has unordered data or is still open, but empty for now */ descriptor_adjustStatus(&(tcp->super.super.super), DS_READABLE, FALSE); } } debug("%s <-> %s: receiving %lu user bytes", tcp->super.boundString, tcp->super.peerString, totalCopied); return (gssize) (totalCopied == 0 ? -1 : totalCopied); }
/*
 * Move as much data as currently possible through the TCP buffers.
 *
 * After refreshing the receive and send windows this does three things:
 *  1. moves packets from the throttled-output queue to the socket output
 *     buffer, stopping at the first data packet that does not fit in the
 *     send window or the output buffer;
 *  2. moves packets from the unordered-input queue to the socket input
 *     buffer for as long as the head packet is the next expected sequence
 *     and the input buffer accepts it;
 *  3. if either side has closed, sets TCPE_SEND_EOF, and once all data up
 *     to receive.end has been received and EOF was not yet signalled, sets
 *     TCPE_RECEIVE_EOF and marks the socket readable.
 */
static void _tcp_flush(TCP* tcp) {
    MAGIC_ASSERT(tcp);

    /* make sure our information is up to date */
    _tcp_updateReceiveWindow(tcp);
    _tcp_updateSendWindow(tcp);

    /* flush packets that can now be sent to socket */
    while(g_queue_get_length(tcp->throttledOutput) > 0) {
        /* get the next throttled packet, in sequence order */
        Packet* packet = g_queue_pop_head(tcp->throttledOutput);

        /* break out if we have no packets left */
        if(!packet) {
            break;
        }

        guint length = packet_getPayloadLength(packet);

        /* only packets with payload are subject to the window and space checks */
        if(length > 0) {
            PacketTCPHeader header;
            packet_getTCPHeader(packet, &header);

            /* we cant send it if our window is too small */
            gboolean fitsInWindow = (header.sequence < (tcp->send.unacked + tcp->send.window)) ? TRUE : FALSE;

            /* we cant send it if we dont have enough space */
            gboolean fitsInBuffer = (length <= socket_getOutputBufferSpace(&(tcp->super))) ? TRUE : FALSE;

            if(!fitsInBuffer || !fitsInWindow) {
                /* we cant send the packet yet; put it back at the head so order is kept */
                g_queue_push_head(tcp->throttledOutput, packet);
                break;
            } else {
                /* we will send: store length in virtual retransmission buffer
                 * so we can reduce buffer space consumed when we receive the ack */
                _tcp_addRetransmit(tcp, header.sequence, length);
            }
        }

        /* packet is sendable, we removed it from out buffer */
        tcp->throttledOutputLength -= length;

        /* update TCP header to our current advertised window and acknowledgement */
        packet_updateTCP(packet, tcp->receive.next, tcp->receive.window);

        /* keep track of the last things we sent them */
        tcp->send.lastAcknowledgement = tcp->receive.next;
        tcp->send.lastWindow = tcp->receive.window;

        /* socket will queue it ASAP */
        gboolean success = socket_addToOutputBuffer(&(tcp->super), packet);

        /* we already checked for space, so this should always succeed */
        g_assert(success);
    }

    /* any packets now in order can be pushed to our user input buffer */
    while(g_queue_get_length(tcp->unorderedInput) > 0) {
        Packet* packet = g_queue_pop_head(tcp->unorderedInput);

        PacketTCPHeader header;
        packet_getTCPHeader(packet, &header);

        if(header.sequence == tcp->receive.next) {
            /* move from the unordered buffer to user input buffer */
            gboolean fitInBuffer = socket_addToInputBuffer(&(tcp->super), packet);

            if(fitInBuffer) {
                tcp->unorderedInputLength -= packet_getPayloadLength(packet);
                /* the next expected sequence advances by one per packet */
                (tcp->receive.next)++;
                continue;
            }
        }

        /* we could not buffer it because its out of order or we have no space */
        g_queue_push_head(tcp->unorderedInput, packet);
        break;
    }

    /* check if user needs an EOF signal */
    gboolean wantsEOF = ((tcp->flags & TCPF_LOCAL_CLOSED) || (tcp->flags & TCPF_REMOTE_CLOSED)) ? TRUE : FALSE;
    if(wantsEOF) {
        /* if anyone closed, can't send anymore */
        tcp->error |= TCPE_SEND_EOF;

        /* EOF is only due once everything up to receive.end has arrived */
        if((tcp->receive.next >= tcp->receive.end) && !(tcp->flags & TCPF_EOF_SIGNALED)) {
            /* user needs to read a 0 so it knows we closed */
            tcp->error |= TCPE_RECEIVE_EOF;
            descriptor_adjustStatus((Descriptor*)tcp, DS_READABLE, TRUE);
        }
    }
}