/*----------------------------------------------------------------------------*/
/*
 * ProcessRST - react to a RST segment received on an existing stream.
 *
 * mtcp:       manager handle, passed through to the helpers called here
 * cur_stream: stream the RST was received on
 * ack_seq:    acknowledgment number carried by the RST segment
 *
 * Returns FALSE when the stream is in TCP_ST_SYN_SENT or an earlier state
 * (not handled here), TRUE in every other case.
 *
 * TODO: we need reset validation logic. The sequence number of a RST should
 * be inside the window (in SYN_SENT state, it should ack the previous SYN).
 */
static inline int
ProcessRST(mtcp_manager_t mtcp, tcp_stream *cur_stream, uint32_t ack_seq)
{
	TRACE_DBG("Stream %d: TCP RESET (%s)\n",
			cur_stream->id, TCPStateToString(cur_stream));
#if DUMP_STREAM
	DumpStream(mtcp, cur_stream);
#endif

	if (cur_stream->state <= TCP_ST_SYN_SENT) {
		/* not handled here */
		return FALSE;
	}

	if (cur_stream->state == TCP_ST_SYN_RCVD) {
		/*
		 * The ACK number of an incoming segment acknowledges data that WE
		 * sent, so it has to be matched against our send sequence space
		 * (snd_nxt), not against rcv_nxt, which tracks the peer's
		 * sequence space. A RST that does not match is silently ignored.
		 */
		if (ack_seq == cur_stream->snd_nxt) {
			cur_stream->state = TCP_ST_CLOSED;
			cur_stream->close_reason = TCP_RESET;
			DestroyTCPStream(mtcp, cur_stream);
		}
		return TRUE;
	}

	/*
	 * If the application has already closed the connection,
	 * just destroy the stream.
	 */
	if (cur_stream->state == TCP_ST_FIN_WAIT_1 ||
			cur_stream->state == TCP_ST_FIN_WAIT_2 ||
			cur_stream->state == TCP_ST_LAST_ACK ||
			cur_stream->state == TCP_ST_CLOSING ||
			cur_stream->state == TCP_ST_TIME_WAIT) {
		cur_stream->state = TCP_ST_CLOSED;
		cur_stream->close_reason = TCP_ACTIVE_CLOSE;
		DestroyTCPStream(mtcp, cur_stream);
		return TRUE;
	}

	/*
	 * TODO: for the states between ESTABLISHED and CLOSE_WAIT, flush all
	 * the segment queues and notify the connection reset.
	 */

	/*
	 * Unless the stream is already sitting on a close/reset queue, move it
	 * to CLOSE_WAIT and raise a close event so the application can observe
	 * the reset; the stream is not destroyed here.
	 */
	if (!(cur_stream->sndvar->on_closeq || cur_stream->sndvar->on_closeq_int ||
			cur_stream->sndvar->on_resetq || cur_stream->sndvar->on_resetq_int)) {
		cur_stream->state = TCP_ST_CLOSE_WAIT;
		cur_stream->close_reason = TCP_RESET;
		RaiseCloseEvent(mtcp, cur_stream);
	}

	return TRUE;
}
/*----------------------------------------------------------------------------*/
/*
 * mtcp_is_connected - tell whether a stream exists and is ESTABLISHED.
 *
 * mtcp:       manager handle (not read by this function)
 * cur_stream: stream to inspect; may be NULL
 *
 * Returns TRUE only when cur_stream is non-NULL and its state is
 * TCP_ST_ESTABLISHED; otherwise logs the reason and returns FALSE.
 */
static inline int
mtcp_is_connected(mtcp_manager_t mtcp, tcp_stream *cur_stream)
{
	if (!cur_stream) {
		TRACE_API("Stream does not exist\n");
		return FALSE;
	}

	if (cur_stream->state == TCP_ST_ESTABLISHED) {
		return TRUE;
	}

	TRACE_API("Stream %d not ESTABLISHED. state: %s\n",
			cur_stream->id, TCPStateToString(cur_stream));
	return FALSE;
}
/*----------------------------------------------------------------------------*/
/*
 * RaisePendingStreamEvents - queue epoll events that are already pending on
 * a stream socket.
 *
 * mtcp:   manager handle (not read by this function)
 * ep:     epoll instance that receives the events
 * socket: socket whose stream is examined; socket->epoll holds the event
 *         mask that is tested here
 *
 * Returns -1 when the socket has no stream or the stream's state is below
 * TCP_ST_ESTABLISHED, 0 otherwise (whether or not an event was added).
 */
static int
RaisePendingStreamEvents(mtcp_manager_t mtcp,
		struct mtcp_epoll *ep, socket_map_t socket)
{
	tcp_stream *ts = socket->stream;

	if (!ts || ts->state < TCP_ST_ESTABLISHED) {
		return -1;
	}

	TRACE_EPOLL("Stream %d at state %s\n",
			ts->id, TCPStateToString(ts));

	/* read side: payload already buffered, or the stream is in CLOSE_WAIT */
	if (socket->epoll & MTCP_EPOLLIN) {
		struct tcp_recv_vars *rv = ts->rcvvar;
		int has_payload = rv->rcvbuf && rv->rcvbuf->merged_len > 0;

		if (has_payload) {
			TRACE_EPOLL("Socket %d: Has existing payloads\n", socket->id);
			AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
		} else if (ts->state == TCP_ST_CLOSE_WAIT) {
			TRACE_EPOLL("Socket %d: Waiting for close\n", socket->id);
			AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLIN);
		}
	}

	/*
	 * write side: no send buffer yet, or its length is below snd_wnd;
	 * skipped when MTCP_EPOLLOUT is already set in socket->events
	 */
	if (socket->epoll & MTCP_EPOLLOUT) {
		struct tcp_send_vars *sv = ts->sndvar;
		int has_room = !sv->sndbuf || sv->sndbuf->len < sv->snd_wnd;

		if (has_room && !(socket->events & MTCP_EPOLLOUT)) {
			TRACE_EPOLL("Socket %d: Adding write event\n", socket->id);
			AddEpollEvent(ep, USR_SHADOW_EVENT_QUEUE, socket, MTCP_EPOLLOUT);
		}
	}

	return 0;
}
/*----------------------------------------------------------------------------*/
/*
 * CloseStreamSocket - detach a stream from its socket and hand it to the
 * destroy queue or the close queue, depending on its state.
 *
 * mctx:   context used to look up the manager
 * sockid: index into mtcp->smap; it is not range-checked here
 *         (presumably validated by the caller - TODO confirm)
 *
 * Returns 0 when the stream was already closed, was queued for destruction
 * from TCP_ST_CLOSED, or was queued on the close queue.
 * Returns -1 when:
 *   - the manager lookup fails (errno is not set here),
 *   - the socket has no stream (errno = ENOTCONN),
 *   - the stream is in TCP_ST_SYN_SENT (it is queued for destruction;
 *     errno is not set here),
 *   - the stream is in any state other than ESTABLISHED / CLOSE_WAIT
 *     (errno = EBADF),
 *   - enqueueing on the close queue fails (errno = EAGAIN).
 */
static inline int
CloseStreamSocket(mctx_t mctx, int sockid)
{
	mtcp_manager_t mtcp = GetMTCPManager(mctx);
	tcp_stream *strm;
	int rc;

	if (!mtcp) {
		return -1;
	}

	strm = mtcp->smap[sockid].stream;
	if (!strm) {
		TRACE_API("Socket %d: stream does not exist.\n", sockid);
		errno = ENOTCONN;
		return -1;
	}

	if (strm->closed) {
		TRACE_API("Socket %d (Stream %u): already closed stream\n",
				sockid, strm->id);
		return 0;
	}

	/* from here on the stream is marked closed and detached from the socket */
	strm->closed = TRUE;
	TRACE_API("Stream %d: closing the stream.\n", strm->id);
	strm->socket = NULL;

	if (strm->state == TCP_ST_CLOSED) {
		TRACE_API("Stream %d at TCP_ST_CLOSED. destroying the stream.\n",
				strm->id);
		SQ_LOCK(&mtcp->ctx->destroyq_lock);
		StreamEnqueue(mtcp->destroyq, strm);
		mtcp->wakeup_flag = TRUE;
		SQ_UNLOCK(&mtcp->ctx->destroyq_lock);
		return 0;
	}

	if (strm->state == TCP_ST_SYN_SENT) {
		/* connection never completed: queue for destruction and report failure */
		SQ_LOCK(&mtcp->ctx->destroyq_lock);
		StreamEnqueue(mtcp->destroyq, strm);
		SQ_UNLOCK(&mtcp->ctx->destroyq_lock);
		mtcp->wakeup_flag = TRUE;
		return -1;
	}

	if (!(strm->state == TCP_ST_ESTABLISHED ||
			strm->state == TCP_ST_CLOSE_WAIT)) {
		TRACE_API("Stream %d at state %s\n",
				strm->id, TCPStateToString(strm));
		errno = EBADF;
		return -1;
	}

	/* ESTABLISHED or CLOSE_WAIT: queue the stream on the close queue */
	SQ_LOCK(&mtcp->ctx->close_lock);
	strm->sndvar->on_closeq = TRUE;
	rc = StreamEnqueue(mtcp->closeq, strm);
	mtcp->wakeup_flag = TRUE;
	SQ_UNLOCK(&mtcp->ctx->close_lock);

	if (rc < 0) {
		TRACE_ERROR("(NEVER HAPPEN) Failed to enqueue the stream to close.\n");
		errno = EAGAIN;
		return -1;
	}

	return 0;
}
/*----------------------------------------------------------------------------*/
/*
 * mtcp_connect - start (and, for blocking sockets, wait for) a TCP
 * connection on a stream socket.
 *
 * mctx:    context used to look up the manager; mctx->cpu is used for the
 *          RSS core check and for address selection
 * sockid:  socket id, validated against CONFIG.max_concurrency
 * addr:    destination address; must be AF_INET
 * addrlen: size of *addr; must be at least sizeof(struct sockaddr_in)
 *
 * Returns 0 once a blocking socket's stream reaches TCP_ST_ESTABLISHED.
 * Returns -1 otherwise, with errno set as follows (the manager lookup
 * failure is the one path that does not set errno here):
 *   EBADF        - sockid out of range or socket unused
 *   ENOTSOCK     - socket is not MTCP_SOCK_STREAM
 *   EFAULT       - addr is NULL
 *   EAFNOSUPPORT - family is not AF_INET or addrlen is too small
 *   EISCONN / EALREADY - the socket already has a stream
 *   EINVAL       - a bound local address maps to a different core
 *   EAGAIN       - no local address available, or the connect queue is full
 *   ENOMEM       - stream creation failed
 *   EINPROGRESS  - nonblocking socket; the connect request was queued
 *   ENOSYS       - stream moved past ESTABLISHED while waiting
 */
int
mtcp_connect(mctx_t mctx, int sockid,
		const struct sockaddr *addr, socklen_t addrlen)
{
	mtcp_manager_t mtcp;
	socket_map_t sock;
	tcp_stream *strm;
	struct sockaddr_in *peer;
	in_addr_t dst_ip;
	in_port_t dst_port;
	int dyn_bound = FALSE;
	int rc;

	mtcp = GetMTCPManager(mctx);
	if (!mtcp) {
		return -1;
	}

	if (sockid < 0 || sockid >= CONFIG.max_concurrency) {
		TRACE_API("Socket id %d out of range.\n", sockid);
		errno = EBADF;
		return -1;
	}

	sock = &mtcp->smap[sockid];

	if (sock->socktype == MTCP_SOCK_UNUSED) {
		TRACE_API("Invalid socket id: %d\n", sockid);
		errno = EBADF;
		return -1;
	}

	if (sock->socktype != MTCP_SOCK_STREAM) {
		TRACE_API("Not an end socket. id: %d\n", sockid);
		errno = ENOTSOCK;
		return -1;
	}

	if (!addr) {
		TRACE_API("Socket %d: empty address!\n", sockid);
		errno = EFAULT;
		return -1;
	}

	/* only AF_INET destinations are accepted */
	if (addr->sa_family != AF_INET || addrlen < sizeof(struct sockaddr_in)) {
		TRACE_API("Socket %d: invalid argument!\n", sockid);
		errno = EAFNOSUPPORT;
		return -1;
	}

	if (sock->stream) {
		TRACE_API("Socket %d: stream already exist!\n", sockid);
		errno = (sock->stream->state >= TCP_ST_ESTABLISHED) ?
				EISCONN : EALREADY;
		return -1;
	}

	peer = (struct sockaddr_in *)addr;
	dst_ip = peer->sin_addr.s_addr;
	dst_port = peer->sin_port;

	/* address binding */
	if ((sock->opts & MTCP_ADDR_BIND) &&
			sock->saddr.sin_port != INPORT_ANY &&
			sock->saddr.sin_addr.s_addr != INADDR_ANY) {
		/*
		 * Fully bound local address: the 4-tuple must map to the core
		 * this context runs on. endian_check is 0 for the dpdk module
		 * and 1 for every other I/O module.
		 */
		uint8_t endian_check = (current_iomodule_func != &dpdk_module_func);
		int rss_core = GetRSSCPUCore(sock->saddr.sin_addr.s_addr, dst_ip,
				sock->saddr.sin_port, dst_port, num_queues, endian_check);

		if (rss_core != mctx->cpu) {
			errno = EINVAL;
			return -1;
		}
	} else {
		/* pick a local address: per-manager pool if present, else the global one */
		rc = FetchAddress(mtcp->ap ? mtcp->ap : ap,
				mctx->cpu, num_queues, peer, &sock->saddr);
		if (rc < 0) {
			errno = EAGAIN;
			return -1;
		}
		sock->opts |= MTCP_ADDR_BIND;
		dyn_bound = TRUE;
	}

	strm = CreateTCPStream(mtcp, sock, sock->socktype,
			sock->saddr.sin_addr.s_addr, sock->saddr.sin_port,
			dst_ip, dst_port);
	if (!strm) {
		TRACE_ERROR("Socket %d: failed to create tcp_stream!\n", sockid);
		errno = ENOMEM;
		return -1;
	}

	if (dyn_bound) {
		strm->is_bound_addr = TRUE;
	}
	strm->sndvar->cwnd = 1;
	strm->sndvar->ssthresh = strm->sndvar->mss * 10;

	strm->state = TCP_ST_SYN_SENT;
	TRACE_STATE("Stream %d: TCP_ST_SYN_SENT\n", strm->id);

	/* hand the stream to the connect queue */
	SQ_LOCK(&mtcp->ctx->connect_lock);
	rc = StreamEnqueue(mtcp->connectq, strm);
	SQ_UNLOCK(&mtcp->ctx->connect_lock);
	mtcp->wakeup_flag = TRUE;
	if (rc < 0) {
		TRACE_ERROR("Socket %d: failed to enqueue to conenct queue!\n", sockid);
		SQ_LOCK(&mtcp->ctx->destroyq_lock);
		StreamEnqueue(mtcp->destroyq, strm);
		SQ_UNLOCK(&mtcp->ctx->destroyq_lock);
		errno = EAGAIN;
		return -1;
	}

	/* if nonblocking socket, return EINPROGRESS */
	if (sock->opts & MTCP_NONBLOCK) {
		errno = EINPROGRESS;
		return -1;
	}

	/* blocking socket: poll the stream state once per millisecond */
	for (;;) {
		/*
		 * NOTE(review): strm is a local that is never reassigned after
		 * creation, so this check cannot fire as written.
		 */
		if (!strm) {
			TRACE_ERROR("STREAM DESTROYED\n");
			errno = ETIMEDOUT;
			return -1;
		}

		if (strm->state > TCP_ST_ESTABLISHED) {
			TRACE_ERROR("Socket %d: weird state %s\n",
					sockid, TCPStateToString(strm));
			/* TODO: how to handle this? */
			errno = ENOSYS;
			return -1;
		}

		if (strm->state == TCP_ST_ESTABLISHED) {
			break;
		}

		usleep(1000);
	}

	return 0;
}
/*----------------------------------------------------------------------------*/
/*
 * ProcessRST - react to a RST segment received on an existing stream.
 *
 * mtcp:       manager handle, passed through to the helpers called here
 * cur_stream: stream the RST was received on
 * pctx:       packet context; pctx->p.seq and pctx->p.ack_seq are read
 *
 * Returns FALSE when the stream is in TCP_ST_SYN_SENT or an earlier state
 * (not handled here), TRUE in every other case. State changes are flagged
 * through cb_events (MOS_ON_TCP_STATE_CHANGE) and destruction is requested
 * through actions (MOS_ACT_DESTROY); the stream is not destroyed here.
 *
 * TODO: we need reset validation logic. The sequence number of a RST should
 * be inside the window (in SYN_SENT state, it should ack the previous SYN).
 */
static inline int
ProcessRST(mtcp_manager_t mtcp, tcp_stream *cur_stream, struct pkt_ctx *pctx)
{
	TRACE_DBG("Stream %d: TCP RESET (%s)\n",
			cur_stream->id, TCPStateToString(cur_stream));
#if DUMP_STREAM
	DumpStream(mtcp, cur_stream);
#endif

	if (cur_stream->state <= TCP_ST_SYN_SENT) {
		/* not handled here */
		return FALSE;
	}

	if (cur_stream->state == TCP_ST_SYN_RCVD) {
		/*
		 * Accept the RST when its sequence number is 0 or its ACK number
		 * equals rcv_nxt + 1 (per the original note: "ACK number of last
		 * sent ACK packet == rcv_nxt + 1"). With
		 * BE_RESILIENT_TO_PACKET_DROP, a sequence number of rcv_nxt + 1
		 * is accepted as well.
		 */
		int rst_ok = (pctx->p.seq == 0);
#ifdef BE_RESILIENT_TO_PACKET_DROP
		rst_ok = rst_ok || (pctx->p.seq == cur_stream->rcv_nxt + 1);
#endif
		rst_ok = rst_ok || (pctx->p.ack_seq == cur_stream->rcv_nxt + 1);

		if (!rst_ok) {
			RAISE_DEBUG_EVENT(mtcp, cur_stream,
					"(SYN_RCVD): Ignore invalid RST. "
					"ack_seq expected: %u, ack_seq rcvd: %u\n",
					cur_stream->rcv_nxt + 1, pctx->p.ack_seq);
			return TRUE;
		}

		cur_stream->state = TCP_ST_CLOSED_RSVD;
		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
		cur_stream->close_reason = TCP_RESET;
		cur_stream->actions |= MOS_ACT_DESTROY;
		return TRUE;
	}

	/*
	 * If the application has already closed the connection,
	 * just mark the stream for destruction.
	 */
	switch (cur_stream->state) {
	case TCP_ST_FIN_WAIT_1:
	case TCP_ST_FIN_WAIT_2:
	case TCP_ST_LAST_ACK:
	case TCP_ST_CLOSING:
	case TCP_ST_TIME_WAIT:
		cur_stream->state = TCP_ST_CLOSED_RSVD;
		cur_stream->close_reason = TCP_ACTIVE_CLOSE;
		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
		cur_stream->actions |= MOS_ACT_DESTROY;
		return TRUE;
	default:
		break;
	}

	/*
	 * TODO: for the states between ESTABLISHED and CLOSE_WAIT, flush all
	 * the segment queues and notify the connection reset.
	 */

	/*
	 * Unless the stream is already sitting on a close/reset queue, record
	 * the reset; only MOS_SOCK_STREAM streams get a close event raised.
	 */
	if (!cur_stream->sndvar->on_closeq &&
			!cur_stream->sndvar->on_closeq_int &&
			!cur_stream->sndvar->on_resetq &&
			!cur_stream->sndvar->on_resetq_int) {
		cur_stream->state = TCP_ST_CLOSED_RSVD;
		cur_stream->cb_events |= MOS_ON_TCP_STATE_CHANGE;
		cur_stream->close_reason = TCP_RESET;
		if (HAS_STREAM_TYPE(cur_stream, MOS_SOCK_STREAM)) {
			RaiseCloseEvent(mtcp, cur_stream);
		}
	}

	return TRUE;
}