static int packet_stream_is_packet_next(struct packet_stream *stream, struct packet_stream_pkt *pkt, int direction) { int rev_direction = POM_DIR_REVERSE(direction); uint32_t cur_seq = stream->cur_seq[direction]; uint32_t rev_seq = stream->cur_seq[rev_direction]; // Check that there is no gap with what we expect if ((cur_seq < pkt->seq && pkt->seq - cur_seq < PACKET_HALF_SEQ) || (cur_seq > pkt->seq && cur_seq - pkt->seq > PACKET_HALF_SEQ)) { // There is a gap debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : GAP : cur_seq %u, rev_seq %u", pthread_self(), stream, pkt->pkt->ts.tv_sec, pkt->pkt->ts.tv_usec, pkt->seq, pkt->ack, cur_seq, rev_seq); return 0; } if (stream->flags & PACKET_FLAG_STREAM_BIDIR) { // There is additional checking for bi dir stream if ((rev_seq < pkt->ack && pkt->ack - rev_seq < PACKET_HALF_SEQ) || (rev_seq > pkt->ack && rev_seq - pkt->ack > PACKET_HALF_SEQ)) { // The host processed data in the reverse direction which we haven't processed yet if (stream->t) conntrack_timer_queue(stream->t, stream->rev_dir_timeout); debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : reverse missing : cur_seq %u, rev_seq %u", pthread_self(), stream, pkt->pkt->ts.tv_sec, pkt->pkt->ts.tv_usec, pkt->seq, pkt->ack, cur_seq, rev_seq); return 0; } } // This packet can be processed debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : is next : cur_seq %u, rev_seq %u", pthread_self(), stream, pkt->pkt->ts.tv_sec, pkt->pkt->ts.tv_usec, pkt->seq, pkt->ack, cur_seq, rev_seq); return 1; }
int packet_stream_process_packet(struct packet_stream *stream, struct packet *pkt, struct proto_process_stack *stack, unsigned int stack_index, uint32_t seq, uint32_t ack) { if (!stream || !pkt || !stack) return PROTO_ERR; debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : start", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); struct proto_process_stack *cur_stack = &stack[stack_index]; int direction = cur_stack->direction; int must_wait = 0; pom_mutex_lock(&stream->wait_lock); int res = pthread_mutex_trylock(&stream->lock); if (res == EBUSY) { // Already locked, let's wait a bit must_wait = 1; } else if (res) { pomlog(POMLOG_ERR "Error while locking packet stream lock : %s", pom_strerror(errno)); abort(); return POM_ERR; } else { // We got the processing lock. But was it really this thread's turn ? struct packet_stream_thread_wait *tmp = stream->wait_list_head; // A thread with a packet preceding ours is waiting if (tmp && (tmp->ts.tv_sec < pkt->ts.tv_sec || (tmp->ts.tv_sec == pkt->ts.tv_sec && tmp->ts.tv_usec < pkt->ts.tv_usec))) { // No it wasn't, release it and signal the right thread must_wait = 2; pom_mutex_unlock(&stream->lock); debug_stream("thread %p, entry %p : signaling thread %p", pthread_self(), stream, stream->wait_list_head->thread); pthread_cond_broadcast(&stream->wait_list_head->cond); } else { // Yes it was. YAY ! pom_mutex_unlock(&stream->wait_lock); } } if (must_wait) { // Add ourself in the waiting list struct packet_stream_thread_wait *lst = NULL; if (stream->wait_list_unused) { lst = stream->wait_list_unused; stream->wait_list_unused = lst->next; lst->next = NULL; } else { lst = malloc(sizeof(struct packet_stream_thread_wait)); if (!lst) { pom_oom(sizeof(struct packet_stream_thread_wait)); pom_mutex_unlock(&stream->wait_lock); return POM_ERR; } memset(lst, 0, sizeof(struct packet_stream_thread_wait)); if (pthread_cond_init(&lst->cond, NULL)) { pomlog(POMLOG_ERR "Error while initializing wait list condition : %s", pom_strerror(errno)); free(lst); return POM_ERR; } } memcpy(&lst->ts, &pkt->ts, sizeof(struct timeval)); lst->thread = pthread_self(); struct packet_stream_thread_wait *tmp; for (tmp = stream->wait_list_head; tmp && (tmp->ts.tv_sec < lst->ts.tv_sec || (tmp->ts.tv_sec == lst->ts.tv_sec && tmp->ts.tv_usec < lst->ts.tv_usec)); tmp = tmp->next); if (tmp) { lst->prev = tmp->prev; if (lst->prev) lst->prev->next = lst; else stream->wait_list_head = lst; lst->next = tmp; lst->next->prev = lst; } else { lst->prev = stream->wait_list_tail; if (lst->prev) lst->prev->next = lst; else stream->wait_list_head = lst; stream->wait_list_tail = lst; } while (1) { debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : waiting", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); if (pthread_cond_wait(&lst->cond, &stream->wait_lock)) { pomlog(POMLOG_ERR "Error while waiting for the packet stream wait cond : %s", pom_strerror(errno)); abort(); return POM_ERR; } if (stream->wait_list_head != lst) { // There is a small chance that another stream lock stream->wait_lock while pthread_cond_wait acquires it // If we are not the right thread, then simply signal the right one and wait again for our turn debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : wrong thread woke up", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); pthread_cond_broadcast(&stream->wait_list_head->cond); continue; } break; } tmp = stream->wait_list_head; stream->wait_list_head = tmp->next; if (stream->wait_list_head) stream->wait_list_head->prev = NULL; else stream->wait_list_tail = NULL; tmp->next = stream->wait_list_unused; tmp->prev = NULL; stream->wait_list_unused = tmp; pom_mutex_unlock(&stream->wait_lock); pom_mutex_lock(&stream->lock); } debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : start locked", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); // Update the stream flags if (stream->flags & PACKET_FLAG_STREAM_BIDIR) { // Update flags if (direction == POM_DIR_FWD && !(stream->flags & PACKET_FLAG_STREAM_GOT_FWD_DIR)) { stream->flags |= PACKET_FLAG_STREAM_GOT_FWD_DIR; } else if (direction == POM_DIR_REV && !(stream->flags & PACKET_FLAG_STREAM_GOT_REV_DIR)) { stream->flags |= PACKET_FLAG_STREAM_GOT_REV_DIR; } } // Put this packet in our struct packet_stream_pkt struct packet_stream_pkt spkt = {0}; spkt.pkt = pkt; spkt.seq = seq; spkt.ack = ack; spkt.plen = cur_stack->plen; spkt.stack = stack; spkt.stack_index = stack_index; // Check if the packet is worth processing uint32_t cur_seq = stream->cur_seq[direction]; if (cur_seq != seq) { if (packet_stream_is_packet_old_dupe(stream, &spkt, direction)) { // cur_seq is after the end of the packet, discard it packet_stream_end_process_packet(stream); debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : discard", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); return PROTO_OK; } if (packet_stream_remove_dupe_bytes(stream, &spkt, direction) == POM_ERR) { packet_stream_end_process_packet(stream); return PROTO_ERR; } } // Ok let's process it then // Check if it is the packet we're waiting for if (packet_stream_is_packet_next(stream, &spkt, direction)) { // Process it stream->cur_seq[direction] += cur_stack->plen; stream->cur_ack[direction] = ack; debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : process", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); int res = stream->handler(stream->ce, pkt, stack, stack_index); if (res == PROTO_ERR) { packet_stream_end_process_packet(stream); return PROTO_ERR; } // Check if additional packets can be processed struct packet_stream_pkt *p = NULL; unsigned int cur_dir = direction, additional_processed = 0; while ((p = packet_stream_get_next(stream, &cur_dir))) { debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : process additional", pthread_self(), stream, p->pkt->ts.tv_sec, p->pkt->ts.tv_usec, p->seq, p->ack); if (stream->handler(stream->ce, p->pkt, p->stack, p->stack_index) == POM_ERR) { packet_stream_end_process_packet(stream); return PROTO_ERR; } stream->cur_seq[cur_dir] += p->plen; stream->cur_ack[cur_dir] = p->ack; packet_stream_free_packet(p); additional_processed = 1; } if (additional_processed) { if (!stream->head[POM_DIR_FWD] && !stream->head[POM_DIR_REV]) conntrack_timer_dequeue(stream->t); else conntrack_timer_queue(stream->t, stream->same_dir_timeout); } packet_stream_end_process_packet(stream); debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : done processed", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); return res; } // Queue the packet then debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : queue", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); struct packet_stream_pkt *p = malloc(sizeof(struct packet_stream_pkt)); if (!p) { pom_oom(sizeof(struct packet_stream_pkt)); packet_stream_end_process_packet(stream); return PROTO_ERR; } memset(p, 0 , sizeof(struct packet_stream_pkt)); if (cur_stack->plen) { // No need to backup this if there is no payload p->pkt = packet_clone(pkt, stream->flags); if (!p->pkt) { packet_stream_end_process_packet(stream); free(p); return PROTO_ERR; } p->stack = core_stack_backup(stack, pkt, p->pkt); if (!p->stack) { packet_stream_end_process_packet(stream); packet_pool_release(p->pkt); free(p); return PROTO_ERR; } } p->plen = cur_stack->plen; p->seq = seq; p->ack = ack; p->stack_index = stack_index; if (!stream->tail[direction]) { stream->head[direction] = p; stream->tail[direction] = p; } else { struct packet_stream_pkt *tmp = stream->tail[direction]; while ( tmp && ((tmp->seq >= seq && tmp->seq - seq < PACKET_HALF_SEQ) || (tmp->seq <= seq && seq - tmp->seq > PACKET_HALF_SEQ))) { tmp = tmp->prev; } if (!tmp) { // Packet goes at the begining of the list p->next = stream->head[direction]; if (p->next) p->next->prev = p; else stream->tail[direction] = p; stream->head[direction] = p; } else { // Insert the packet after the current one p->next = tmp->next; p->prev = tmp; if (p->next) p->next->prev = p; else stream->tail[direction] = p; tmp->next = p; } } stream->cur_buff_size += cur_stack->plen; if (stream->cur_buff_size >= stream->max_buff_size) { // Buffer overflow debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : buffer overflow, forced dequeue", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); if (packet_stream_force_dequeue(stream) != POM_OK) { packet_stream_end_process_packet(stream); return POM_ERR; } if (stream->t) conntrack_timer_dequeue(stream->t); } // Add timeout if (stream->t && (stream->head[POM_DIR_FWD] || stream->head[POM_DIR_REV])) conntrack_timer_queue(stream->t, stream->same_dir_timeout); packet_stream_end_process_packet(stream); debug_stream("thread %p, entry %p, packet %u.%06u, seq %u, ack %u : done queued", pthread_self(), stream, pkt->ts.tv_sec, pkt->ts.tv_usec, seq, ack); return PROTO_OK; }
static int proto_ipv4_process(void *proto_priv, struct packet *p, struct proto_process_stack *stack, unsigned int stack_index) { struct proto_process_stack *s = &stack[stack_index]; struct proto_process_stack *s_next = &stack[stack_index + 1]; struct ip* hdr = s->pload; unsigned int hdr_len = hdr->ip_hl * 4; if (s->plen < sizeof(struct ip) || // length smaller than header hdr->ip_hl < 5 || // ip header < 5 bytes ntohs(hdr->ip_len) < hdr_len || // datagram size < ip header length ntohs(hdr->ip_len) > s->plen) { // datagram size > given size return PROTO_INVALID; } PTYPE_IPV4_SETADDR(s->pkt_info->fields_value[proto_ipv4_field_src], hdr->ip_src); PTYPE_IPV4_SETADDR(s->pkt_info->fields_value[proto_ipv4_field_dst], hdr->ip_dst); PTYPE_UINT8_SETVAL(s->pkt_info->fields_value[proto_ipv4_field_tos], hdr->ip_tos); PTYPE_UINT8_SETVAL(s->pkt_info->fields_value[proto_ipv4_field_ttl], hdr->ip_ttl); // Handle conntrack stuff if (conntrack_get(stack, stack_index) != POM_OK) return PROTO_ERR; s_next->pload = s->pload + hdr_len; s_next->plen = ntohs(hdr->ip_len) - hdr_len; s_next->proto = proto_get_by_number(s->proto, hdr->ip_p); int res = POM_ERR; if (s->ce->children) { res = conntrack_delayed_cleanup(s->ce, 0, p->ts); } else { uint32_t *conntrack_timeout = PTYPE_UINT32_GETVAL(param_conntrack_timeout); res = conntrack_delayed_cleanup(s->ce, *conntrack_timeout, p->ts); } if (res == POM_ERR) { conntrack_unlock(s->ce); return PROTO_ERR; } uint16_t frag_off = ntohs(hdr->ip_off); // Check if packet is fragmented and need more handling if (frag_off & IP_DONT_FRAG) { conntrack_unlock(s->ce); return PROTO_OK; // Nothing to do } if (!(frag_off & IP_MORE_FRAG) && !(frag_off & IP_OFFSET_MASK)) { conntrack_unlock(s->ce); return PROTO_OK; // Nothing to do, full packet } uint16_t offset = (frag_off & IP_OFFSET_MASK) << 3; size_t frag_size = ntohs(hdr->ip_len) - (hdr->ip_hl * 4); // Ignore invalid fragments if (frag_size > 0xFFFF) { conntrack_unlock(s->ce); return PROTO_INVALID; } if (frag_size > s->plen + hdr_len) { conntrack_unlock(s->ce); return PROTO_INVALID; } // Account for one more fragment registry_perf_inc(perf_frags, 1); struct proto_ipv4_fragment *tmp = s->ce->priv; // Let's find the right buffer for (; tmp && tmp->id != hdr->ip_id; tmp = tmp->next); if (!tmp) { // Buffer not found, create it tmp = malloc(sizeof(struct proto_ipv4_fragment)); if (!tmp) { pom_oom(sizeof(struct proto_ipv4_fragment)); conntrack_unlock(s->ce); return PROTO_ERR; } memset(tmp, 0, sizeof(struct proto_ipv4_fragment)); tmp->t = conntrack_timer_alloc(s->ce, proto_ipv4_fragment_cleanup, tmp); if (!tmp->t) { conntrack_unlock(s->ce); free(tmp); return PROTO_ERR; } tmp->id = hdr->ip_id; if (!s_next->proto) { // Set processed flag so no attempt to process this will be done tmp->flags |= PROTO_IPV4_FLAG_PROCESSED; conntrack_unlock(s->ce); conntrack_timer_cleanup(tmp->t); free(tmp); return PROTO_STOP; } tmp->multipart = packet_multipart_alloc(s_next->proto, 0); if (!tmp->multipart) { conntrack_unlock(s->ce); conntrack_timer_cleanup(tmp->t); free(tmp); return PROTO_ERR; } tmp->next = s->ce->priv; if (tmp->next) tmp->next->prev = tmp; s->ce->priv = tmp; } // Fragment was already handled if (tmp->flags & PROTO_IPV4_FLAG_PROCESSED) { conntrack_unlock(s->ce); registry_perf_inc(perf_frags_dropped, 1); return PROTO_STOP; } // Add the fragment if (packet_multipart_add_packet(tmp->multipart, p, offset, frag_size, (s->pload - (void*)p->buff) + (hdr->ip_hl * 4)) != POM_OK) { conntrack_unlock(s->ce); packet_multipart_cleanup(tmp->multipart); conntrack_timer_cleanup(tmp->t); free(tmp); return PROTO_ERR; } tmp->count++; // Schedule the timeout for the fragment uint32_t *frag_timeout = PTYPE_UINT32_GETVAL(param_frag_timeout); conntrack_timer_queue(tmp->t, *frag_timeout, p->ts); if (!(frag_off & IP_MORE_FRAG)) tmp->flags |= PROTO_IPV4_FLAG_GOT_LAST; if ((tmp->flags & PROTO_IPV4_FLAG_GOT_LAST) && !tmp->multipart->gaps) tmp->flags |= PROTO_IPV4_FLAG_PROCESSED; conntrack_unlock(s->ce); if ((tmp->flags & PROTO_IPV4_FLAG_PROCESSED)) { int res = packet_multipart_process(tmp->multipart, stack, stack_index + 1); tmp->multipart = NULL; // Multipart will be cleared automatically if (res == PROTO_ERR) { return PROTO_ERR; } else if (res == PROTO_INVALID) { registry_perf_inc(perf_frags_dropped, tmp->count); } else { registry_perf_inc(perf_reassembled_pkts, 1); } } return PROTO_STOP; // Stop processing the packet }