static void vr_flow_flush(void *arg) { struct vrouter *router; struct vr_flow_entry *fe; struct vr_forwarding_md fmd; struct vr_flow_md *flmd = (struct vr_flow_md *)arg; router = flmd->flmd_router; if (!router) return; fe = vr_get_flow_entry(router, flmd->flmd_index); if (!fe) return; vr_init_forwarding_md(&fmd); vr_flow_set_forwarding_md(router, fe, flmd->flmd_index, &fmd); vr_flush_entry(router, fe, flmd, &fmd); if (!(flmd->flmd_flags & VR_FLOW_FLAG_ACTIVE)) { vr_reset_flow_entry(router, fe, flmd->flmd_index); } return; }
/*
 * Populate the forwarding metadata from a flow entry: the flow index,
 * the ECMP nexthop index, and — when a reverse flow exists — the source
 * ECMP nexthop index taken from the reverse entry.
 */
static void
vr_flow_set_forwarding_md(struct vrouter *router, struct vr_flow_entry *fe,
        unsigned int index, struct vr_forwarding_md *md)
{
    struct vr_flow_entry *reverse_fe;

    md->fmd_flow_index = index;
    md->fmd_ecmp_nh_index = fe->fe_ecmp_nh_index;

    if (!(fe->fe_flags & VR_RFLOW_VALID))
        return;

    reverse_fe = vr_get_flow_entry(router, fe->fe_rflow);
    if (reverse_fe)
        md->fmd_ecmp_src_nh_index = reverse_fe->fe_ecmp_nh_index;

    return;
}
/*
 * can be called with 'fe' as null (specifically when flow is added from
 * agent), in which case we should be checking only the request
 */
static int
vr_flow_req_is_invalid(struct vrouter *router, vr_flow_req *req,
        struct vr_flow_entry *fe)
{
    /* when an entry exists, the key in the request must match it exactly */
    if (fe) {
        bool key_mismatch =
            ((unsigned int)req->fr_flow_sip != fe->fe_key.key_src_ip) ||
            ((unsigned int)req->fr_flow_dip != fe->fe_key.key_dest_ip) ||
            ((unsigned short)req->fr_flow_sport != fe->fe_key.key_src_port) ||
            ((unsigned short)req->fr_flow_dport != fe->fe_key.key_dst_port) ||
            ((unsigned short)req->fr_flow_nh_id != fe->fe_key.key_nh_id) ||
            ((unsigned char)req->fr_flow_proto != fe->fe_key.key_proto);

        if (key_mismatch)
            return -EBADF;
    }

    if ((req->fr_flags & VR_FLOW_FLAG_VRFT) &&
            ((unsigned short)req->fr_flow_dvrf >= VR_MAX_VRFS))
        return -EINVAL;

    /*
     * with mirroring requested, at least one of the two mirror indices
     * must be in range (both out of range is rejected)
     */
    if (req->fr_flags & VR_FLOW_FLAG_MIRROR) {
        if (((unsigned int)req->fr_mir_id >= router->vr_max_mirror_indices) &&
                ((unsigned int)req->fr_sec_mir_id >=
                 router->vr_max_mirror_indices))
            return -EINVAL;
    }

    /* a declared reverse flow must actually resolve to an entry */
    if ((req->fr_flags & VR_RFLOW_VALID) &&
            !vr_get_flow_entry(router, req->fr_rindex))
        return -EINVAL;

    /*
     * for delete, we need not validate nh_index from incoming request
     */
    if ((req->fr_flags & VR_FLOW_FLAG_ACTIVE) &&
            !__vrouter_get_nexthop(router, req->fr_src_nh_index))
        return -EINVAL;

    return 0;
}
/*
 * Deferred free of a flow's hold queue: flush any packets still queued
 * on the entry (when it still exists), then release the queue memory.
 */
static void
vr_flow_queue_free(struct vrouter *router, void *arg)
{
    struct vr_defer_data *defer = (struct vr_defer_data *)arg;
    struct vr_flow_queue *vfq;
    struct vr_flow_entry *fe;
    struct vr_forwarding_md fmd;

    if (!defer)
        return;

    vr_init_forwarding_md(&fmd);

    vfq = (struct vr_flow_queue *)defer->vdd_data;
    fe = vr_get_flow_entry(router, vfq->vfq_index);
    if (fe) {
        vr_flow_set_forwarding_md(router, fe, vfq->vfq_index, &fmd);
        vr_flush_flow_queue(router, fe, &fmd, vfq);
    }

    /* queue memory is owned here regardless of the entry's existence */
    vr_free(vfq);

    return;
}
/*
 * Drop and reset every entry in the main flow table and the overflow
 * table, then clear the table-wide bookkeeping (vr_flow_table_info).
 * Each live entry is forced to the DROP action and flushed before being
 * reset, so held packets are released rather than leaked.
 */
static void
vr_flow_table_reset(struct vrouter *router)
{
    unsigned int start, end, i;
    struct vr_flow_entry *fe;
    struct vr_forwarding_md fmd;
    struct vr_flow_md flmd;

    /* [start, end) is the index range to sweep across both tables */
    start = end = 0;
    if (router->vr_flow_table)
        end = vr_btable_entries(router->vr_flow_table);

    if (router->vr_oflow_table) {
        /*
         * overflow entries are indexed after the main table, so when the
         * main table is absent the sweep starts at vr_flow_entries.
         * NOTE(review): in that case 'end' becomes only the overflow
         * entry count, which is below 'start' unless the overflow table
         * is larger than vr_flow_entries — confirm whether 'end' should
         * instead be vr_flow_entries + overflow entries here.
         */
        if (!end)
            start = vr_flow_entries;
        end += vr_btable_entries(router->vr_oflow_table);
    }

    if (end) {
        vr_init_forwarding_md(&fmd);
        flmd.flmd_action = VR_FLOW_ACTION_DROP;
        for (i = start; i < end; i++) {
            fe = vr_get_flow_entry(router, i);
            if (fe) {
                flmd.flmd_index = i;
                flmd.flmd_flags = fe->fe_flags;
                /* force DROP so the flush discards queued packets */
                fe->fe_action = VR_FLOW_ACTION_DROP;
                vr_flush_entry(router, fe, &flmd, &fmd);
                vr_reset_flow_entry(router, fe, i);
            }
        }
    }

    vr_flow_table_info_reset(router);

    return;
}
/*
 * Track TCP session state for the flow this packet matched: record
 * SYN/SYN-ACK handshake progress, FIN and RST teardown, updating
 * fe_tcp_flags atomically on both the flow and its reverse flow.
 * Close processing (vr_flow_init_close) is kicked off on RST and on the
 * ACK that completes a bidirectional FIN exchange.
 */
static void
vr_flow_tcp_digest(struct vrouter *router, struct vr_flow_entry *flow_e,
        struct vr_packet *pkt, struct vr_forwarding_md *fmd)
{
    uint16_t tcp_offset_flags;
    unsigned int length;
    struct vr_ip *iph;
    struct vr_ip6 *ip6h;
    struct vr_tcp *tcph = NULL;
    struct vr_flow_entry *rflow_e = NULL;

    iph = (struct vr_ip *)pkt_network_header(pkt);
    if (!vr_ip_transport_header_valid(iph))
        return;

    /* locate the TCP header for v4/v6; non-TCP packets are ignored */
    if (pkt->vp_type == VP_TYPE_IP) {
        if (iph->ip_proto != VR_IP_PROTO_TCP)
            return;
        /* length = L4 payload bytes (total length minus IP header) */
        length = ntohs(iph->ip_len) - (iph->ip_hl * 4);
        tcph = (struct vr_tcp *)((unsigned char *)iph + (iph->ip_hl * 4));
    } else if (pkt->vp_type == VP_TYPE_IP6) {
        ip6h = (struct vr_ip6 *)iph;
        if (ip6h->ip6_nxt != VR_IP_PROTO_TCP)
            return;
        length = ntohs(ip6h->ip6_plen);
        tcph = (struct vr_tcp *)((unsigned char *)iph + sizeof(struct vr_ip6));
    }

    if (tcph) {
        /*
         * there are some optimizations here that makes the code slightly
         * not so frugal. For e.g.: the *_R flags are used to make sure that
         * for a packet that contains ACK, we will not need to fetch the
         * reverse flow if we are not interested, thus saving some execution
         * time.
         */
        tcp_offset_flags = ntohs(tcph->tcp_offset_r_flags);

        /* if we get a reset, session has to be closed */
        if (tcp_offset_flags & VR_TCP_FLAG_RST) {
            (void)__sync_fetch_and_or(&flow_e->fe_tcp_flags,
                    VR_FLOW_TCP_RST);
            if (flow_e->fe_flags & VR_RFLOW_VALID) {
                rflow_e = vr_get_flow_entry(router, flow_e->fe_rflow);
                if (rflow_e) {
                    (void)__sync_fetch_and_or(&rflow_e->fe_tcp_flags,
                            VR_FLOW_TCP_RST);
                }
            }
            vr_flow_init_close(router, flow_e, pkt, fmd);
            return;
        } else if (tcp_offset_flags & VR_TCP_FLAG_SYN) {
            /*
             * if only a SYN: record the initial sequence and mark SYN on
             * this flow and SYN_R on the reverse flow; when the packet is
             * a SYN+ACK and the reverse flow already saw our SYN, check
             * the ack number to decide whether the session is established
             */
            flow_e->fe_tcp_seq = ntohl(tcph->tcp_seq);
            (void)__sync_fetch_and_or(&flow_e->fe_tcp_flags,
                    VR_FLOW_TCP_SYN);
            if (flow_e->fe_flags & VR_RFLOW_VALID) {
                rflow_e = vr_get_flow_entry(router, flow_e->fe_rflow);
                if (rflow_e) {
                    (void)__sync_fetch_and_or(&rflow_e->fe_tcp_flags,
                            VR_FLOW_TCP_SYN_R);
                    if ((flow_e->fe_tcp_flags & VR_FLOW_TCP_SYN_R) &&
                            (tcp_offset_flags & VR_TCP_FLAG_ACK)) {
                        if (ntohl(tcph->tcp_ack) ==
                                (rflow_e->fe_tcp_seq + 1)) {
                            (void)__sync_fetch_and_or(&rflow_e->fe_tcp_flags,
                                    VR_FLOW_TCP_ESTABLISHED);
                            (void)__sync_fetch_and_or(&flow_e->fe_tcp_flags,
                                    VR_FLOW_TCP_ESTABLISHED_R);
                        }
                    }
                }
            }
        } else if (tcp_offset_flags & VR_TCP_FLAG_FIN) {
            /*
             * when a FIN is received, update the sequence of the FIN and set
             * the flow FIN flag. It is possible that the FIN packet came with
             * some data, in which case the sequence number of the FIN is one
             * more than the last data byte in the sequence
             */
            /* subtract the TCP header (data offset, upper 4 bits) */
            length -= (((tcp_offset_flags) >> 12) * 4);
            flow_e->fe_tcp_seq = ntohl(tcph->tcp_seq) + length;
            (void)__sync_fetch_and_or(&flow_e->fe_tcp_flags,
                    VR_FLOW_TCP_FIN);
            /*
             * when an ack for a FIN is sent, we need to take some actions
             * on the reverse flow (since FIN came in the reverse flow). to
             * avoid looking up the reverse flow for all acks, we mark the
             * reverse flow's reverse flow with a flag (FIN_R). we will
             * lookup the reverse flow only if this flag is set and the
             * tcp header has an ack bit set
             */
            if (flow_e->fe_flags & VR_RFLOW_VALID) {
                rflow_e = vr_get_flow_entry(router, flow_e->fe_rflow);
                if (rflow_e) {
                    (void)__sync_fetch_and_or(&rflow_e->fe_tcp_flags,
                            VR_FLOW_TCP_FIN_R);
                }
            }
        }

        /*
         * if FIN_R is set in the flow and if the ACK bit is set in the
         * tcp header, then we need to mark the reverse flow as dead.
         *
         * OR
         *
         * if the SYN_R is set and ESTABLISHED_R is not set and if this
         * is an ack packet, if this ack completes the connection, we
         * need to set ESTABLISHED
         */
        if (((flow_e->fe_tcp_flags & VR_FLOW_TCP_FIN_R) ||
                    (!(flow_e->fe_tcp_flags & VR_FLOW_TCP_ESTABLISHED_R) &&
                     (flow_e->fe_tcp_flags & VR_FLOW_TCP_SYN_R))) &&
                (tcp_offset_flags & VR_TCP_FLAG_ACK)) {
            if (flow_e->fe_flags & VR_RFLOW_VALID) {
                /* rflow_e may already be cached from the branches above */
                if (!rflow_e) {
                    rflow_e = vr_get_flow_entry(router, flow_e->fe_rflow);
                }
                if (rflow_e) {
                    if ((ntohl(tcph->tcp_ack) ==
                                (rflow_e->fe_tcp_seq + 1)) &&
                            (flow_e->fe_tcp_flags & VR_FLOW_TCP_FIN_R)) {
                        (void)__sync_fetch_and_or(&rflow_e->fe_tcp_flags,
                                VR_FLOW_TCP_HALF_CLOSE);
                        /*
                         * both the forward and the reverse flows are
                         * now dead
                         */
                        if (flow_e->fe_tcp_flags & VR_FLOW_TCP_HALF_CLOSE) {
                            vr_flow_init_close(router, flow_e, pkt, fmd);
                        }
                    } else if (ntohl(tcph->tcp_ack) != rflow_e->fe_tcp_seq) {
                        if (!(flow_e->fe_tcp_flags &
                                    VR_FLOW_TCP_ESTABLISHED_R)) {
                            (void)__sync_fetch_and_or(&rflow_e->fe_tcp_flags,
                                    VR_FLOW_TCP_ESTABLISHED);
                            (void)__sync_fetch_and_or(&flow_e->fe_tcp_flags,
                                    VR_FLOW_TCP_ESTABLISHED_R);
                        }
                    }
                }
            }
        }
    }
}
/*
 * Apply the NAT rewrites recorded in the flow entry to the packet:
 * source/destination address translation (SNAT/DNAT), port translation
 * (SPAT/DPAT), the embedded header of ICMP error messages, and an
 * incremental IP checksum fix-up — then hand the packet to
 * vr_flow_forward(). The translated values are read from the reverse
 * flow's key; without a valid reverse flow the packet is dropped.
 */
static int
vr_flow_nat(unsigned short vrf, struct vr_flow_entry *fe,
        struct vr_packet *pkt, unsigned short proto,
        struct vr_forwarding_md *fmd)
{
    unsigned int ip_inc, inc = 0;
    unsigned short *t_sport, *t_dport;
    struct vrouter *router = pkt->vp_if->vif_router;
    struct vr_flow_entry *rfe;
    struct vr_ip *ip, *icmp_pl_ip;
    struct vr_icmp *icmph;
    bool hdr_update = false;

    if (fe->fe_rflow < 0)
        goto drop;

    rfe = vr_get_flow_entry(router, fe->fe_rflow);
    if (!rfe)
        goto drop;

    ip = (struct vr_ip *)pkt_data(pkt);
    if (ip->ip_proto == VR_IP_PROTO_ICMP) {
        icmph = (struct vr_icmp *)((unsigned char *)ip + (ip->ip_hl * 4));
        if (vr_icmp_error(icmph)) {
            /*
             * the IP header embedded in an ICMP error is the offending
             * (reverse-direction) packet, so SNAT rewrites its destination
             * and DNAT its source — the mirror image of the outer header
             */
            icmp_pl_ip = (struct vr_ip *)(icmph + 1);
            if (fe->fe_flags & VR_FLOW_FLAG_SNAT) {
                icmp_pl_ip->ip_daddr = rfe->fe_key.key_dest_ip;
                hdr_update = true;
            }
            if (fe->fe_flags & VR_FLOW_FLAG_DNAT) {
                icmp_pl_ip->ip_saddr = rfe->fe_key.key_src_ip;
                hdr_update = true;
            }
            if (hdr_update)
                icmp_pl_ip->ip_csum = vr_ip_csum(icmp_pl_ip);

            /* same mirroring for the embedded transport ports */
            t_sport = (unsigned short *)((unsigned char *)icmp_pl_ip +
                    (icmp_pl_ip->ip_hl * 4));
            t_dport = t_sport + 1;
            if (fe->fe_flags & VR_FLOW_FLAG_SPAT)
                *t_dport = rfe->fe_key.key_dst_port;
            if (fe->fe_flags & VR_FLOW_FLAG_DPAT)
                *t_sport = rfe->fe_key.key_src_port;
            /*
             * NOTE(review): the outer ICMP checksum is not adjusted here
             * after rewriting the embedded header — confirm that this is
             * compensated for elsewhere on the path
             */
        }
    }

    /* outer header translation; 'inc' accumulates the checksum delta */
    if ((fe->fe_flags & VR_FLOW_FLAG_SNAT) &&
            (ip->ip_saddr == fe->fe_key.key_src_ip)) {
        vr_incremental_diff(ip->ip_saddr, rfe->fe_key.key_dest_ip, &inc);
        ip->ip_saddr = rfe->fe_key.key_dest_ip;
    }

    if (fe->fe_flags & VR_FLOW_FLAG_DNAT) {
        vr_incremental_diff(ip->ip_daddr, rfe->fe_key.key_src_ip, &inc);
        ip->ip_daddr = rfe->fe_key.key_src_ip;
    }

    /* delta so far covers only IP-header fields */
    ip_inc = inc;

    if (vr_ip_transport_header_valid(ip)) {
        t_sport = (unsigned short *)((unsigned char *)ip +
                (ip->ip_hl * 4));
        t_dport = t_sport + 1;
        if (fe->fe_flags & VR_FLOW_FLAG_SPAT) {
            vr_incremental_diff(*t_sport, rfe->fe_key.key_dst_port, &inc);
            *t_sport = rfe->fe_key.key_dst_port;
        }
        if (fe->fe_flags & VR_FLOW_FLAG_DPAT) {
            vr_incremental_diff(*t_dport, rfe->fe_key.key_src_port, &inc);
            *t_dport = rfe->fe_key.key_src_port;
        }
    }

#ifdef VROUTER_CONFIG_DIAG
    /* diagnostic packets carry a magic checksum that must be preserved */
    if (ip->ip_csum != VR_DIAG_IP_CSUM)
        vr_ip_update_csum(pkt, ip_inc, inc);
#else
    vr_ip_update_csum(pkt, ip_inc, inc);
#endif

    /*
     * If VRF is translated lets chose a new nexthop
     */
    if ((fe->fe_flags & VR_FLOW_FLAG_VRFT) &&
            pkt->vp_nh &&
            pkt->vp_nh->nh_vrf != vrf)
        pkt->vp_nh = NULL;

    return vr_flow_forward(vrf, pkt, proto, fmd);

drop:
    vr_pfree(pkt, VP_DROP_FLOW_NAT_NO_RFLOW);
    return 0;
}
/*
 * command from agent: add, modify, or delete a flow entry as described
 * by the request. Returns 0 on success or a negative errno.
 *
 * Fix: 'infop' was previously initialized from the caller-supplied
 * 'router' pointer at declaration time, before the request's router id
 * was resolved via vrouter_get() and NULL-checked. That dereferenced a
 * possibly-NULL pointer and could bump the HOLD-action counter on the
 * wrong router's table info. The assignment now happens only after the
 * validated lookup.
 */
static int
vr_flow_set(struct vrouter *router, vr_flow_req *req)
{
    int ret;
    unsigned int fe_index;
    struct vr_flow_entry *fe = NULL;
    struct vr_flow_table_info *infop;

    /* resolve the router the request actually targets before any use */
    router = vrouter_get(req->fr_rid);
    if (!router)
        return -EINVAL;

    infop = router->vr_flow_table_info;

    fe = vr_get_flow_entry(router, req->fr_index);
    if ((ret = vr_flow_req_is_invalid(router, req, fe)))
        return ret;

    /* a HOLD entry leaving HOLD (or being deleted) resolves one action */
    if (fe && (fe->fe_action == VR_FLOW_ACTION_HOLD) &&
            ((req->fr_action != fe->fe_action) ||
             !(req->fr_flags & VR_FLOW_FLAG_ACTIVE)))
        __sync_fetch_and_add(&infop->vfti_action_count, 1);

    /*
     * for delete, absence of the requested flow entry is caustic. so
     * handle that case first
     */
    if (!(req->fr_flags & VR_FLOW_FLAG_ACTIVE)) {
        if (!fe)
            return -EINVAL;
        return vr_flow_delete(router, req, fe);
    }

    /*
     * for non-delete cases, absence of flow entry means addition of a
     * new flow entry with the key specified in the request
     */
    if (!fe) {
        fe = vr_add_flow_req(req, &fe_index);
        if (!fe)
            return -ENOSPC;
    }

    vr_flow_set_mirror(router, req, fe);

    if (req->fr_flags & VR_RFLOW_VALID) {
        fe->fe_rflow = req->fr_rindex;
    } else {
        if (fe->fe_rflow >= 0)
            fe->fe_rflow = -1;
    }

    fe->fe_vrf = req->fr_flow_vrf;
    if (req->fr_flags & VR_FLOW_FLAG_VRFT)
        fe->fe_dvrf = req->fr_flow_dvrf;

    fe->fe_ecmp_nh_index = req->fr_ecmp_nh_index;
    fe->fe_src_nh_index = req->fr_src_nh_index;
    fe->fe_action = req->fr_action;
    fe->fe_flags = req->fr_flags;

    return vr_flow_schedule_transition(router, req, fe);
}