/* SIGCHLD handler: polls every process on 'all_processes' for an exit status
 * and records it, then pokes the self-pipe so the main loop wakes up.
 *
 * This runs in signal context, so it sticks to async-signal-safe calls
 * (waitpid(), write()).  NOTE(review): COVERAGE_INC presumably just bumps a
 * counter and is safe here — confirm against the coverage implementation. */
static void
sigchld_handler(int signr OVS_UNUSED)
{
    struct process *p;

    COVERAGE_INC(process_sigchld);
    LIST_FOR_EACH (p, struct process, node, &all_processes) {
        if (!p->exited) {
            int retval, status;
            do {
                /* WNOHANG: reap only if this particular child has exited;
                 * never block inside a signal handler. */
                retval = waitpid(p->pid, &status, WNOHANG);
            } while (retval == -1 && errno == EINTR);
            if (retval == p->pid) {
                p->exited = true;
                p->status = status;
            } else if (retval < 0) {
                /* XXX We want to log something but we're in a signal
                 * handler. */
                p->exited = true;
                p->status = -1;
            }
        }
    }
    /* Self-pipe trick: a one-byte write wakes any poll on fds[0].  The
     * result is deliberately ignored (best effort; pipe may be full). */
    ignore(write(fds[1], "", 1));
}
/* Fills in the Netlink header of 'msg' (length, sequence number, port id)
 * and sends it on 'sock', retrying on EINTR.  If 'wait' is false the send is
 * non-blocking on POSIX (MSG_DONTWAIT).
 *
 * Returns 0 on success, otherwise a positive errno value. */
static int
nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg,
               uint32_t nlmsg_seq, bool wait)
{
    struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg);
    int error;

    /* Stamp the header in place; 'msg' is const but the header bytes are
     * intentionally rewritten before every send. */
    nlmsg->nlmsg_len = ofpbuf_size(msg);
    nlmsg->nlmsg_seq = nlmsg_seq;
    nlmsg->nlmsg_pid = sock->pid;
    do {
        int retval;
#ifdef _WIN32
        bool result;
        DWORD last_error = 0;
        /* NOTE(review): WriteFile's 4th argument is LPDWORD but '&retval'
         * is 'int *' — same size on Windows, but worth confirming this
         * compiles cleanly and behaves as intended. */
        result = WriteFile(sock->handle, ofpbuf_data(msg), ofpbuf_size(msg),
                           &retval, NULL);
        last_error = GetLastError();
        if (last_error != ERROR_SUCCESS && !result) {
            /* Map any WriteFile failure to EAGAIN so callers retry. */
            retval = -1;
            errno = EAGAIN;
        }
#else
        retval = send(sock->fd, ofpbuf_data(msg), ofpbuf_size(msg),
                      wait ? 0 : MSG_DONTWAIT);
#endif
        error = retval < 0 ? errno : 0;
    } while (error == EINTR);
    log_nlmsg(__func__, error, ofpbuf_data(msg), ofpbuf_size(msg),
              sock->protocol);
    if (!error) {
        COVERAGE_INC(netlink_sent);
    }
    return error;
}
/* Rebuilds 'hmap' with a bucket array sized by 'new_mask' (which must be a
 * power of 2 minus 1), rehashing every node into a temporary map and then
 * swapping it in.  'where' identifies the caller for debug logging.
 *
 * Nodes are moved, not copied: hmap_insert_fast() relinks each node into
 * 'tmp', so pointers to nodes remain valid across the resize. */
static void
resize(struct hmap *hmap, size_t new_mask, const char *where)
{
    struct hmap tmp;
    size_t i;

    ovs_assert(is_pow2(new_mask + 1));

    hmap_init(&tmp);
    if (new_mask) {
        tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1));
        tmp.mask = new_mask;
        for (i = 0; i <= tmp.mask; i++) {
            tmp.buckets[i] = NULL;
        }
    }
    for (i = 0; i <= hmap->mask; i++) {
        struct hmap_node *node, *next;
        int count = 0;
        /* Save 'next' first: hmap_insert_fast() rewrites node->next. */
        for (node = hmap->buckets[i]; node; node = next) {
            next = node->next;
            hmap_insert_fast(&tmp, node, node->hash);
            count++;
        }
        if (count > 5) {
            /* An unusually long chain suggests a poor hash function. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
            COVERAGE_INC(hmap_pathological);
            VLOG_DBG_RL(&rl, "%s: %d nodes in bucket (%"PRIuSIZE" nodes, %"PRIuSIZE" buckets)", where, count, hmap->n, hmap->mask + 1);
        }
    }
    /* 'tmp' now holds the new table; swap it into place and free the old
     * bucket array (now owned by 'tmp'). */
    hmap_swap(hmap, &tmp);
    hmap_destroy(&tmp);
}
/* Starts the program given by null-terminated 'argv' (see process_start()
 * for the meaning of 'keep_fds' and 'null_fds'), then blocks until it exits.
 *
 * On success returns 0 and stores the child's exit status in '*status'; on
 * failure returns a positive errno value and stores 0 in '*status'. */
int
process_run(char **argv, const int keep_fds[], size_t n_keep_fds,
            const int null_fds[], size_t n_null_fds, int *status)
{
    struct process *child;
    int error;

    COVERAGE_INC(process_run);
    error = process_start(argv, keep_fds, n_keep_fds, null_fds, n_null_fds,
                          &child);
    if (error) {
        *status = 0;
        return error;
    }

    for (;;) {
        if (process_exited(child)) {
            break;
        }
        process_wait(child);
        poll_block();
    }

    *status = process_status(child);
    process_destroy(child);
    return 0;
}
/* Attempts to make 'ml' learn from the fact that a frame from 'src_mac' was
 * just observed arriving from 'src_port' on the given 'vlan'.
 *
 * Returns nonzero if we actually learned something from this, zero if it just
 * confirms what we already knew.  The nonzero return value is the tag of flows
 * that now need revalidation.
 *
 * The 'vlan' parameter is used to maintain separate per-VLAN learning tables.
 * Specify 0 if this behavior is undesirable.
 *
 * 'lock_type' specifies whether the entry should be locked or existing locks
 * are checked. */
tag_type
mac_learning_learn(struct mac_learning *ml,
                   const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan,
                   uint16_t src_port, enum grat_arp_lock_type lock_type)
{
    struct mac_entry *e;
    struct list *bucket;

    if (!is_learning_vlan(ml, vlan)) {
        return 0;
    }

    if (eth_addr_is_multicast(src_mac)) {
        /* A multicast source address is never legitimate; log and ignore. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 30);
        VLOG_DBG_RL(&rl, "multicast packet source "ETH_ADDR_FMT,
                    ETH_ADDR_ARGS(src_mac));
        return 0;
    }

    bucket = mac_table_bucket(ml, src_mac, vlan);
    e = search_bucket(bucket, src_mac, vlan);
    if (!e) {
        /* No existing entry: take one from the free list, or evict the
         * least-recently-used entry (head of 'lrus'). */
        if (!list_is_empty(&ml->free)) {
            e = mac_entry_from_lru_node(ml->free.next);
        } else {
            e = mac_entry_from_lru_node(ml->lrus.next);
            list_remove(&e->hash_node);
        }
        memcpy(e->mac, src_mac, ETH_ADDR_LEN);
        list_push_front(bucket, &e->hash_node);
        /* port == -1 marks the entry as "not yet learned", so the port
         * comparison below always fires for a fresh entry. */
        e->port = -1;
        e->vlan = vlan;
        e->tag = make_unknown_mac_tag(ml, src_mac, vlan);
        /* TIME_MIN: a brand-new entry is never gratuitous-ARP locked. */
        e->grat_arp_lock = TIME_MIN;
    }

    if (lock_type != GRAT_ARP_LOCK_CHECK || time_now() >= e->grat_arp_lock) {
        /* Make the entry most-recently-used. */
        list_remove(&e->lru_node);
        list_push_back(&ml->lrus, &e->lru_node);
        e->expires = time_now() + MAC_ENTRY_IDLE_TIME;
        if (lock_type == GRAT_ARP_LOCK_SET) {
            e->grat_arp_lock = time_now() + MAC_GRAT_ARP_LOCK_TIME;
        }

        /* Did we learn something? */
        if (e->port != src_port) {
            /* Return the old tag so flows that used the stale port get
             * revalidated, and assign a fresh random tag. */
            tag_type old_tag = e->tag;
            e->port = src_port;
            e->tag = tag_create_random();
            COVERAGE_INC(mac_learning_learned);
            return old_tag;
        }
    }
    return 0;
}
/* Shrinks 'hindex', if necessary, to optimize the performance of iteration. */
void
hindex_shrink(struct hindex *hindex)
{
    size_t wanted_mask = hindex_calc_mask(hindex->n_unique);

    if (wanted_mask >= hindex->mask) {
        /* Already at (or below) the ideal size; nothing to do. */
        return;
    }
    COVERAGE_INC(hindex_shrink);
    hindex_resize(hindex, wanted_mask);
}
/* Expands 'hmap', if necessary, to optimize the performance of searches when
 * it has up to 'n' elements.  (But iteration will be slow in a hash map whose
 * allocated capacity is much higher than its current number of nodes.)
 *
 * ('where' is used in debug logging.  Commonly one would use hmap_reserve() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
void
hmap_reserve_at(struct hmap *hmap, size_t n, const char *where)
{
    size_t wanted_mask = calc_mask(n);

    if (wanted_mask <= hmap->mask) {
        /* Current table already big enough for 'n' elements. */
        return;
    }
    COVERAGE_INC(hmap_reserve);
    resize(hmap, wanted_mask, where);
}
/* Shrinks 'hmap', if necessary, to optimize the performance of iteration.
 *
 * ('where' is used in debug logging.  Commonly one would use hmap_shrink() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
void
hmap_shrink_at(struct hmap *hmap, const char *where)
{
    size_t wanted_mask = calc_mask(hmap->n);

    if (wanted_mask >= hmap->mask) {
        /* Table is not oversized; keep it as is. */
        return;
    }
    COVERAGE_INC(hmap_shrink);
    resize(hmap, wanted_mask, where);
}
/* Expands 'hindex', if necessary, to optimize the performance of searches when
 * it has up to 'n' unique hashes.  (But iteration will be slow in a hash index
 * whose allocated capacity is much higher than its current number of
 * nodes.) */
void
hindex_reserve(struct hindex *hindex, size_t n)
{
    size_t wanted_mask = hindex_calc_mask(n);

    if (wanted_mask <= hindex->mask) {
        /* Already sized for at least 'n' unique hashes. */
        return;
    }
    COVERAGE_INC(hindex_reserve);
    hindex_resize(hindex, wanted_mask);
}
/* Expands 'hindex', if necessary, to optimize the performance of searches. */
void
hindex_expand(struct hindex *hindex)
{
    size_t wanted_mask = hindex_calc_mask(hindex->n_unique);

    if (wanted_mask <= hindex->mask) {
        /* Current capacity is already adequate. */
        return;
    }
    COVERAGE_INC(hindex_expand);
    hindex_resize(hindex, wanted_mask);
}
/* Like malloc(), but never returns NULL: aborts via out_of_memory() on
 * allocation failure.  A zero 'size' is rounded up to 1 so that the returned
 * pointer is always unique and freeable. */
void *
xmalloc(size_t size)
{
    void *p;

    p = malloc(size ? size : 1);
    COVERAGE_INC(util_xalloc);
    if (!p) {
        out_of_memory();
    }
    return p;
}
void * xcalloc(size_t count, size_t size) { void *p = count && size ? calloc(count, size) : malloc(1); COVERAGE_INC(util_xalloc); if (p == NULL) { out_of_memory(); } return p; }
/* Like realloc(), but never returns NULL: aborts via out_of_memory() on
 * allocation failure.  A zero 'size' is rounded up to 1 so the result is
 * always a valid, freeable pointer. */
void *
xrealloc(void *p, size_t size)
{
    void *q;

    q = realloc(p, size ? size : 1);
    COVERAGE_INC(util_xalloc);
    if (!q) {
        out_of_memory();
    }
    return q;
}
/* Returns storage for 'n' uint32_t values belonging to 'flow': the struct's
 * inline array when 'n' fits, otherwise a freshly xmalloc()'d buffer (the
 * caller then owns the heap allocation). */
static uint32_t *
miniflow_alloc_values(struct miniflow *flow, int n)
{
    if (n > MINI_N_INLINE) {
        /* Too big for the inline array; fall back to the heap. */
        COVERAGE_INC(miniflow_malloc);
        return xmalloc(n * sizeof *flow->values);
    }
    return flow->inline_values;
}
/* Expires MAC learning entries in 'ml' whose idle timeout has elapsed,
 * starting from the least-recently-used end.  If 'set' is nonnull, the tag of
 * each expired entry is added to it so affected flows can be revalidated. */
void
mac_learning_run(struct mac_learning *ml, struct tag_set *set)
{
    struct mac_entry *entry;

    for (;;) {
        if (!get_lru(ml, &entry) || time_now() < entry->expires) {
            /* Table empty, or the oldest entry is still fresh: done. */
            break;
        }
        COVERAGE_INC(mac_learning_expired);
        if (set) {
            tag_set_add(set, entry->tag);
        }
        free_mac_entry(ml, entry);
    }
}
/* Starts a subprocess with the arguments in the null-terminated argv[] array.
 * argv[0] is used as the name of the process.  Searches the PATH environment
 * variable to find the program to execute.
 *
 * All file descriptors are closed before executing the subprocess, except for
 * fds 0, 1, and 2 and the 'n_keep_fds' fds listed in 'keep_fds'.  Also, any of
 * the 'n_null_fds' fds listed in 'null_fds' are replaced by /dev/null.
 *
 * Returns 0 if successful, otherwise a positive errno value indicating the
 * error.  If successful, '*pp' is assigned a new struct process that may be
 * used to query the process's status.  On failure, '*pp' is set to NULL. */
int
process_start(char **argv,
              const int keep_fds[], size_t n_keep_fds,
              const int null_fds[], size_t n_null_fds,
              struct process **pp)
{
    sigset_t oldsigs;
    pid_t pid;
    int error;

    *pp = NULL;
    COVERAGE_INC(process_start);
    error = process_prestart(argv);
    if (error) {
        return error;
    }

    /* SIGCHLD is blocked across fork() so the handler cannot observe the
     * child before process_register() has recorded it. */
    block_sigchld(&oldsigs);
    pid = fork();
    if (pid < 0) {
        unblock_sigchld(&oldsigs);
        /* NOTE(review): VLOG_WARN may itself modify errno before the
         * 'return errno' below — consider saving errno first.  Confirm
         * whether VLOG_WARN preserves errno in this codebase. */
        VLOG_WARN("fork failed: %s", strerror(errno));
        return errno;
    } else if (pid) {
        /* Running in parent process. */
        *pp = process_register(argv[0], pid);
        unblock_sigchld(&oldsigs);
        return 0;
    } else {
        /* Running in child process. */
        int fd_max = get_max_fds();
        int fd;

        fatal_signal_fork();
        unblock_sigchld(&oldsigs);
        for (fd = 0; fd < fd_max; fd++) {
            if (is_member(fd, null_fds, n_null_fds)) {
                /* We can't use get_null_fd() here because we might have
                 * already closed its fd. */
                /* NOTE(review): open() result is not checked; if it fails,
                 * dup2(-1, fd) fails silently and 'fd' stays closed. */
                int nullfd = open("/dev/null", O_RDWR);
                dup2(nullfd, fd);
                close(nullfd);
            } else if (fd >= 3 && !is_member(fd, keep_fds, n_keep_fds)) {
                close(fd);
            }
        }
        execvp(argv[0], argv);
        /* Only reached if execvp() failed.  _exit() (not exit()) avoids
         * running the parent's atexit handlers in the child. */
        fprintf(stderr, "execvp(\"%s\") failed: %s\n",
                argv[0], strerror(errno));
        _exit(1);
    }
}
/* Initializes 'flow' members from 'packet', 'skb_priority', 'tnl', and
 * 'ofp_in_port'.
 *
 * Initializes 'packet' header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l2_5 to the start of the MPLS shim header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
void
flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t skb_mark,
             const struct flow_tnl *tnl, uint16_t ofp_in_port,
             struct flow *flow)
{
    /* 'b' is a by-value copy: pulling headers off 'b' advances its data
     * pointer without disturbing 'packet' itself. */
    struct ofpbuf b = *packet;
    struct eth_header *eth;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);

    if (tnl) {
        /* Caller must not pass flow->tunnel itself as 'tnl'. */
        ovs_assert(tnl != &flow->tunnel);
        flow->tunnel = *tnl;
    }
    flow->in_port = ofp_in_port;
    flow->skb_priority = skb_priority;
    flow->skb_mark = skb_mark;

    packet->l2 = b.data;
    packet->l2_5 = NULL;
    packet->l3 = NULL;
    packet->l4 = NULL;
    packet->l7 = NULL;

    /* Runt frame: too short even for an Ethernet header. */
    if (b.size < sizeof *eth) {
        return;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, vlan_tci. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Parse mpls, copy l3 ttl. */
    if (eth_type_mpls(flow->dl_type)) {
        packet->l2_5 = b.data;
        parse_mpls(&b, flow);
    }
    /* Everything from the network layer up is handled by the helper. */
    packet->l3 = b.data;
    flow_extract_l3_onwards(packet, flow, flow->dl_type);
}
static int nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, uint32_t nlmsg_seq, bool wait) { struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg); int error; nlmsg->nlmsg_len = msg->size; nlmsg->nlmsg_seq = nlmsg_seq; nlmsg->nlmsg_pid = sock->pid; do { int retval; retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT); error = retval < 0 ? errno : 0; } while (error == EINTR); log_nlmsg(__func__, error, msg->data, msg->size, sock->protocol); if (!error) { COVERAGE_INC(netlink_sent); } return error; }
/* Reallocates 'hindex''s array of buckets to use bitwise mask 'new_mask'. */
static void
hindex_resize(struct hindex *hindex, size_t new_mask)
{
    struct hindex tmp;
    size_t i;

    /* 'new_mask' must be one less than a power of two, and not the
     * all-ones value (which would overflow new_mask + 1). */
    ovs_assert(is_pow2(new_mask + 1));
    ovs_assert(new_mask != SIZE_MAX);

    hindex_init(&tmp);
    if (new_mask) {
        tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1));
        tmp.mask = new_mask;
        for (i = 0; i <= tmp.mask; i++) {
            tmp.buckets[i] = NULL;
        }
    }
    for (i = 0; i <= hindex->mask; i++) {
        struct hindex_node *node, *next;
        int count;

        count = 0;
        /* Relink each node from the old bucket chain onto the head of its
         * new bucket.  NOTE(review): this walks and rewrites only the 'd'
         * links; nodes chained off another link (e.g. same-hash siblings)
         * presumably follow their chain head — confirm against
         * struct hindex_node's definition. */
        for (node = hindex->buckets[i]; node; node = next) {
            struct hindex_node **head = &tmp.buckets[node->hash & tmp.mask];

            /* Save the old successor before overwriting node->d. */
            next = node->d;
            node->d = *head;
            *head = node;
            count++;
        }
        if (count > 5) {
            /* Long chain: likely a poor hash function. */
            COVERAGE_INC(hindex_pathological);
        }
    }
    tmp.n_unique = hindex->n_unique;
    /* Swap the rebuilt table into place; hindex_destroy() then frees the
     * old bucket array (now owned by 'tmp'). */
    hindex_swap(hindex, &tmp);
    hindex_destroy(&tmp);
}
/* Starts the process whose arguments are given in the null-terminated array
 * 'argv' and waits for it to exit.  On success returns 0 and stores the
 * process exit value (suitable for passing to process_status_msg()) in
 * '*status'.  On failure, returns a positive errno value and stores 0 in
 * '*status'.
 *
 * If 'stdout_log' is nonnull, then the subprocess's output to stdout (up to a
 * limit of PROCESS_MAX_CAPTURE bytes) is captured in a memory buffer, which
 * when this function returns 0 is stored as a null-terminated string in
 * '*stdout_log'.  The caller is responsible for freeing '*stdout_log' (by
 * passing it to free()).  When this function returns an error, '*stdout_log'
 * is set to NULL.
 *
 * If 'stderr_log' is nonnull, then it is treated like 'stdout_log' except
 * that it captures the subprocess's output to stderr. */
int
process_run_capture(char **argv, char **stdout_log, char **stderr_log,
                    int *status)
{
    struct stream s_stdout, s_stderr;
    sigset_t oldsigs;
    pid_t pid;
    int error;

    COVERAGE_INC(process_run_capture);
    if (stdout_log) {
        *stdout_log = NULL;
    }
    if (stderr_log) {
        *stderr_log = NULL;
    }
    *status = 0;
    error = process_prestart(argv);
    if (error) {
        return error;
    }

    /* One pipe-backed stream per captured output. */
    error = stream_open(&s_stdout);
    if (error) {
        return error;
    }
    error = stream_open(&s_stderr);
    if (error) {
        stream_close(&s_stdout);
        return error;
    }

    /* Block SIGCHLD across fork() so the handler cannot run before the
     * child is registered. */
    block_sigchld(&oldsigs);
    pid = fork();
    if (pid < 0) {
        /* Save errno before any call that might clobber it. */
        int error = errno;
        unblock_sigchld(&oldsigs);
        VLOG_WARN("fork failed: %s", strerror(error));
        stream_close(&s_stdout);
        stream_close(&s_stderr);
        *status = 0;
        return error;
    } else if (pid) {
        /* Running in parent process. */
        struct process *p;

        p = process_register(argv[0], pid);
        unblock_sigchld(&oldsigs);

        /* Close the write ends so EOF is seen when the child exits. */
        close(s_stdout.fds[1]);
        close(s_stderr.fds[1]);
        while (!process_exited(p)) {
            stream_read(&s_stdout);
            stream_read(&s_stderr);
            stream_wait(&s_stdout);
            stream_wait(&s_stderr);
            process_wait(p);
            poll_block();
        }
        /* Drain any output buffered between the last read and exit. */
        stream_read(&s_stdout);
        stream_read(&s_stderr);
        if (stdout_log) {
            *stdout_log = ds_steal_cstr(&s_stdout.log);
        }
        if (stderr_log) {
            *stderr_log = ds_steal_cstr(&s_stderr.log);
        }
        stream_close(&s_stdout);
        stream_close(&s_stderr);
        *status = process_status(p);
        process_destroy(p);
        return 0;
    } else {
        /* Running in child process. */
        int max_fds;
        int i;

        fatal_signal_fork();
        unblock_sigchld(&oldsigs);
        /* Wire stdin to /dev/null and stdout/stderr to the capture pipes. */
        dup2(get_null_fd(), 0);
        dup2(s_stdout.fds[1], 1);
        dup2(s_stderr.fds[1], 2);
        max_fds = get_max_fds();
        for (i = 3; i < max_fds; i++) {
            close(i);
        }
        execvp(argv[0], argv);
        /* Only reached if execvp() failed.  NOTE(review): exit() (rather
         * than _exit()) runs the parent's atexit handlers in the child —
         * confirm whether that is intended here. */
        fprintf(stderr, "execvp(\"%s\") failed: %s\n",
                argv[0], strerror(errno));
        exit(EXIT_FAILURE);
    }
}
/* Initializes 'flow' members from 'packet', 'tun_id', and 'ofp_in_port'.
 * Initializes 'packet' header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
void
flow_extract(struct ofpbuf *packet, uint32_t priority, ovs_be64 tun_id,
             uint16_t ofp_in_port, struct flow *flow)
{
    /* 'b' is a by-value copy: header pulls advance 'b' without touching
     * 'packet' itself. */
    struct ofpbuf b = *packet;
    struct eth_header *eth;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);
    flow->tun_id = tun_id;
    flow->in_port = ofp_in_port;
    flow->priority = priority;

    packet->l2 = b.data;
    packet->l3 = NULL;
    packet->l4 = NULL;
    packet->l7 = NULL;

    /* Runt frame: too short even for an Ethernet header. */
    if (b.size < sizeof *eth) {
        return;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, vlan_tci. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Network layer. */
    packet->l3 = b.data;
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        const struct ip_header *nh = pull_ip(&b);
        if (nh) {
            packet->l4 = b.data;
            flow->nw_src = get_unaligned_be32(&nh->ip_src);
            flow->nw_dst = get_unaligned_be32(&nh->ip_dst);
            flow->nw_proto = nh->ip_proto;
            flow->nw_tos = nh->ip_tos;
            if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
                flow->nw_frag = FLOW_NW_FRAG_ANY;
                if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
                    flow->nw_frag |= FLOW_NW_FRAG_LATER;
                }
            }
            flow->nw_ttl = nh->ip_ttl;
            /* Transport header is only parsed for the first fragment (or
             * unfragmented packets); later fragments lack L4 headers. */
            if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
                if (flow->nw_proto == IPPROTO_TCP) {
                    parse_tcp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_UDP) {
                    parse_udp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_ICMP) {
                    const struct icmp_header *icmp = pull_icmp(&b);
                    if (icmp) {
                        /* ICMP type/code are stored in the tp_src/tp_dst
                         * fields, in network byte order. */
                        flow->tp_src = htons(icmp->icmp_type);
                        flow->tp_dst = htons(icmp->icmp_code);
                        packet->l7 = b.data;
                    }
                }
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        /* A nonzero return from parse_ipv6() means the header was invalid
         * or truncated; packet->l4 then stays NULL. */
        if (parse_ipv6(&b, flow)) {
            return;
        }
        packet->l4 = b.data;
        if (flow->nw_proto == IPPROTO_TCP) {
            parse_tcp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_UDP) {
            parse_udp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
            if (parse_icmpv6(&b, flow)) {
                packet->l7 = b.data;
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
        const struct arp_eth_header *arp = pull_arp(&b);
        /* Only Ethernet/IPv4 ARP is understood. */
        if (arp && arp->ar_hrd == htons(1)
            && arp->ar_pro == htons(ETH_TYPE_IP)
            && arp->ar_hln == ETH_ADDR_LEN
            && arp->ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ar_op) <= 0xff) {
                flow->nw_proto = ntohs(arp->ar_op);
            }
            if ((flow->nw_proto == ARP_OP_REQUEST)
                || (flow->nw_proto == ARP_OP_REPLY)) {
                flow->nw_src = arp->ar_spa;
                flow->nw_dst = arp->ar_tpa;
                memcpy(flow->arp_sha, arp->ar_sha, ETH_ADDR_LEN);
                memcpy(flow->arp_tha, arp->ar_tha, ETH_ADDR_LEN);
            }
        }
    }
}
/* Initializes 'flow' members from 'packet', 'skb_priority', 'tnl', and
 * 'in_port'.
 *
 * Initializes 'packet' header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l2_5 to the start of the MPLS shim header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP/UDP/SCTP/ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
void
flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark,
             const struct flow_tnl *tnl, const union flow_in_port *in_port,
             struct flow *flow)
{
    /* 'b' is a by-value copy: header pulls advance 'b' without touching
     * 'packet' itself. */
    struct ofpbuf b = *packet;
    struct eth_header *eth;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);

    if (tnl) {
        /* Caller must not pass flow->tunnel itself as 'tnl'. */
        ovs_assert(tnl != &flow->tunnel);
        flow->tunnel = *tnl;
    }
    if (in_port) {
        flow->in_port = *in_port;
    }
    flow->skb_priority = skb_priority;
    flow->pkt_mark = pkt_mark;

    packet->l2 = b.data;
    packet->l2_5 = NULL;
    packet->l3 = NULL;
    packet->l4 = NULL;
    packet->l7 = NULL;

    /* Runt frame: too short even for an Ethernet header. */
    if (b.size < sizeof *eth) {
        return;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, vlan_tci. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Parse mpls, copy l3 ttl. */
    if (eth_type_mpls(flow->dl_type)) {
        packet->l2_5 = b.data;
        parse_mpls(&b, flow);
    }

    /* Network layer. */
    packet->l3 = b.data;
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        const struct ip_header *nh = pull_ip(&b);
        if (nh) {
            packet->l4 = b.data;
            flow->nw_src = get_16aligned_be32(&nh->ip_src);
            flow->nw_dst = get_16aligned_be32(&nh->ip_dst);
            flow->nw_proto = nh->ip_proto;
            flow->nw_tos = nh->ip_tos;
            if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
                flow->nw_frag = FLOW_NW_FRAG_ANY;
                if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
                    flow->nw_frag |= FLOW_NW_FRAG_LATER;
                }
            }
            flow->nw_ttl = nh->ip_ttl;
            /* Transport header is only parsed for the first fragment (or
             * unfragmented packets); later fragments lack L4 headers. */
            if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
                if (flow->nw_proto == IPPROTO_TCP) {
                    parse_tcp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_UDP) {
                    parse_udp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_SCTP) {
                    parse_sctp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_ICMP) {
                    const struct icmp_header *icmp = pull_icmp(&b);
                    if (icmp) {
                        /* ICMP type/code are stored in tp_src/tp_dst, in
                         * network byte order. */
                        flow->tp_src = htons(icmp->icmp_type);
                        flow->tp_dst = htons(icmp->icmp_code);
                        packet->l7 = b.data;
                    }
                }
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        /* A nonzero return from parse_ipv6() means the header was invalid
         * or truncated; packet->l4 then stays NULL. */
        if (parse_ipv6(&b, flow)) {
            return;
        }
        packet->l4 = b.data;
        if (flow->nw_proto == IPPROTO_TCP) {
            parse_tcp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_UDP) {
            parse_udp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_SCTP) {
            parse_sctp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
            if (parse_icmpv6(&b, flow)) {
                packet->l7 = b.data;
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
               flow->dl_type == htons(ETH_TYPE_RARP)) {
        const struct arp_eth_header *arp = pull_arp(&b);
        /* Only Ethernet/IPv4 ARP is understood. */
        if (arp && arp->ar_hrd == htons(1)
            && arp->ar_pro == htons(ETH_TYPE_IP)
            && arp->ar_hln == ETH_ADDR_LEN
            && arp->ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ar_op) <= 0xff) {
                flow->nw_proto = ntohs(arp->ar_op);
            }
            flow->nw_src = get_16aligned_be32(&arp->ar_spa);
            flow->nw_dst = get_16aligned_be32(&arp->ar_tpa);
            memcpy(flow->arp_sha, arp->ar_sha, ETH_ADDR_LEN);
            memcpy(flow->arp_tha, arp->ar_tha, ETH_ADDR_LEN);
        }
    }
}
/* Receives one Netlink message from 'sock' into 'buf', spilling anything past
 * buf->allocated into an on-stack 'tail' buffer and copying it back in
 * afterward (the "jumbo" path).  With 'wait' false the receive does not
 * block.  Returns 0 on success or a positive errno value on failure. */
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
#ifdef _WIN32
#define MAX_STACK_LENGTH 81920
    uint8_t tail[MAX_STACK_LENGTH];
#else
    uint8_t tail[65536];
#endif
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;
    int error;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    /* Scatter the datagram: first into 'buf', overflow into 'tail'. */
    iov[0].iov_base = ofpbuf_base(buf);
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    /* Receive a Netlink message from the kernel.
     *
     * This works around a kernel bug in which the kernel returns an error code
     * as if it were the number of bytes read.  It doesn't actually modify
     * anything in the receive buffer in that case, so we can initialize the
     * Netlink header with an impossible message length and then, upon success,
     * check whether it changed. */
    nlmsghdr = ofpbuf_base(buf);
    do {
        nlmsghdr->nlmsg_len = UINT32_MAX;
#ifdef _WIN32
        /* NOTE(review): 'boolean' and passing '&retval' (ssize_t *) where
         * ReadFile expects LPDWORD look type-mismatched — confirm the
         * Windows build's typedefs. */
        boolean result = false;
        DWORD last_error = 0;
        result = ReadFile(sock->handle, tail, MAX_STACK_LENGTH, &retval, NULL);
        last_error = GetLastError();
        if (last_error != ERROR_SUCCESS && !result) {
            retval = -1;
            errno = EAGAIN;
        } else {
            ofpbuf_put(buf, tail, retval);
        }
#else
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
#endif
        /* errno on failure; ECONNRESET on zero-length read; 0 if the header
         * was written (normal success); otherwise 'retval' itself is the
         * kernel's smuggled error code (the bug described above). */
        error = (retval < 0 ? errno
                 : retval == 0 ? ECONNRESET /* not possible? */
                 : nlmsghdr->nlmsg_len != UINT32_MAX ? 0
                 : retval);
    } while (error == EINTR);
    if (error) {
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        /* Message exceeded even buf->allocated + sizeof tail. */
        VLOG_ERR_RL(&rl, "truncated message (longer than %"PRIuSIZE" bytes)",
                    sizeof tail);
        return E2BIG;
    }

    /* NOTE(review): 'retval' is ssize_t but printed with PRIuSIZE (size_t)
     * below, and compared unsigned against sizeof — harmless since retval
     * >= 0 here, but worth confirming with -Wformat/-Wsign-compare. */
    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%"PRIuSIZE" bytes < %"PRIuSIZE")",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }
#ifndef _WIN32
    /* Jumbo path: anything past buf->allocated landed in 'tail'; append it. */
    ofpbuf_set_size(buf, MIN(retval, buf->allocated));
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }
#endif

    log_nlmsg(__func__, 0, ofpbuf_data(buf), ofpbuf_size(buf), sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}
/* Receives one Netlink message from 'sock' into 'buf', spilling anything past
 * buf->allocated into an on-stack 'tail' buffer and copying it back in
 * afterward (the "jumbo" path).  With 'wait' false the receive does not
 * block.  Returns 0 on success or a positive errno value on failure. */
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
    uint8_t tail[65536];
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    /* Scatter the datagram: first into 'buf', overflow into 'tail'. */
    iov[0].iov_base = buf->base;
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    do {
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
    } while (retval < 0 && errno == EINTR);

    if (retval < 0) {
        int error = errno;
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        /* Message exceeded even buf->allocated + sizeof tail. */
        VLOG_ERR_RL(&rl, "truncated message (longer than %zu bytes)",
                    sizeof tail);
        return E2BIG;
    }

    /* Sanity-check the Netlink header against the byte count actually
     * received.  (retval >= 0 is guaranteed here, so the signed/unsigned
     * comparison against sizeof is safe.) */
    nlmsghdr = buf->data;
    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %zu)",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }

    /* Jumbo path: anything past buf->allocated landed in 'tail'; append it. */
    buf->size = MIN(retval, buf->allocated);
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }

    log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
    COVERAGE_INC(netlink_received);
    return 0;
}
/* 'tun_id' is in network byte order, while 'in_port' is in host byte order.
 * These byte orders are the same as they are in struct odp_flow_key.
 *
 * Initializes packet header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 *
 * Returns 1 if the packet is an IP fragment (so its transport header could
 * not be examined), otherwise 0. */
int
flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port,
             flow_t *flow)
{
    /* 'b' is a by-value copy: header pulls advance 'b' without touching
     * 'packet' itself. */
    struct ofpbuf b = *packet;
    struct eth_header *eth;
    int retval = 0;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);
    flow->tun_id = tun_id;
    flow->in_port = in_port;
    flow->dl_vlan = htons(OFP_VLAN_NONE);

    packet->l2 = b.data;
    packet->l3 = NULL;
    packet->l4 = NULL;
    packet->l7 = NULL;

    /* Runt frame: too short even for an Ethernet header. */
    if (b.size < sizeof *eth) {
        return 0;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, dl_vlan, dl_vlan_pcp. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Network layer. */
    packet->l3 = b.data;
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        const struct ip_header *nh = pull_ip(&b);
        if (nh) {
            flow->nw_src = get_unaligned_u32(&nh->ip_src);
            flow->nw_dst = get_unaligned_u32(&nh->ip_dst);
            /* Only the DSCP bits of the TOS byte are kept. */
            flow->nw_tos = nh->ip_tos & IP_DSCP_MASK;
            flow->nw_proto = nh->ip_proto;
            packet->l4 = b.data;
            if (!IP_IS_FRAGMENT(nh->ip_frag_off)) {
                if (flow->nw_proto == IP_TYPE_TCP) {
                    const struct tcp_header *tcp = pull_tcp(&b);
                    if (tcp) {
                        flow->tp_src = tcp->tcp_src;
                        flow->tp_dst = tcp->tcp_dst;
                        packet->l7 = b.data;
                    }
                } else if (flow->nw_proto == IP_TYPE_UDP) {
                    const struct udp_header *udp = pull_udp(&b);
                    if (udp) {
                        flow->tp_src = udp->udp_src;
                        flow->tp_dst = udp->udp_dst;
                        packet->l7 = b.data;
                    }
                } else if (flow->nw_proto == IP_TYPE_ICMP) {
                    const struct icmp_header *icmp = pull_icmp(&b);
                    if (icmp) {
                        /* ICMP type/code go in icmp_type/icmp_code, in
                         * network byte order. */
                        flow->icmp_type = htons(icmp->icmp_type);
                        flow->icmp_code = htons(icmp->icmp_code);
                        packet->l7 = b.data;
                    }
                }
            } else {
                /* IP fragment: transport header unavailable. */
                retval = 1;
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
        const struct arp_eth_header *arp = pull_arp(&b);
        /* Only Ethernet/IPv4 ARP is understood. */
        if (arp && arp->ar_hrd == htons(1)
            && arp->ar_pro == htons(ETH_TYPE_IP)
            && arp->ar_hln == ETH_ADDR_LEN
            && arp->ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ar_op) <= 0xff) {
                flow->nw_proto = ntohs(arp->ar_op);
            }
            if ((flow->nw_proto == ARP_OP_REQUEST)
                || (flow->nw_proto == ARP_OP_REPLY)) {
                flow->nw_src = arp->ar_spa;
                flow->nw_dst = arp->ar_tpa;
            }
        }
    }
    return retval;
}