Example #1
static void
sigchld_handler(int signr OVS_UNUSED)
{
    struct process *p;

    COVERAGE_INC(process_sigchld);
    LIST_FOR_EACH (p, struct process, node, &all_processes) {
        if (!p->exited) {
            int retval, status;
            do {
                retval = waitpid(p->pid, &status, WNOHANG);
            } while (retval == -1 && errno == EINTR);
            if (retval == p->pid) {
                p->exited = true;
                p->status = status;
            } else if (retval < 0) {
                /* XXX We want to log something but we're in a signal
                 * handler. */
                p->exited = true;
                p->status = -1;
            }
        }
    }
    ignore(write(fds[1], "", 1));
}
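The final write() is the async-signal-safe half of a self-pipe: the byte written to fds[1] wakes whatever poll loop watches fds[0], which can then log and reap safely outside signal context. Below is a minimal sketch of the setup this handler assumes; the initialization function and its error handling are illustrative, not the library's actual code.

static int fds[2];

static void
sigchld_pipe_init(void)
{
    /* fds[0] is the read end a poll loop watches; fds[1] is what the
     * handler above writes to.  A real setup would also mark both ends
     * O_NONBLOCK and FD_CLOEXEC. */
    if (pipe(fds) < 0) {
        abort();
    }
}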
Example #2
static int
nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg,
               uint32_t nlmsg_seq, bool wait)
{
    struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg);
    int error;

    nlmsg->nlmsg_len = ofpbuf_size(msg);
    nlmsg->nlmsg_seq = nlmsg_seq;
    nlmsg->nlmsg_pid = sock->pid;
    do {
        int retval;
#ifdef _WIN32
        bool result;
        DWORD bytes_written = 0;

        /* WriteFile() reports the byte count through a DWORD, so it cannot
         * be handed &retval (an int) directly. */
        result = WriteFile(sock->handle, ofpbuf_data(msg), ofpbuf_size(msg),
                           &bytes_written, NULL);
        if (!result) {
            retval = -1;
            errno = EAGAIN;
        } else {
            retval = bytes_written;
        }
#else
        retval = send(sock->fd, ofpbuf_data(msg), ofpbuf_size(msg),
                      wait ? 0 : MSG_DONTWAIT);
#endif
        error = retval < 0 ? errno : 0;
    } while (error == EINTR);
    log_nlmsg(__func__, error, ofpbuf_data(msg), ofpbuf_size(msg),
              sock->protocol);
    if (!error) {
        COVERAGE_INC(netlink_sent);
    }
    return error;
}
Example #3
static void
resize(struct hmap *hmap, size_t new_mask, const char *where)
{
    struct hmap tmp;
    size_t i;

    ovs_assert(is_pow2(new_mask + 1));

    hmap_init(&tmp);
    if (new_mask) {
        tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1));
        tmp.mask = new_mask;
        for (i = 0; i <= tmp.mask; i++) {
            tmp.buckets[i] = NULL;
        }
    }
    for (i = 0; i <= hmap->mask; i++) {
        struct hmap_node *node, *next;
        int count = 0;
        for (node = hmap->buckets[i]; node; node = next) {
            next = node->next;
            hmap_insert_fast(&tmp, node, node->hash);
            count++;
        }
        if (count > 5) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
            COVERAGE_INC(hmap_pathological);
            VLOG_DBG_RL(&rl, "%s: %d nodes in bucket (%"PRIuSIZE" nodes, %"PRIuSIZE" buckets)",
                        where, count, hmap->n, hmap->mask + 1);
        }
    }
    hmap_swap(hmap, &tmp);
    hmap_destroy(&tmp);
}
Example #4
int
process_run(char **argv,
            const int keep_fds[], size_t n_keep_fds,
            const int null_fds[], size_t n_null_fds,
            int *status)
{
    struct process *p;
    int retval;

    COVERAGE_INC(process_run);
    retval = process_start(argv, keep_fds, n_keep_fds, null_fds, n_null_fds,
                           &p);
    if (retval) {
        *status = 0;
        return retval;
    }

    while (!process_exited(p)) {
        process_wait(p);
        poll_block();
    }
    *status = process_status(p);
    process_destroy(p);
    return 0;
}
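A hedged caller sketch: run a trivial command synchronously and report a nonzero exit status. process_status_msg() is the formatter mentioned in the process_run_capture() comment further down; the argv contents are illustrative.

char *argv[] = { "true", NULL };
int status;
int error = process_run(argv, NULL, 0, NULL, 0, &status);

if (error) {
    fprintf(stderr, "failed to start: %s\n", strerror(error));
} else if (status) {
    /* process_status_msg() returns a malloc'd string; a real caller
     * would free it. */
    fprintf(stderr, "subprocess failed: %s\n", process_status_msg(status));
}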
Example #5
/* Attempts to make 'ml' learn from the fact that a frame from 'src_mac' was
 * just observed arriving from 'src_port' on the given 'vlan'.
 *
 * Returns nonzero if we actually learned something from this, zero if it just
 * confirms what we already knew.  The nonzero return value is the tag of flows
 * that now need revalidation.
 *
 * The 'vlan' parameter is used to maintain separate per-VLAN learning tables.
 * Specify 0 if this behavior is undesirable.
 *
 * 'lock_type' specifies whether the entry should be locked, or whether
 * existing locks should be checked. */
tag_type
mac_learning_learn(struct mac_learning *ml,
                   const uint8_t src_mac[ETH_ADDR_LEN], uint16_t vlan,
                   uint16_t src_port, enum grat_arp_lock_type lock_type)
{
    struct mac_entry *e;
    struct list *bucket;

    if (!is_learning_vlan(ml, vlan)) {
        return 0;
    }

    if (eth_addr_is_multicast(src_mac)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 30);
        VLOG_DBG_RL(&rl, "multicast packet source "ETH_ADDR_FMT,
                    ETH_ADDR_ARGS(src_mac));
        return 0;
    }

    bucket = mac_table_bucket(ml, src_mac, vlan);
    e = search_bucket(bucket, src_mac, vlan);
    if (!e) {
        if (!list_is_empty(&ml->free)) {
            e = mac_entry_from_lru_node(ml->free.next);
        } else {
            e = mac_entry_from_lru_node(ml->lrus.next);
            list_remove(&e->hash_node);
        }
        memcpy(e->mac, src_mac, ETH_ADDR_LEN);
        list_push_front(bucket, &e->hash_node);
        e->port = -1;
        e->vlan = vlan;
        e->tag = make_unknown_mac_tag(ml, src_mac, vlan);
        e->grat_arp_lock = TIME_MIN;
    }

    if (lock_type != GRAT_ARP_LOCK_CHECK || time_now() >= e->grat_arp_lock) {
        /* Make the entry most-recently-used. */
        list_remove(&e->lru_node);
        list_push_back(&ml->lrus, &e->lru_node);
        e->expires = time_now() + MAC_ENTRY_IDLE_TIME;
        if (lock_type == GRAT_ARP_LOCK_SET) {
            e->grat_arp_lock = time_now() + MAC_GRAT_ARP_LOCK_TIME;
        }

        /* Did we learn something? */
        if (e->port != src_port) {
            tag_type old_tag = e->tag;
            e->port = src_port;
            e->tag = tag_create_random();
            COVERAGE_INC(mac_learning_learned);
            return old_tag;
        }
    }

    return 0;
}
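A sketch of the call pattern the comment above describes: feed each observed source MAC in, and revalidate the flows carried by any returned tag. GRAT_ARP_LOCK_NONE and the 'revalidate_set' destination are assumptions about the surrounding code, not taken from this example.

tag_type rev_tag = mac_learning_learn(ml, flow->dl_src, vlan, in_port,
                                      GRAT_ARP_LOCK_NONE);
if (rev_tag) {
    /* Something changed: queue flows carrying this tag for revalidation. */
    tag_set_add(&revalidate_set, rev_tag);
}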
Example #6
/* Shrinks 'hindex', if necessary, to optimize the performance of iteration. */
void
hindex_shrink(struct hindex *hindex)
{
    size_t new_mask = hindex_calc_mask(hindex->n_unique);
    if (new_mask < hindex->mask) {
        COVERAGE_INC(hindex_shrink);
        hindex_resize(hindex, new_mask);
    }
}
Example #7
/* Expands 'hmap', if necessary, to optimize the performance of searches when
 * it has up to 'n' elements.  (But iteration will be slow in a hash map whose
 * allocated capacity is much higher than its current number of nodes.)
 *
 * ('where' is used in debug logging.  Commonly one would use hmap_reserve() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
void
hmap_reserve_at(struct hmap *hmap, size_t n, const char *where)
{
    size_t new_mask = calc_mask(n);
    if (new_mask > hmap->mask) {
        COVERAGE_INC(hmap_reserve);
        resize(hmap, new_mask, where);
    }
}
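A usage sketch for the bulk-load case: reserving capacity up front means resize() runs once instead of on every doubling. hmap_reserve() is the convenience macro the comment above refers to; 'struct my_node' and make_my_node() are hypothetical.

struct hmap map;
size_t i;

hmap_init(&map);
hmap_reserve(&map, 1000);                    /* one resize, up front */
for (i = 0; i < 1000; i++) {
    struct my_node *node = make_my_node(i);  /* hypothetical constructor */
    hmap_insert(&map, &node->hmap_node, hash_int(i, 0));
}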
Example #8
/* Shrinks 'hmap', if necessary, to optimize the performance of iteration.
 *
 * ('where' is used in debug logging.  Commonly one would use hmap_shrink() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
void
hmap_shrink_at(struct hmap *hmap, const char *where)
{
    size_t new_mask = calc_mask(hmap->n);
    if (new_mask < hmap->mask) {
        COVERAGE_INC(hmap_shrink);
        resize(hmap, new_mask, where);
    }
}
Example #9
/* Expands 'hindex', if necessary, to optimize the performance of searches when
 * it has up to 'n' unique hashes.  (But iteration will be slow in a hash index
 * whose allocated capacity is much higher than its current number of
 * nodes.)  */
void
hindex_reserve(struct hindex *hindex, size_t n)
{
    size_t new_mask = hindex_calc_mask(n);
    if (new_mask > hindex->mask) {
        COVERAGE_INC(hindex_reserve);
        hindex_resize(hindex, new_mask);
    }
}
Example #10
/* Expands 'hindex', if necessary, to optimize the performance of searches. */
void
hindex_expand(struct hindex *hindex)
{
    size_t new_mask = hindex_calc_mask(hindex->n_unique);
    if (new_mask > hindex->mask) {
        COVERAGE_INC(hindex_expand);
        hindex_resize(hindex, new_mask);
    }
}
Example #11
void *
xmalloc(size_t size)
{
    void *p = malloc(size ? size : 1);
    COVERAGE_INC(util_xalloc);
    if (p == NULL) {
        out_of_memory();
    }
    return p;
}
Example #12
void *
xcalloc(size_t count, size_t size)
{
    void *p = count && size ? calloc(count, size) : malloc(1);
    COVERAGE_INC(util_xalloc);
    if (p == NULL) {
        out_of_memory();
    }
    return p;
}
Example #13
void *
xrealloc(void *p, size_t size)
{
    p = realloc(p, size ? size : 1);
    COVERAGE_INC(util_xalloc);
    if (p == NULL) {
        out_of_memory();
    }
    return p;
}
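The 'size ? size : 1' pattern in all three wrappers guarantees that NULL can only ever mean out-of-memory, so out_of_memory() is the single failure path and callers never need to special-case zero-byte requests. A small illustration:

char *p = xmalloc(0);   /* at least 1 byte is allocated, so p != NULL */
free(p);                /* an ordinary, free()-able pointer */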
Example #14
static uint32_t *
miniflow_alloc_values(struct miniflow *flow, int n)
{
    if (n <= MINI_N_INLINE) {
        return flow->inline_values;
    } else {
        COVERAGE_INC(miniflow_malloc);
        return xmalloc(n * sizeof *flow->values);
    }
}
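miniflow_alloc_values() is a small-buffer optimization: values that fit in the inline array cost no allocation. The flip side, sketched here as an assumption about the caller rather than code taken from the library, is that cleanup must test the same condition before freeing.

uint32_t *values = miniflow_alloc_values(&flow, n);
/* ... fill in 'values' ... */
if (n > MINI_N_INLINE) {
    free(values);       /* only the spilled case was malloc'd */
}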
Example #15
void
mac_learning_run(struct mac_learning *ml, struct tag_set *set)
{
    struct mac_entry *e;
    while (get_lru(ml, &e) && time_now() >= e->expires) {
        COVERAGE_INC(mac_learning_expired);
        if (set) {
            tag_set_add(set, e->tag);
        }
        free_mac_entry(ml, e);
    }
}
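A sketch of the intended periodic call, assuming tag_set_init() as the initializer: tags of expired entries accumulate in 'set' so the caller can revalidate the affected flows afterwards.

struct tag_set set;

tag_set_init(&set);
mac_learning_run(ml, &set);
/* ...revalidate any flows whose tags now appear in 'set'... */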
Example #16
/* Starts a subprocess with the arguments in the null-terminated argv[] array.
 * argv[0] is used as the name of the process.  Searches the PATH environment
 * variable to find the program to execute.
 *
 * All file descriptors are closed before executing the subprocess, except for
 * fds 0, 1, and 2 and the 'n_keep_fds' fds listed in 'keep_fds'.  Also, any of
 * the 'n_null_fds' fds listed in 'null_fds' are replaced by /dev/null.
 *
 * Returns 0 if successful, otherwise a positive errno value indicating the
 * error.  If successful, '*pp' is assigned a new struct process that may be
 * used to query the process's status.  On failure, '*pp' is set to NULL. */
int
process_start(char **argv,
              const int keep_fds[], size_t n_keep_fds,
              const int null_fds[], size_t n_null_fds,
              struct process **pp)
{
    sigset_t oldsigs;
    pid_t pid;
    int error;

    *pp = NULL;
    COVERAGE_INC(process_start);
    error = process_prestart(argv);
    if (error) {
        return error;
    }

    block_sigchld(&oldsigs);
    pid = fork();
    if (pid < 0) {
        unblock_sigchld(&oldsigs);
        VLOG_WARN("fork failed: %s", strerror(errno));
        return errno;
    } else if (pid) {
        /* Running in parent process. */
        *pp = process_register(argv[0], pid);
        unblock_sigchld(&oldsigs);
        return 0;
    } else {
        /* Running in child process. */
        int fd_max = get_max_fds();
        int fd;

        fatal_signal_fork();
        unblock_sigchld(&oldsigs);
        for (fd = 0; fd < fd_max; fd++) {
            if (is_member(fd, null_fds, n_null_fds)) {
                /* We can't use get_null_fd() here because we might have
                 * already closed its fd. */
                int nullfd = open("/dev/null", O_RDWR);
                dup2(nullfd, fd);
                close(nullfd);
            } else if (fd >= 3 && !is_member(fd, keep_fds, n_keep_fds)) {
                close(fd);
            }
        }
        execvp(argv[0], argv);
        fprintf(stderr, "execvp(\"%s\") failed: %s\n",
                argv[0], strerror(errno));
        _exit(1);
    }
}
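A hedged sketch of the fd contract documented above: one extra descriptor survives into the child, and everything else above stderr is closed. 'log_fd' and the argv are illustrative.

char *argv[] = { "helper", NULL };       /* illustrative command */
int keep_fds[] = { log_fd };             /* stays open in the child */
struct process *p;
int error = process_start(argv, keep_fds, 1, NULL, 0, &p);

if (!error) {
    /* 'p' can now be polled with process_exited()/process_status(). */
}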
Example #17
/* Initializes 'flow' members from 'packet', 'skb_priority', 'skb_mark',
 * 'tnl', and 'ofp_in_port'.
 *
 * Initializes 'packet' header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l2_5 to the start of the MPLS shim header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
void
flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t skb_mark,
             const struct flow_tnl *tnl, uint16_t ofp_in_port,
             struct flow *flow)
{
    struct ofpbuf b = *packet;
    struct eth_header *eth;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);

    if (tnl) {
        ovs_assert(tnl != &flow->tunnel);
        flow->tunnel = *tnl;
    }
    flow->in_port = ofp_in_port;
    flow->skb_priority = skb_priority;
    flow->skb_mark = skb_mark;

    packet->l2   = b.data;
    packet->l2_5 = NULL;
    packet->l3   = NULL;
    packet->l4   = NULL;
    packet->l7   = NULL;

    if (b.size < sizeof *eth) {
        return;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, vlan_tci. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Parse mpls, copy l3 ttl. */
    if (eth_type_mpls(flow->dl_type)) {
        packet->l2_5 = b.data;
        parse_mpls(&b, flow);
    }

    packet->l3 = b.data;
    flow_extract_l3_onwards(packet, flow, flow->dl_type);
}
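A minimal usage sketch for a packet received on a non-tunneled port: pass NULL for the tunnel and let flow_extract() zero the flow and set the header pointers as documented above. 'packet' and 'ofp_in_port' are assumed to be in scope.

struct flow flow;

flow_extract(packet, 0, 0, NULL, ofp_in_port, &flow);
if (packet->l4 && flow.nw_proto == IPPROTO_TCP) {
    /* The L4 header was present and well-formed. */
}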
Example #18
static int
nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg,
               uint32_t nlmsg_seq, bool wait)
{
    struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg);
    int error;

    nlmsg->nlmsg_len = msg->size;
    nlmsg->nlmsg_seq = nlmsg_seq;
    nlmsg->nlmsg_pid = sock->pid;
    do {
        int retval;
        retval = send(sock->fd, msg->data, msg->size, wait ? 0 : MSG_DONTWAIT);
        error = retval < 0 ? errno : 0;
    } while (error == EINTR);
    log_nlmsg(__func__, error, msg->data, msg->size, sock->protocol);
    if (!error) {
        COVERAGE_INC(netlink_sent);
    }
    return error;
}
Example #19
/* Reallocates 'hindex''s array of buckets to use bitwise mask 'new_mask'. */
static void
hindex_resize(struct hindex *hindex, size_t new_mask)
{
    struct hindex tmp;
    size_t i;

    ovs_assert(is_pow2(new_mask + 1));
    ovs_assert(new_mask != SIZE_MAX);

    hindex_init(&tmp);
    if (new_mask) {
        tmp.buckets = xmalloc(sizeof *tmp.buckets * (new_mask + 1));
        tmp.mask = new_mask;
        for (i = 0; i <= tmp.mask; i++) {
            tmp.buckets[i] = NULL;
        }
    }
    for (i = 0; i <= hindex->mask; i++) {
        struct hindex_node *node, *next;
        int count;

        count = 0;
        for (node = hindex->buckets[i]; node; node = next) {
            struct hindex_node **head = &tmp.buckets[node->hash & tmp.mask];

            next = node->d;
            node->d = *head;
            *head = node;
            count++;
        }
        if (count > 5) {
            COVERAGE_INC(hindex_pathological);
        }
    }
    tmp.n_unique = hindex->n_unique;
    hindex_swap(hindex, &tmp);
    hindex_destroy(&tmp);
}
Example #20
/* Starts the process whose arguments are given in the null-terminated array
 * 'argv' and waits for it to exit.  On success returns 0 and stores the
 * process exit value (suitable for passing to process_status_msg()) in
 * '*status'.  On failure, returns a positive errno value and stores 0 in
 * '*status'.
 *
 * If 'stdout_log' is nonnull, then the subprocess's output to stdout (up to a
 * limit of PROCESS_MAX_CAPTURE bytes) is captured in a memory buffer, which
 * when this function returns 0 is stored as a null-terminated string in
 * '*stdout_log'.  The caller is responsible for freeing '*stdout_log' (by
 * passing it to free()).  When this function returns an error, '*stdout_log'
 * is set to NULL.
 *
 * If 'stderr_log' is nonnull, then it is treated like 'stdout_log' except
 * that it captures the subprocess's output to stderr. */
int
process_run_capture(char **argv, char **stdout_log, char **stderr_log,
                    int *status)
{
    struct stream s_stdout, s_stderr;
    sigset_t oldsigs;
    pid_t pid;
    int error;

    COVERAGE_INC(process_run_capture);
    if (stdout_log) {
        *stdout_log = NULL;
    }
    if (stderr_log) {
        *stderr_log = NULL;
    }
    *status = 0;
    error = process_prestart(argv);
    if (error) {
        return error;
    }

    error = stream_open(&s_stdout);
    if (error) {
        return error;
    }

    error = stream_open(&s_stderr);
    if (error) {
        stream_close(&s_stdout);
        return error;
    }

    block_sigchld(&oldsigs);
    pid = fork();
    if (pid < 0) {
        int error = errno;

        unblock_sigchld(&oldsigs);
        VLOG_WARN("fork failed: %s", strerror(error));

        stream_close(&s_stdout);
        stream_close(&s_stderr);
        *status = 0;
        return error;
    } else if (pid) {
        /* Running in parent process. */
        struct process *p;

        p = process_register(argv[0], pid);
        unblock_sigchld(&oldsigs);

        close(s_stdout.fds[1]);
        close(s_stderr.fds[1]);
        while (!process_exited(p)) {
            stream_read(&s_stdout);
            stream_read(&s_stderr);

            stream_wait(&s_stdout);
            stream_wait(&s_stderr);
            process_wait(p);
            poll_block();
        }
        stream_read(&s_stdout);
        stream_read(&s_stderr);

        if (stdout_log) {
            *stdout_log = ds_steal_cstr(&s_stdout.log);
        }
        if (stderr_log) {
            *stderr_log = ds_steal_cstr(&s_stderr.log);
        }

        stream_close(&s_stdout);
        stream_close(&s_stderr);

        *status = process_status(p);
        process_destroy(p);
        return 0;
    } else {
        /* Running in child process. */
        int max_fds;
        int i;

        fatal_signal_fork();
        unblock_sigchld(&oldsigs);

        dup2(get_null_fd(), 0);
        dup2(s_stdout.fds[1], 1);
        dup2(s_stderr.fds[1], 2);

        max_fds = get_max_fds();
        for (i = 3; i < max_fds; i++) {
            close(i);
        }

        execvp(argv[0], argv);
        fprintf(stderr, "execvp(\"%s\") failed: %s\n",
                argv[0], strerror(errno));
        exit(EXIT_FAILURE);
    }
}
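A caller sketch for the contract above: on success both logs are malloc'd strings the caller owns; on error they are set to NULL. The command is illustrative.

char *argv[] = { "ls", "/", NULL };
char *out, *err;
int status;

if (!process_run_capture(argv, &out, &err, &status)) {
    fputs(out, stdout);
    free(out);
    free(err);
}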
Example #21
/* Initializes 'flow' members from 'packet', 'tun_id', and 'ofp_in_port'.
 * Initializes 'packet' header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
void
flow_extract(struct ofpbuf *packet, uint32_t priority, ovs_be64 tun_id,
             uint16_t ofp_in_port, struct flow *flow)
{
    struct ofpbuf b = *packet;
    struct eth_header *eth;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);
    flow->tun_id = tun_id;
    flow->in_port = ofp_in_port;
    flow->priority = priority;

    packet->l2 = b.data;
    packet->l3 = NULL;
    packet->l4 = NULL;
    packet->l7 = NULL;

    if (b.size < sizeof *eth) {
        return;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, vlan_tci. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Network layer. */
    packet->l3 = b.data;
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        const struct ip_header *nh = pull_ip(&b);
        if (nh) {
            packet->l4 = b.data;

            flow->nw_src = get_unaligned_be32(&nh->ip_src);
            flow->nw_dst = get_unaligned_be32(&nh->ip_dst);
            flow->nw_proto = nh->ip_proto;

            flow->nw_tos = nh->ip_tos;
            if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
                flow->nw_frag = FLOW_NW_FRAG_ANY;
                if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
                    flow->nw_frag |= FLOW_NW_FRAG_LATER;
                }
            }
            flow->nw_ttl = nh->ip_ttl;

            if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
                if (flow->nw_proto == IPPROTO_TCP) {
                    parse_tcp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_UDP) {
                    parse_udp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_ICMP) {
                    const struct icmp_header *icmp = pull_icmp(&b);
                    if (icmp) {
                        flow->tp_src = htons(icmp->icmp_type);
                        flow->tp_dst = htons(icmp->icmp_code);
                        packet->l7 = b.data;
                    }
                }
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        if (parse_ipv6(&b, flow)) {
            return;
        }

        packet->l4 = b.data;
        if (flow->nw_proto == IPPROTO_TCP) {
            parse_tcp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_UDP) {
            parse_udp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
            if (parse_icmpv6(&b, flow)) {
                packet->l7 = b.data;
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
        const struct arp_eth_header *arp = pull_arp(&b);
        if (arp && arp->ar_hrd == htons(1)
            && arp->ar_pro == htons(ETH_TYPE_IP)
            && arp->ar_hln == ETH_ADDR_LEN
            && arp->ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ar_op) <= 0xff) {
                flow->nw_proto = ntohs(arp->ar_op);
            }

            if ((flow->nw_proto == ARP_OP_REQUEST)
                || (flow->nw_proto == ARP_OP_REPLY)) {
                flow->nw_src = arp->ar_spa;
                flow->nw_dst = arp->ar_tpa;
                memcpy(flow->arp_sha, arp->ar_sha, ETH_ADDR_LEN);
                memcpy(flow->arp_tha, arp->ar_tha, ETH_ADDR_LEN);
            }
        }
    }
}
Example #22
/* Initializes 'flow' members from 'packet', 'skb_priority', 'pkt_mark',
 * 'tnl', and 'in_port'.
 *
 * Initializes 'packet' header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l2_5 to the start of the MPLS shim header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP/UDP/SCTP/ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
void
flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t pkt_mark,
             const struct flow_tnl *tnl, const union flow_in_port *in_port,
             struct flow *flow)
{
    struct ofpbuf b = *packet;
    struct eth_header *eth;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);

    if (tnl) {
        ovs_assert(tnl != &flow->tunnel);
        flow->tunnel = *tnl;
    }
    if (in_port) {
        flow->in_port = *in_port;
    }
    flow->skb_priority = skb_priority;
    flow->pkt_mark = pkt_mark;

    packet->l2   = b.data;
    packet->l2_5 = NULL;
    packet->l3   = NULL;
    packet->l4   = NULL;
    packet->l7   = NULL;

    if (b.size < sizeof *eth) {
        return;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, vlan_tci. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Parse mpls, copy l3 ttl. */
    if (eth_type_mpls(flow->dl_type)) {
        packet->l2_5 = b.data;
        parse_mpls(&b, flow);
    }

    /* Network layer. */
    packet->l3 = b.data;
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        const struct ip_header *nh = pull_ip(&b);
        if (nh) {
            packet->l4 = b.data;

            flow->nw_src = get_16aligned_be32(&nh->ip_src);
            flow->nw_dst = get_16aligned_be32(&nh->ip_dst);
            flow->nw_proto = nh->ip_proto;

            flow->nw_tos = nh->ip_tos;
            if (IP_IS_FRAGMENT(nh->ip_frag_off)) {
                flow->nw_frag = FLOW_NW_FRAG_ANY;
                if (nh->ip_frag_off & htons(IP_FRAG_OFF_MASK)) {
                    flow->nw_frag |= FLOW_NW_FRAG_LATER;
                }
            }
            flow->nw_ttl = nh->ip_ttl;

            if (!(nh->ip_frag_off & htons(IP_FRAG_OFF_MASK))) {
                if (flow->nw_proto == IPPROTO_TCP) {
                    parse_tcp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_UDP) {
                    parse_udp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_SCTP) {
                    parse_sctp(packet, &b, flow);
                } else if (flow->nw_proto == IPPROTO_ICMP) {
                    const struct icmp_header *icmp = pull_icmp(&b);
                    if (icmp) {
                        flow->tp_src = htons(icmp->icmp_type);
                        flow->tp_dst = htons(icmp->icmp_code);
                        packet->l7 = b.data;
                    }
                }
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
        if (parse_ipv6(&b, flow)) {
            return;
        }

        packet->l4 = b.data;
        if (flow->nw_proto == IPPROTO_TCP) {
            parse_tcp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_UDP) {
            parse_udp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_SCTP) {
            parse_sctp(packet, &b, flow);
        } else if (flow->nw_proto == IPPROTO_ICMPV6) {
            if (parse_icmpv6(&b, flow)) {
                packet->l7 = b.data;
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP) ||
               flow->dl_type == htons(ETH_TYPE_RARP)) {
        const struct arp_eth_header *arp = pull_arp(&b);
        if (arp && arp->ar_hrd == htons(1)
            && arp->ar_pro == htons(ETH_TYPE_IP)
            && arp->ar_hln == ETH_ADDR_LEN
            && arp->ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ar_op) <= 0xff) {
                flow->nw_proto = ntohs(arp->ar_op);
            }

            flow->nw_src = get_16aligned_be32(&arp->ar_spa);
            flow->nw_dst = get_16aligned_be32(&arp->ar_tpa);
            memcpy(flow->arp_sha, arp->ar_sha, ETH_ADDR_LEN);
            memcpy(flow->arp_tha, arp->ar_tha, ETH_ADDR_LEN);
        }
    }
}
Example #23
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
#ifdef _WIN32
#define MAX_STACK_LENGTH 81920
    uint8_t tail[MAX_STACK_LENGTH];
#else
    uint8_t tail[65536];
#endif
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;
    int error;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    iov[0].iov_base = ofpbuf_base(buf);
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    /* Receive a Netlink message from the kernel.
     *
     * This works around a kernel bug in which the kernel returns an error code
     * as if it were the number of bytes read.  It doesn't actually modify
     * anything in the receive buffer in that case, so we can initialize the
     * Netlink header with an impossible message length and then, upon success,
     * check whether it changed. */
    nlmsghdr = ofpbuf_base(buf);
    do {
        nlmsghdr->nlmsg_len = UINT32_MAX;
#ifdef _WIN32
        bool result;
        DWORD bytes_read = 0;

        /* ReadFile() reports the byte count through a DWORD, so it cannot
         * be handed &retval (a ssize_t) directly. */
        result = ReadFile(sock->handle, tail, MAX_STACK_LENGTH, &bytes_read,
                          NULL);
        if (!result) {
            retval = -1;
            errno = EAGAIN;
        } else {
            retval = bytes_read;
            ofpbuf_put(buf, tail, bytes_read);
        }
#else
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
#endif
        error = (retval < 0 ? errno
                 : retval == 0 ? ECONNRESET /* not possible? */
                 : nlmsghdr->nlmsg_len != UINT32_MAX ? 0
                 : retval);
    } while (error == EINTR);
    if (error) {
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        VLOG_ERR_RL(&rl, "truncated message (longer than %"PRIuSIZE" bytes)",
                    sizeof tail);
        return E2BIG;
    }

    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%"PRIuSIZE" bytes < %"PRIuSIZE")",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }
#ifndef _WIN32
    ofpbuf_set_size(buf, MIN(retval, buf->allocated));
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }
#endif

    log_nlmsg(__func__, 0, ofpbuf_data(buf), ofpbuf_size(buf), sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}
Example #24
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
    uint8_t tail[65536];
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    iov[0].iov_base = buf->base;
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    do {
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
    } while (retval < 0 && errno == EINTR);

    if (retval < 0) {
        int error = errno;
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        VLOG_ERR_RL(&rl, "truncated message (longer than %zu bytes)",
                    sizeof tail);
        return E2BIG;
    }

    nlmsghdr = buf->data;
    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %zu)",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }

    buf->size = MIN(retval, buf->allocated);
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }

    log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}
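A caller sketch for the scatter-gather trick used by both versions above: size 'buf' for the typical message and let the 64 kB 'tail' absorb the rare jumbo reply, which ofpbuf_put() then copies back. ofpbuf_init()/ofpbuf_uninit() are the standard buffer helpers; the 2 kB figure is an arbitrary assumption.

struct ofpbuf buf;
int error;

ofpbuf_init(&buf, 2048);                 /* the "typical" case fits here */
error = nl_sock_recv__(sock, &buf, true);
if (!error) {
    /* buf now holds the whole message, jumbo or not. */
}
ofpbuf_uninit(&buf);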
Example #25
/* 'tun_id' is in network byte order, while 'in_port' is in host byte order.
 * These byte orders are the same as they are in struct odp_flow_key.
 *
 * Initializes packet header pointers as follows:
 *
 *    - packet->l2 to the start of the Ethernet header.
 *
 *    - packet->l3 to just past the Ethernet header, or just past the
 *      vlan_header if one is present, to the first byte of the payload of the
 *      Ethernet frame.
 *
 *    - packet->l4 to just past the IPv4 header, if one is present and has a
 *      correct length, and otherwise NULL.
 *
 *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
 *      present and has a correct length, and otherwise NULL.
 */
int
flow_extract(struct ofpbuf *packet, uint32_t tun_id, uint16_t in_port,
             flow_t *flow)
{
    struct ofpbuf b = *packet;
    struct eth_header *eth;
    int retval = 0;

    COVERAGE_INC(flow_extract);

    memset(flow, 0, sizeof *flow);
    flow->tun_id = tun_id;
    flow->in_port = in_port;
    flow->dl_vlan = htons(OFP_VLAN_NONE);

    packet->l2 = b.data;
    packet->l3 = NULL;
    packet->l4 = NULL;
    packet->l7 = NULL;

    if (b.size < sizeof *eth) {
        return 0;
    }

    /* Link layer. */
    eth = b.data;
    memcpy(flow->dl_src, eth->eth_src, ETH_ADDR_LEN);
    memcpy(flow->dl_dst, eth->eth_dst, ETH_ADDR_LEN);

    /* dl_type, dl_vlan, dl_vlan_pcp. */
    ofpbuf_pull(&b, ETH_ADDR_LEN * 2);
    if (eth->eth_type == htons(ETH_TYPE_VLAN)) {
        parse_vlan(&b, flow);
    }
    flow->dl_type = parse_ethertype(&b);

    /* Network layer. */
    packet->l3 = b.data;
    if (flow->dl_type == htons(ETH_TYPE_IP)) {
        const struct ip_header *nh = pull_ip(&b);
        if (nh) {
            flow->nw_src = get_unaligned_u32(&nh->ip_src);
            flow->nw_dst = get_unaligned_u32(&nh->ip_dst);
            flow->nw_tos = nh->ip_tos & IP_DSCP_MASK;
            flow->nw_proto = nh->ip_proto;
            packet->l4 = b.data;
            if (!IP_IS_FRAGMENT(nh->ip_frag_off)) {
                if (flow->nw_proto == IP_TYPE_TCP) {
                    const struct tcp_header *tcp = pull_tcp(&b);
                    if (tcp) {
                        flow->tp_src = tcp->tcp_src;
                        flow->tp_dst = tcp->tcp_dst;
                        packet->l7 = b.data;
                    }
                } else if (flow->nw_proto == IP_TYPE_UDP) {
                    const struct udp_header *udp = pull_udp(&b);
                    if (udp) {
                        flow->tp_src = udp->udp_src;
                        flow->tp_dst = udp->udp_dst;
                        packet->l7 = b.data;
                    }
                } else if (flow->nw_proto == IP_TYPE_ICMP) {
                    const struct icmp_header *icmp = pull_icmp(&b);
                    if (icmp) {
                        flow->icmp_type = htons(icmp->icmp_type);
                        flow->icmp_code = htons(icmp->icmp_code);
                        packet->l7 = b.data;
                    }
                }
            } else {
                retval = 1;
            }
        }
    } else if (flow->dl_type == htons(ETH_TYPE_ARP)) {
        const struct arp_eth_header *arp = pull_arp(&b);
        if (arp && arp->ar_hrd == htons(1)
            && arp->ar_pro == htons(ETH_TYPE_IP)
            && arp->ar_hln == ETH_ADDR_LEN
            && arp->ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ar_op) <= 0xff) {
                flow->nw_proto = ntohs(arp->ar_op);
            }

            if ((flow->nw_proto == ARP_OP_REQUEST)
                || (flow->nw_proto == ARP_OP_REPLY)) {
                flow->nw_src = arp->ar_spa;
                flow->nw_dst = arp->ar_tpa;
            }
        }
    }
    return retval;
}