Example #1
0
int
check_connection_completion(int fd)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
    struct pollfd pfd;
    int retval;

    pfd.fd = fd;
    pfd.events = POLLOUT;

#ifndef _WIN32
    do {
        retval = poll(&pfd, 1, 0);
    } while (retval < 0 && errno == EINTR);
#else
    retval = WSAPoll(&pfd, 1, 0);
#endif
    if (retval == 1) {
        if (pfd.revents & POLLERR) {
            ssize_t n = send(fd, "", 1, 0);
            if (n < 0) {
                return sock_errno();
            } else {
                VLOG_ERR_RL(&rl, "poll return POLLERR but send succeeded");
                return EPROTO;
            }
        }
        return 0;
    } else if (retval < 0) {
        VLOG_ERR_RL(&rl, "poll: %s", sock_strerror(sock_errno()));
        return errno;
    } else {
        return EAGAIN;
    }
}
Example #2
0
static int
stream_recv(struct vconn *vconn, struct ofpbuf **bufferp)
{
    struct stream_vconn *s = stream_vconn_cast(vconn);
    struct ofpbuf *rx;
    size_t want_bytes;
    ssize_t retval;

    if (s->rxbuf == NULL) {
        s->rxbuf = ofpbuf_new(1564);
    }
    rx = s->rxbuf;

again:
    if (sizeof(struct ofp_header) > rx->size) {
        want_bytes = sizeof(struct ofp_header) - rx->size;
    } else {
        struct ofp_header *oh = rx->data;
        size_t length = ntohs(oh->length);
        if (length < sizeof(struct ofp_header)) {
            VLOG_ERR_RL(LOG_MODULE, &rl, "received too-short ofp_header (%zu bytes)",
                        length);
            return EPROTO;
        }
        want_bytes = length - rx->size;
        if (!want_bytes) {
            *bufferp = rx;
            s->rxbuf = NULL;
            return 0;
        }
    }
    ofpbuf_prealloc_tailroom(rx, want_bytes);

    retval = read(s->fd, ofpbuf_tail(rx), want_bytes);
    if (retval > 0) {
        rx->size += retval;
        if (retval == want_bytes) {
            if (rx->size > sizeof(struct ofp_header)) {
                *bufferp = rx;
                s->rxbuf = NULL;
                return 0;
            } else {
                goto again;
            }
        }
        return EAGAIN;
    } else if (retval == 0) {
        if (rx->size) {
            VLOG_ERR_RL(LOG_MODULE, &rl, "connection dropped mid-packet");
            return EPROTO;
        } else {
            return EOF;
        }
    } else {
        return errno;
    }
}
Example #3
0
int
check_connection_completion(int fd)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 10);
    struct pollfd pfd;
    int retval;

    pfd.fd = fd;
    pfd.events = POLLOUT;

#ifndef _WIN32
    do {
        retval = poll(&pfd, 1, 0);
    } while (retval < 0 && errno == EINTR);
#else
    fd_set wrset, exset;
    FD_ZERO(&wrset);
    FD_ZERO(&exset);
    FD_SET(fd, &exset);
    FD_SET(fd, &wrset);
    pfd.revents = 0;
    struct timeval tv = { 0, 0 };
    /* WSAPoll is broken on Windows, instead do a select */
    retval = select(0, NULL, &wrset, &exset, &tv);
    if (retval == 1) {
        if (FD_ISSET(fd, &wrset)) {
            pfd.revents |= pfd.events;
        }
        if (FD_ISSET(fd, &exset)) {
            pfd.revents |= POLLERR;
        }
    }
#endif
    if (retval == 1) {
        if (pfd.revents & POLLERR) {
            ssize_t n = send(fd, "", 1, 0);
            if (n < 0) {
                return sock_errno();
            } else {
                VLOG_ERR_RL(&rl, "poll return POLLERR but send succeeded");
                return EPROTO;
            }
        }
        return 0;
    } else if (retval < 0) {
        VLOG_ERR_RL(&rl, "poll: %s", sock_strerror(sock_errno()));
        return errno;
    } else {
        return EAGAIN;
    }
}
Example #4
0
/* Perform all of the per-loop processing. */
static void
daemon_run(void)
{
    unsigned int new_idl_seqno = ovsdb_idl_get_seqno(idl);

    ovsdb_idl_run(idl);

    if (ovsdb_idl_is_lock_contended(idl)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
        VLOG_ERR_RL(&rl, "another ops-usermgmt process is running, "
                    "disabling this process until it goes away");
        return;
    } else if (!ovsdb_idl_has_lock(idl)) {
        return;
    }

    /* Acquired lock, we're officially ops-usermgmt now. */

    if (!populated) {
        /* First time we got this far, populate database from passwd. */
        if (sync_to_db())
            populated = true;
        daemonize_complete();
        vlog_enable_async();
        VLOG_INFO_ONCE("%s (OpenSwitch usermgmt)", program_name);
    }

    if (new_idl_seqno == idl_seqno)
        return;

    /* Change in OVSDB detected. */
    idl_seqno =  new_idl_seqno;

    sync_from_db();
}
void
classifierd_run(void)
{
    struct ovsdb_idl_txn *txn;

    /* Process a batch of messages from OVSDB. */
    ovsdb_idl_run(idl);

    if (ovsdb_idl_is_lock_contended(idl)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);

        VLOG_ERR_RL(&rl, "Another classifierd process is running, "
                    "disabling this process until it goes away");
        return;
    } else if (!ovsdb_idl_has_lock(idl)) {
        return;
    }

    /* Nothing to do until system has been configured, i.e. cur_cfg > 0. */
    if (!classifierd_system_is_configured()) {
        return;
    }

    /* Update the local configuration and push any changes to the dB. */
    txn = ovsdb_idl_txn_create(idl);
    if (classifierd_reconfigure()) {
        VLOG_DBG("%s: Committing changes\n",__FUNCTION__);
        /* Some OVSDB write needs to happen. */
        ovsdb_idl_txn_commit_block(txn);
    }
    ovsdb_idl_txn_destroy(txn);

    return;
} /* classifierd_run */
Example #6
0
/* Sends the 'request' member of the 'n' transactions in 'transactions' on
 * 'sock', in order, and receives responses to all of them.  Fills in the
 * 'error' member of each transaction with 0 if it was successful, otherwise
 * with a positive errno value.  If 'reply' is nonnull, then it will be filled
 * with the reply if the message receives a detailed reply.  In other cases,
 * i.e. where the request failed or had no reply beyond an indication of
 * success, 'reply' will be cleared if it is nonnull.
 *
 * The caller is responsible for destroying each request and reply, and the
 * transactions array itself.
 *
 * Before sending each message, this function will finalize nlmsg_len in each
 * 'request' to match the ofpbuf's size,  set nlmsg_pid to 'sock''s pid, and
 * initialize nlmsg_seq.
 *
 * Bare Netlink is an unreliable transport protocol.  This function layers
 * reliable delivery and reply semantics on top of bare Netlink.  See
 * nl_sock_transact() for some caveats.
 */
void
nl_sock_transact_multiple(struct nl_sock *sock,
                          struct nl_transaction **transactions, size_t n)
{
    int max_batch_count;
    int error;

    if (!n) {
        return;
    }

    error = nl_sock_cow__(sock);
    if (error) {
        nl_sock_record_errors__(transactions, n, error);
        return;
    }

    /* In theory, every request could have a 64 kB reply.  But the default and
     * maximum socket rcvbuf size with typical Dom0 memory sizes both tend to
     * be a bit below 128 kB, so that would only allow a single message in a
     * "batch".  So we assume that replies average (at most) 4 kB, which allows
     * a good deal of batching.
     *
     * In practice, most of the requests that we batch either have no reply at
     * all or a brief reply. */
    max_batch_count = MAX(sock->rcvbuf / 4096, 1);
    max_batch_count = MIN(max_batch_count, max_iovs);

    while (n > 0) {
        size_t count, bytes;
        size_t done;

        /* Batch up to 'max_batch_count' transactions.  But cap it at about a
         * page of requests total because big skbuffs are expensive to
         * allocate in the kernel.  */
#if defined(PAGESIZE)
        enum { MAX_BATCH_BYTES = MAX(1, PAGESIZE - 512) };
#else
        enum { MAX_BATCH_BYTES = 4096 - 512 };
#endif
        bytes = transactions[0]->request->size;
        for (count = 1; count < n && count < max_batch_count; count++) {
            if (bytes + transactions[count]->request->size > MAX_BATCH_BYTES) {
                break;
            }
            bytes += transactions[count]->request->size;
        }

        error = nl_sock_transact_multiple__(sock, transactions, count, &done);
        transactions += done;
        n -= done;

        if (error == ENOBUFS) {
            VLOG_DBG_RL(&rl, "receive buffer overflow, resending request");
        } else if (error) {
            VLOG_ERR_RL(&rl, "transaction error (%s)", strerror(error));
            nl_sock_record_errors__(transactions, n, error);
        }
    }
}
Example #7
0
/* Translates 'host_name', which must be a string representation of an IPv6
 * address, into a numeric IPv6 address in '*addr'.  Returns 0 if successful,
 * otherwise a positive errno value. */
int
lookup_ipv6(const char *host_name, struct in6_addr *addr)
{
    if (!ipv6_parse(host_name, addr)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IPv6 address", host_name);
        return ENOENT;
    }
    return 0;
}
Example #8
0
/* Translates 'host_name', which must be a string representation of an IP
 * address, into a numeric IP address in '*addr'.  Returns 0 if successful,
 * otherwise a positive errno value. */
int
lookup_ip(const char *host_name, struct in_addr *addr)
{
    if (!inet_pton(AF_INET, host_name, addr)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_ERR_RL(&rl, "\"%s\" is not a valid IP address", host_name);
        return ENOENT;
    }
    return 0;
}
Example #9
0
static void
interpret_queued_ssl_error(const char *function)
{
    int queued_error = ERR_get_error();
    if (queued_error != 0) {
        VLOG_WARN_RL(&rl, "%s: %s",
                     function, ERR_error_string(queued_error, NULL));
    } else {
        VLOG_ERR_RL(&rl, "%s: SSL_ERROR_SSL without queued error", function);
    }
}
Example #10
0
static void
stream_do_tx(int fd UNUSED, short int revents UNUSED, void *vconn_)
{
    struct vconn *vconn = vconn_;
    struct stream_vconn *s = stream_vconn_cast(vconn);
    ssize_t n = write(s->fd, s->txbuf->data, s->txbuf->size);
    if (n < 0) {
        if (errno != EAGAIN) {
            VLOG_ERR_RL(LOG_MODULE, &rl, "send: %s", strerror(errno));
            stream_clear_txbuf(s);
            return;
        }
    } else if (n > 0) {
        ofpbuf_pull(s->txbuf, n);
        if (!s->txbuf->size) {
            stream_clear_txbuf(s);
            return;
        }
    }
    s->tx_waiter = poll_fd_callback(s->fd, POLLOUT, stream_do_tx, vconn);
}
static int
ssl_recv(struct vconn *vconn, struct ofpbuf **bufferp)
{
    struct ssl_vconn *sslv = ssl_vconn_cast(vconn);
    struct ofpbuf *rx;
    size_t want_bytes;
    int old_state;
    ssize_t ret;

    if (sslv->rxbuf == NULL) {
        sslv->rxbuf = ofpbuf_new(1564);
    }
    rx = sslv->rxbuf;

again:
    if (sizeof(struct ofp_header) > rx->size) {
        want_bytes = sizeof(struct ofp_header) - rx->size;
    } else {
        struct ofp_header *oh = rx->data;
        size_t length = ntohs(oh->length);
        if (length < sizeof(struct ofp_header)) {
            VLOG_ERR_RL(&rl, "received too-short ofp_header (%zu bytes)",
                        length);
            return EPROTO;
        }
        want_bytes = length - rx->size;
        if (!want_bytes) {
            *bufferp = rx;
            sslv->rxbuf = NULL;
            return 0;
        }
    }
    ofpbuf_prealloc_tailroom(rx, want_bytes);

    /* Behavior of zero-byte SSL_read is poorly defined. */
    assert(want_bytes > 0);

    old_state = SSL_get_state(sslv->ssl);
    ret = SSL_read(sslv->ssl, ofpbuf_tail(rx), want_bytes);
    if (old_state != SSL_get_state(sslv->ssl)) {
        sslv->tx_want = SSL_NOTHING;
        if (sslv->tx_waiter) {
            poll_cancel(sslv->tx_waiter);
            ssl_tx_poll_callback(sslv->fd, POLLIN, vconn);
        }
    }
    sslv->rx_want = SSL_NOTHING;

    if (ret > 0) {
        rx->size += ret;
        if (ret == want_bytes) {
            if (rx->size > sizeof(struct ofp_header)) {
                *bufferp = rx;
                sslv->rxbuf = NULL;
                return 0;
            } else {
                goto again;
            }
        }
        return EAGAIN;
    } else {
        int error = SSL_get_error(sslv->ssl, ret);
        if (error == SSL_ERROR_ZERO_RETURN) {
            /* Connection closed (EOF). */
            if (rx->size) {
                VLOG_WARN_RL(&rl, "SSL_read: unexpected connection close");
                return EPROTO;
            } else {
                return EOF;
            }
        } else {
            return interpret_ssl_error("SSL_read", ret, error, &sslv->rx_want);
        }
    }
}
Example #12
0
/* Takes care of necessary 'sw' activity, except for receiving packets (which
 * the caller must do). */
void
lswitch_run(struct lswitch *sw, struct rconn *rconn)
{
    long long int now = time_msec();

    if (sw->ml) {
        mac_learning_run(sw->ml, NULL);
    }

    /* If we're waiting for more replies, keeping waiting for up to 10 s. */
    if (sw->last_reply != LLONG_MIN) {
        if (now - sw->last_reply > 10000) {
            VLOG_ERR_RL(&rl, "%012llx: No more flow stat replies last 10 s",
                        sw->datapath_id);
            sw->last_reply = LLONG_MIN;
            sw->last_query = LLONG_MIN;
            schedule_query(sw, 0);
        } else {
            return;
        }
    }

    /* If we're waiting for any reply at all, keep waiting for up to 10 s. */
    if (sw->last_query != LLONG_MIN) {
        if (now - sw->last_query > 10000) {
            VLOG_ERR_RL(&rl, "%012llx: No flow stat replies in last 10 s",
                        sw->datapath_id);
            sw->last_query = LLONG_MIN;
            schedule_query(sw, 0);
        } else {
            return;
        }
    }

    /* If it's time to send another query, do so. */
    if (sw->next_query != LLONG_MIN && now >= sw->next_query) {
        sw->next_query = LLONG_MIN;
        if (!rconn_is_connected(rconn)) {
            schedule_query(sw, 1000);
        } else {
            struct ofp_stats_request *osr;
            struct ofp_flow_stats_request *ofsr;
            struct ofpbuf *b;
            int error;

            VLOG_DBG("%012llx: Sending flow stats request to implement STP",
                     sw->datapath_id);

            sw->last_query = now;
            sw->query_xid = random_uint32();
            sw->n_flows = 0;
            sw->n_no_recv = 0;
            sw->n_no_send = 0;
            osr = make_openflow_xid(sizeof *osr + sizeof *ofsr,
                                    OFPT_STATS_REQUEST, sw->query_xid, &b);
            osr->type = htons(OFPST_FLOW);
            osr->flags = htons(0);
            ofsr = (struct ofp_flow_stats_request *) osr->body;
            ofsr->match.wildcards = htonl(OFPFW_ALL);
            ofsr->table_id = 0xff;
            ofsr->out_port = htons(OFPP_NONE);

            error = rconn_send(rconn, b, NULL);
            if (error) {
                VLOG_WARN_RL(&rl, "%012llx: sending flow stats request "
                             "failed: %s", sw->datapath_id, strerror(error));
                ofpbuf_delete(b);
                schedule_query(sw, 1000);
            }
        }
    }
}
Example #13
0
static int
interpret_ssl_error(const char *function, int ret, int error,
                    int *want)
{
    *want = SSL_NOTHING;

    switch (error) {
    case SSL_ERROR_NONE:
        VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_NONE", function);
        break;

    case SSL_ERROR_ZERO_RETURN:
        VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_ZERO_RETURN", function);
        break;

    case SSL_ERROR_WANT_READ:
        *want = SSL_READING;
        return EAGAIN;

    case SSL_ERROR_WANT_WRITE:
        *want = SSL_WRITING;
        return EAGAIN;

    case SSL_ERROR_WANT_CONNECT:
        VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_WANT_CONNECT", function);
        break;

    case SSL_ERROR_WANT_ACCEPT:
        VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_WANT_ACCEPT", function);
        break;

    case SSL_ERROR_WANT_X509_LOOKUP:
        VLOG_ERR_RL(&rl, "%s: unexpected SSL_ERROR_WANT_X509_LOOKUP",
                    function);
        break;

    case SSL_ERROR_SYSCALL: {
        int queued_error = ERR_get_error();
        if (queued_error == 0) {
            if (ret < 0) {
                int status = errno;
                VLOG_WARN_RL(&rl, "%s: system error (%s)",
                             function, ovs_strerror(status));
                return status;
            } else {
                VLOG_WARN_RL(&rl, "%s: unexpected SSL connection close",
                             function);
                return EPROTO;
            }
        } else {
            VLOG_WARN_RL(&rl, "%s: %s",
                         function, ERR_error_string(queued_error, NULL));
            break;
        }
    }

    case SSL_ERROR_SSL:
        interpret_queued_ssl_error(function);
        break;

    default:
        VLOG_ERR_RL(&rl, "%s: bad SSL error code %d", function, error);
        break;
    }
    return EIO;
}
Example #14
0
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
#ifdef _WIN32
#define MAX_STACK_LENGTH 81920
    uint8_t tail[MAX_STACK_LENGTH];
#else
    uint8_t tail[65536];
#endif
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;
    int error;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    iov[0].iov_base = ofpbuf_base(buf);
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    /* Receive a Netlink message from the kernel.
     *
     * This works around a kernel bug in which the kernel returns an error code
     * as if it were the number of bytes read.  It doesn't actually modify
     * anything in the receive buffer in that case, so we can initialize the
     * Netlink header with an impossible message length and then, upon success,
     * check whether it changed. */
    nlmsghdr = ofpbuf_base(buf);
    do {
        nlmsghdr->nlmsg_len = UINT32_MAX;
#ifdef _WIN32
        boolean result = false;
        DWORD last_error = 0;
        result = ReadFile(sock->handle, tail, MAX_STACK_LENGTH, &retval, NULL);
        last_error = GetLastError();
        if (last_error != ERROR_SUCCESS && !result) {
            retval = -1;
            errno = EAGAIN;
        } else {
            ofpbuf_put(buf, tail, retval);
        }
#else
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
#endif
        error = (retval < 0 ? errno
                 : retval == 0 ? ECONNRESET /* not possible? */
                 : nlmsghdr->nlmsg_len != UINT32_MAX ? 0
                 : retval);
    } while (error == EINTR);
    if (error) {
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        VLOG_ERR_RL(&rl, "truncated message (longer than %"PRIuSIZE" bytes)",
                    sizeof tail);
        return E2BIG;
    }

    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%"PRIuSIZE" bytes < %"PRIuSIZE")",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }
#ifndef _WIN32
    ofpbuf_set_size(buf, MIN(retval, buf->allocated));
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }
#endif

    log_nlmsg(__func__, 0, ofpbuf_data(buf), ofpbuf_size(buf), sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}
/* Blocks until one or more of the events registered with poll_fd_wait()
 * occurs, or until the minimum duration registered with poll_timer_wait()
 * elapses, or not at all if poll_immediate_wake() has been called.
 *
 * Also executes any autonomous subroutines registered with poll_fd_callback(),
 * if their file descriptors have become ready. */
void
poll_block(void)
{
    static struct pollfd *pollfds;
    static size_t max_pollfds;

    struct poll_waiter *pw;
    struct list *node;
    int n_pollfds;
    int retval;

    assert(!running_cb);
    if (max_pollfds < n_waiters) {
        max_pollfds = n_waiters;
        pollfds = xrealloc(pollfds, max_pollfds * sizeof *pollfds);
    }

    n_pollfds = 0;
    LIST_FOR_EACH (pw, struct poll_waiter, node, &waiters) {
        pw->pollfd = &pollfds[n_pollfds];
        pollfds[n_pollfds].fd = pw->fd;
        pollfds[n_pollfds].events = pw->events;
        pollfds[n_pollfds].revents = 0;
        n_pollfds++;
    }

    retval = time_poll(pollfds, n_pollfds, timeout);
    if (retval < 0) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_ERR_RL(&rl, "poll: %s", strerror(-retval));
    } else if (!retval && VLOG_IS_DBG_ENABLED()) {
        log_wakeup(&timeout_backtrace, "%d-ms timeout", timeout);
    }

    for (node = waiters.next; node != &waiters; ) {
        pw = CONTAINER_OF(node, struct poll_waiter, node);
        if (!pw->pollfd || !pw->pollfd->revents) {
            if (pw->function) {
                node = node->next;
                continue;
            }
        } else {
            if (VLOG_IS_DBG_ENABLED()) {
                log_wakeup(pw->backtrace, "%s%s%s%s%s on fd %d",
                           pw->pollfd->revents & POLLIN ? "[POLLIN]" : "",
                           pw->pollfd->revents & POLLOUT ? "[POLLOUT]" : "",
                           pw->pollfd->revents & POLLERR ? "[POLLERR]" : "",
                           pw->pollfd->revents & POLLHUP ? "[POLLHUP]" : "",
                           pw->pollfd->revents & POLLNVAL ? "[POLLNVAL]" : "",
                           pw->fd);
            }

            if (pw->function) {
#ifndef NDEBUG
                running_cb = pw;
#endif
                pw->function(pw->fd, pw->pollfd->revents, pw->aux);
#ifndef NDEBUG
                running_cb = NULL;
#endif
            }
        }
        node = node->next;
        poll_cancel(pw);
    }

    timeout = -1;
    timeout_backtrace.n_frames = 0;
}
Example #16
0
static struct ofpbuf *
ofperr_encode_msg__(enum ofperr error, enum ofp_version ofp_version,
                    ovs_be32 xid, const void *data, size_t data_len)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
    const struct ofperr_domain *domain;
    const struct triplet *triplet;
    struct ofp_error_msg *oem;
    struct ofpbuf *buf;

    /* Get the error domain for 'ofp_version', or fall back to OF1.0. */
    domain = ofperr_domain_from_version(ofp_version);
    if (!domain) {
        VLOG_ERR_RL(&rl, "cannot encode error for unknown OpenFlow "
                    "version 0x%02x", ofp_version);
        domain = &ofperr_of10;
    }

    /* Make sure 'error' is valid in 'domain', or use a fallback error. */
    if (!ofperr_is_valid(error)) {
        /* 'error' seems likely to be a system errno value. */
        VLOG_ERR_RL(&rl, "invalid OpenFlow error code %d (%s)",
                    error, ovs_strerror(error));
        error = OFPERR_NXBRC_UNENCODABLE_ERROR;
    } else if (domain->errors[error - OFPERR_OFS].code < 0) {
        VLOG_ERR_RL(&rl, "cannot encode %s for %s",
                    ofperr_get_name(error), domain->name);
        error = OFPERR_NXBRC_UNENCODABLE_ERROR;
    }

    triplet = ofperr_get_triplet__(error, domain);
    if (!triplet->vendor) {
        buf = ofpraw_alloc_xid(OFPRAW_OFPT_ERROR, domain->version, xid,
                               sizeof *oem + data_len);

        oem = ofpbuf_put_uninit(buf, sizeof *oem);
        oem->type = htons(triplet->type);
        oem->code = htons(triplet->code);
    } else if (ofp_version <= OFP11_VERSION) {
        struct nx_vendor_error *nve;

        buf = ofpraw_alloc_xid(OFPRAW_OFPT_ERROR, domain->version, xid,
                               sizeof *oem + sizeof *nve + data_len);

        oem = ofpbuf_put_uninit(buf, sizeof *oem);
        oem->type = htons(NXET_VENDOR);
        oem->code = htons(NXVC_VENDOR_ERROR);

        nve = ofpbuf_put_uninit(buf, sizeof *nve);
        nve->vendor = htonl(triplet->vendor);
        nve->type = htons(triplet->type);
        nve->code = htons(triplet->code);
    } else {
        ovs_be32 vendor = htonl(triplet->vendor);

        buf = ofpraw_alloc_xid(OFPRAW_OFPT_ERROR, domain->version, xid,
                               sizeof *oem + sizeof(uint32_t) + data_len);

        oem = ofpbuf_put_uninit(buf, sizeof *oem);
        oem->type = htons(OFPET12_EXPERIMENTER);
        oem->code = htons(triplet->type);
        ofpbuf_put(buf, &vendor, sizeof vendor);
    }

    ofpbuf_put(buf, data, MIN(data_len, UINT16_MAX - buf->size));
    ofpmsg_update_length(buf);

    return buf;
}
static int
nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
{
    /* We can't accurately predict the size of the data to be received.  The
     * caller is supposed to have allocated enough space in 'buf' to handle the
     * "typical" case.  To handle exceptions, we make available enough space in
     * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable
     * figure since that's the maximum length of a Netlink attribute). */
    struct nlmsghdr *nlmsghdr;
    uint8_t tail[65536];
    struct iovec iov[2];
    struct msghdr msg;
    ssize_t retval;

    ovs_assert(buf->allocated >= sizeof *nlmsghdr);
    ofpbuf_clear(buf);

    iov[0].iov_base = buf->base;
    iov[0].iov_len = buf->allocated;
    iov[1].iov_base = tail;
    iov[1].iov_len = sizeof tail;

    memset(&msg, 0, sizeof msg);
    msg.msg_iov = iov;
    msg.msg_iovlen = 2;

    do {
        retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT);
    } while (retval < 0 && errno == EINTR);

    if (retval < 0) {
        int error = errno;
        if (error == ENOBUFS) {
            /* Socket receive buffer overflow dropped one or more messages that
             * the kernel tried to send to us. */
            COVERAGE_INC(netlink_overflow);
        }
        return error;
    }

    if (msg.msg_flags & MSG_TRUNC) {
        VLOG_ERR_RL(&rl, "truncated message (longer than %zu bytes)",
                    sizeof tail);
        return E2BIG;
    }

    nlmsghdr = buf->data;
    if (retval < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len < sizeof *nlmsghdr
        || nlmsghdr->nlmsg_len > retval) {
        VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %zu)",
                    retval, sizeof *nlmsghdr);
        return EPROTO;
    }

    buf->size = MIN(retval, buf->allocated);
    if (retval > buf->allocated) {
        COVERAGE_INC(netlink_recv_jumbo);
        ofpbuf_put(buf, tail, retval - buf->allocated);
    }

    log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
    COVERAGE_INC(netlink_received);

    return 0;
}