/* Attempts to retrieve another reply from 'dump', which must have been * initialized with nl_dump_start(). * * If successful, returns true and points 'reply->data' and 'reply->size' to * the message that was retrieved. The caller must not modify 'reply' (because * it points into the middle of a larger buffer). * * On failure, returns false and sets 'reply->data' to NULL and 'reply->size' * to 0. Failure might indicate an actual error or merely the end of replies. * An error status for the entire dump operation is provided when it is * completed by calling nl_dump_done(). */ bool nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply) { struct nlmsghdr *nlmsghdr; reply->data = NULL; reply->size = 0; if (dump->status) { return false; } while (!dump->buffer.size) { int retval = nl_dump_recv(dump); if (retval) { ofpbuf_clear(&dump->buffer); if (retval != EAGAIN) { dump->status = retval; return false; } } } nlmsghdr = nl_msg_next(&dump->buffer, reply); if (!nlmsghdr) { VLOG_WARN_RL(&rl, "netlink dump reply contains message fragment"); dump->status = EPROTO; return false; } else if (nlmsghdr->nlmsg_type == NLMSG_DONE) { dump->status = EOF; return false; } return true; }
/* Populates 'b' with an Ethernet LLC+SNAP packet headed with the given * 'eth_dst', 'eth_src', 'snap_org', and 'snap_type'. A payload of 'size' * bytes is allocated in 'b' and returned. This payload may be populated with * appropriate information by the caller. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. */ void * snap_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], const uint8_t eth_src[ETH_ADDR_LEN], unsigned int oui, uint16_t snap_type, size_t size) { struct eth_header *eth; struct llc_snap_header *llc_snap; void *payload; /* Compose basic packet structure. (We need the payload size to stick into * the 802.2 header.) */ ofpbuf_clear(b); ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + LLC_SNAP_HEADER_LEN + size); ofpbuf_reserve(b, VLAN_HEADER_LEN); eth = ofpbuf_put_zeros(b, ETH_HEADER_LEN); llc_snap = ofpbuf_put_zeros(b, LLC_SNAP_HEADER_LEN); payload = ofpbuf_put_uninit(b, size); /* Compose 802.2 header. */ memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); eth->eth_type = htons(b->size - ETH_HEADER_LEN); /* Compose LLC, SNAP headers. */ llc_snap->llc.llc_dsap = LLC_DSAP_SNAP; llc_snap->llc.llc_ssap = LLC_SSAP_SNAP; llc_snap->llc.llc_cntl = LLC_CNTL_SNAP; llc_snap->snap.snap_org[0] = oui >> 16; llc_snap->snap.snap_org[1] = oui >> 8; llc_snap->snap.snap_org[2] = oui; llc_snap->snap.snap_type = htons(snap_type); return payload; }
/* Fills 'b' with a Reverse ARP packet with Ethernet source address 'eth_src'. * This function is used by Open vSwitch to compose packets in cases where * context is important but content doesn't (or shouldn't) matter. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. */ void compose_rarp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN]) { struct eth_header *eth; struct rarp_header *rarp; ofpbuf_clear(b); ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + RARP_HEADER_LEN); ofpbuf_reserve(b, VLAN_HEADER_LEN); eth = ofpbuf_put_uninit(b, sizeof *eth); memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); eth->eth_type = htons(ETH_TYPE_RARP); rarp = ofpbuf_put_uninit(b, sizeof *rarp); rarp->hw_addr_space = htons(ARP_HTYPE_ETH); rarp->proto_addr_space = htons(ETH_TYPE_IP); rarp->hw_addr_length = ETH_ADDR_LEN; rarp->proto_addr_length = sizeof rarp->src_proto_addr; rarp->opcode = htons(RARP_REQUEST_REVERSE); memcpy(rarp->src_hw_addr, eth_src, ETH_ADDR_LEN); rarp->src_proto_addr = htonl(0); memcpy(rarp->target_hw_addr, eth_src, ETH_ADDR_LEN); rarp->target_proto_addr = htonl(0); }
static void nl_sock_record_errors__(struct nl_transaction **transactions, size_t n, int error) { size_t i; for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; txn->error = error; if (txn->reply) { ofpbuf_clear(txn->reply); } } }
/* Initialize a conntrack netlink dump. */ int nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone) { struct nl_ct_dump_state *state; *statep = state = xzalloc(sizeof *state); ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE); if (zone) { state->filter_zone = true; state->zone = *zone; } nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET, NLM_F_REQUEST); nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf); ofpbuf_clear(&state->buf); return 0; }
/* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst', * 'eth_src' and 'eth_type' parameters. A payload of 'size' bytes is allocated * in 'b' and returned. This payload may be populated with appropriate * information by the caller. Sets 'b''s 'l2' and 'l3' pointers to the * Ethernet header and payload respectively. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. */ void * eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], const uint8_t eth_src[ETH_ADDR_LEN], uint16_t eth_type, size_t size) { void *data; struct eth_header *eth; ofpbuf_clear(b); ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + size); ofpbuf_reserve(b, VLAN_HEADER_LEN); eth = ofpbuf_put_uninit(b, ETH_HEADER_LEN); data = ofpbuf_put_uninit(b, size); memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); eth->eth_type = htons(eth_type); b->l2 = eth; b->l3 = data; return data; }
int main(int argc, char *argv[]) { struct unixctl_server *server; enum { MAX_RECV = 1500 }; const char *target; struct ofpbuf buf; bool exiting = false; int error; int sock; int n; proctitle_init(argc, argv); set_program_name(argv[0]); parse_options(argc, argv); if (argc - optind != 1) { ovs_fatal(0, "exactly one non-option argument required " "(use --help for help)"); } target = argv[optind]; sock = inet_open_passive(SOCK_DGRAM, target, 0, NULL, 0); if (sock < 0) { ovs_fatal(0, "%s: failed to open (%s)", argv[1], strerror(-sock)); } daemon_save_fd(STDOUT_FILENO); daemonize_start(); error = unixctl_server_create(NULL, &server); if (error) { ovs_fatal(error, "failed to create unixctl server"); } unixctl_command_register("exit", "", 0, 0, test_netflow_exit, &exiting); daemonize_complete(); ofpbuf_init(&buf, MAX_RECV); n = 0; for (;;) { int retval; unixctl_server_run(server); ofpbuf_clear(&buf); do { retval = read(sock, buf.data, buf.allocated); } while (retval < 0 && errno == EINTR); if (retval > 0) { ofpbuf_put_uninit(&buf, retval); if (n++ > 0) { putchar('\n'); } print_netflow(&buf); fflush(stdout); } if (exiting) { break; } poll_fd_wait(sock, POLLIN); unixctl_server_wait(server); poll_block(); } return 0; }
static int nl_sock_transact_multiple__(struct nl_sock *sock, struct nl_transaction **transactions, size_t n, size_t *done) { uint64_t tmp_reply_stub[1024 / 8]; struct nl_transaction tmp_txn; struct ofpbuf tmp_reply; uint32_t base_seq; struct iovec iovs[MAX_IOVS]; struct msghdr msg; int error; int i; base_seq = nl_sock_allocate_seq(sock, n); *done = 0; for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(txn->request); nlmsg->nlmsg_len = txn->request->size; nlmsg->nlmsg_seq = base_seq + i; nlmsg->nlmsg_pid = sock->pid; iovs[i].iov_base = txn->request->data; iovs[i].iov_len = txn->request->size; } memset(&msg, 0, sizeof msg); msg.msg_iov = iovs; msg.msg_iovlen = n; do { error = sendmsg(sock->fd, &msg, 0) < 0 ? errno : 0; } while (error == EINTR); for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; log_nlmsg(__func__, error, txn->request->data, txn->request->size, sock->protocol); } if (!error) { COVERAGE_ADD(netlink_sent, n); } if (error) { return error; } ofpbuf_use_stub(&tmp_reply, tmp_reply_stub, sizeof tmp_reply_stub); tmp_txn.request = NULL; tmp_txn.reply = &tmp_reply; tmp_txn.error = 0; while (n > 0) { struct nl_transaction *buf_txn, *txn; uint32_t seq; /* Find a transaction whose buffer we can use for receiving a reply. * If no such transaction is left, use tmp_txn. */ buf_txn = &tmp_txn; for (i = 0; i < n; i++) { if (transactions[i]->reply) { buf_txn = transactions[i]; break; } } /* Receive a reply. */ error = nl_sock_recv__(sock, buf_txn->reply, false); if (error) { if (error == EAGAIN) { nl_sock_record_errors__(transactions, n, 0); *done += n; error = 0; } break; } /* Match the reply up with a transaction. */ seq = nl_msg_nlmsghdr(buf_txn->reply)->nlmsg_seq; if (seq < base_seq || seq >= base_seq + n) { VLOG_DBG_RL(&rl, "ignoring unexpected seq %#"PRIx32, seq); continue; } i = seq - base_seq; txn = transactions[i]; /* Fill in the results for 'txn'. */ if (nl_msg_nlmsgerr(buf_txn->reply, &txn->error)) { if (txn->reply) { ofpbuf_clear(txn->reply); } if (txn->error) { VLOG_DBG_RL(&rl, "received NAK error=%d (%s)", error, ovs_strerror(txn->error)); } } else { txn->error = 0; if (txn->reply && txn != buf_txn) { /* Swap buffers. */ struct ofpbuf *reply = buf_txn->reply; buf_txn->reply = txn->reply; txn->reply = reply; } } /* Fill in the results for transactions before 'txn'. (We have to do * this after the results for 'txn' itself because of the buffer swap * above.) */ nl_sock_record_errors__(transactions, i, 0); /* Advance. */ *done += i + 1; transactions += i + 1; n -= i + 1; base_seq += i + 1; } ofpbuf_uninit(&tmp_reply); return error; }
static int nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) { /* We can't accurately predict the size of the data to be received. The * caller is supposed to have allocated enough space in 'buf' to handle the * "typical" case. To handle exceptions, we make available enough space in * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable * figure since that's the maximum length of a Netlink attribute). */ struct nlmsghdr *nlmsghdr; uint8_t tail[65536]; struct iovec iov[2]; struct msghdr msg; ssize_t retval; ovs_assert(buf->allocated >= sizeof *nlmsghdr); ofpbuf_clear(buf); iov[0].iov_base = buf->base; iov[0].iov_len = buf->allocated; iov[1].iov_base = tail; iov[1].iov_len = sizeof tail; memset(&msg, 0, sizeof msg); msg.msg_iov = iov; msg.msg_iovlen = 2; do { retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT); } while (retval < 0 && errno == EINTR); if (retval < 0) { int error = errno; if (error == ENOBUFS) { /* Socket receive buffer overflow dropped one or more messages that * the kernel tried to send to us. */ COVERAGE_INC(netlink_overflow); } return error; } if (msg.msg_flags & MSG_TRUNC) { VLOG_ERR_RL(&rl, "truncated message (longer than %zu bytes)", sizeof tail); return E2BIG; } nlmsghdr = buf->data; if (retval < sizeof *nlmsghdr || nlmsghdr->nlmsg_len < sizeof *nlmsghdr || nlmsghdr->nlmsg_len > retval) { VLOG_ERR_RL(&rl, "received invalid nlmsg (%zd bytes < %zu)", retval, sizeof *nlmsghdr); return EPROTO; } buf->size = MIN(retval, buf->allocated); if (retval > buf->allocated) { COVERAGE_INC(netlink_recv_jumbo); ofpbuf_put(buf, tail, retval - buf->allocated); } log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol); COVERAGE_INC(netlink_received); return 0; }
int nl_ct_flush_zone(uint16_t flush_zone) { /* Apparently, there's no netlink interface to flush a specific zone. * This code dumps every connection, checks the zone and eventually * delete the entry. * * This is race-prone, but it is better than using shell scripts. */ struct nl_dump dump; struct ofpbuf buf, reply, delete; ofpbuf_init(&buf, NL_DUMP_BUFSIZE); ofpbuf_init(&delete, NL_DUMP_BUFSIZE); nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET, NLM_F_REQUEST); nl_dump_start(&dump, NETLINK_NETFILTER, &buf); ofpbuf_clear(&buf); for (;;) { struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]; enum nl_ct_event_type event_type; uint8_t nfgen_family; uint16_t zone = 0; if (!nl_dump_next(&dump, &reply, &buf)) { break; } if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family, attrs)) { continue; }; if (attrs[CTA_ZONE]) { zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE])); } if (zone != flush_zone) { /* The entry is not in the zone we're flushing. */ continue; } nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST); nl_msg_put_be16(&delete, CTA_ZONE, htons(zone)); nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1, attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN); nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1, attrs[CTA_ID]->nla_len - NLA_HDRLEN); nl_transact(NETLINK_NETFILTER, &delete, NULL); ofpbuf_clear(&delete); } nl_dump_done(&dump); ofpbuf_uninit(&delete); ofpbuf_uninit(&buf); /* Expectations are flushed automatically, because they do not * have a master connection anymore */ return 0; }
static int nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) { /* We can't accurately predict the size of the data to be received. The * caller is supposed to have allocated enough space in 'buf' to handle the * "typical" case. To handle exceptions, we make available enough space in * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable * figure since that's the maximum length of a Netlink attribute). */ struct nlmsghdr *nlmsghdr; #ifdef _WIN32 #define MAX_STACK_LENGTH 81920 uint8_t tail[MAX_STACK_LENGTH]; #else uint8_t tail[65536]; #endif struct iovec iov[2]; struct msghdr msg; ssize_t retval; int error; ovs_assert(buf->allocated >= sizeof *nlmsghdr); ofpbuf_clear(buf); iov[0].iov_base = ofpbuf_base(buf); iov[0].iov_len = buf->allocated; iov[1].iov_base = tail; iov[1].iov_len = sizeof tail; memset(&msg, 0, sizeof msg); msg.msg_iov = iov; msg.msg_iovlen = 2; /* Receive a Netlink message from the kernel. * * This works around a kernel bug in which the kernel returns an error code * as if it were the number of bytes read. It doesn't actually modify * anything in the receive buffer in that case, so we can initialize the * Netlink header with an impossible message length and then, upon success, * check whether it changed. */ nlmsghdr = ofpbuf_base(buf); do { nlmsghdr->nlmsg_len = UINT32_MAX; #ifdef _WIN32 boolean result = false; DWORD last_error = 0; result = ReadFile(sock->handle, tail, MAX_STACK_LENGTH, &retval, NULL); last_error = GetLastError(); if (last_error != ERROR_SUCCESS && !result) { retval = -1; errno = EAGAIN; } else { ofpbuf_put(buf, tail, retval); } #else retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT); #endif error = (retval < 0 ? errno : retval == 0 ? ECONNRESET /* not possible? */ : nlmsghdr->nlmsg_len != UINT32_MAX ? 0 : retval); } while (error == EINTR); if (error) { if (error == ENOBUFS) { /* Socket receive buffer overflow dropped one or more messages that * the kernel tried to send to us. */ COVERAGE_INC(netlink_overflow); } return error; } if (msg.msg_flags & MSG_TRUNC) { VLOG_ERR_RL(&rl, "truncated message (longer than %"PRIuSIZE" bytes)", sizeof tail); return E2BIG; } if (retval < sizeof *nlmsghdr || nlmsghdr->nlmsg_len < sizeof *nlmsghdr || nlmsghdr->nlmsg_len > retval) { VLOG_ERR_RL(&rl, "received invalid nlmsg (%"PRIuSIZE" bytes < %"PRIuSIZE")", retval, sizeof *nlmsghdr); return EPROTO; } #ifndef _WIN32 ofpbuf_set_size(buf, MIN(retval, buf->allocated)); if (retval > buf->allocated) { COVERAGE_INC(netlink_recv_jumbo); ofpbuf_put(buf, tail, retval - buf->allocated); } #endif log_nlmsg(__func__, 0, ofpbuf_data(buf), ofpbuf_size(buf), sock->protocol); COVERAGE_INC(netlink_received); return 0; }