/* Sends the 'request' member of the 'n' transactions in 'transactions' on * 'sock', in order, and receives responses to all of them. Fills in the * 'error' member of each transaction with 0 if it was successful, otherwise * with a positive errno value. If 'reply' is nonnull, then it will be filled * with the reply if the message receives a detailed reply. In other cases, * i.e. where the request failed or had no reply beyond an indication of * success, 'reply' will be cleared if it is nonnull. * * The caller is responsible for destroying each request and reply, and the * transactions array itself. * * Before sending each message, this function will finalize nlmsg_len in each * 'request' to match the ofpbuf's size, set nlmsg_pid to 'sock''s pid, and * initialize nlmsg_seq. * * Bare Netlink is an unreliable transport protocol. This function layers * reliable delivery and reply semantics on top of bare Netlink. See * nl_sock_transact() for some caveats. */ void nl_sock_transact_multiple(struct nl_sock *sock, struct nl_transaction **transactions, size_t n) { int max_batch_count; int error; if (!n) { return; } error = nl_sock_cow__(sock); if (error) { nl_sock_record_errors__(transactions, n, error); return; } /* In theory, every request could have a 64 kB reply. But the default and * maximum socket rcvbuf size with typical Dom0 memory sizes both tend to * be a bit below 128 kB, so that would only allow a single message in a * "batch". So we assume that replies average (at most) 4 kB, which allows * a good deal of batching. * * In practice, most of the requests that we batch either have no reply at * all or a brief reply. */ max_batch_count = MAX(sock->rcvbuf / 4096, 1); max_batch_count = MIN(max_batch_count, max_iovs); while (n > 0) { size_t count, bytes; size_t done; /* Batch up to 'max_batch_count' transactions. But cap it at about a * page of requests total because big skbuffs are expensive to * allocate in the kernel. */ #if defined(PAGESIZE) enum { MAX_BATCH_BYTES = MAX(1, PAGESIZE - 512) }; #else enum { MAX_BATCH_BYTES = 4096 - 512 }; #endif bytes = transactions[0]->request->size; for (count = 1; count < n && count < max_batch_count; count++) { if (bytes + transactions[count]->request->size > MAX_BATCH_BYTES) { break; } bytes += transactions[count]->request->size; } error = nl_sock_transact_multiple__(sock, transactions, count, &done); transactions += done; n -= done; if (error == ENOBUFS) { VLOG_DBG_RL(&rl, "receive buffer overflow, resending request"); } else if (error) { VLOG_ERR_RL(&rl, "transaction error (%s)", strerror(error)); nl_sock_record_errors__(transactions, n, error); } } }
static int nl_sock_transact_multiple__(struct nl_sock *sock, struct nl_transaction **transactions, size_t n, size_t *done) { uint64_t tmp_reply_stub[1024 / 8]; struct nl_transaction tmp_txn; struct ofpbuf tmp_reply; uint32_t base_seq; struct iovec iovs[MAX_IOVS]; struct msghdr msg; int error; int i; base_seq = nl_sock_allocate_seq(sock, n); *done = 0; for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(txn->request); nlmsg->nlmsg_len = txn->request->size; nlmsg->nlmsg_seq = base_seq + i; nlmsg->nlmsg_pid = sock->pid; iovs[i].iov_base = txn->request->data; iovs[i].iov_len = txn->request->size; } memset(&msg, 0, sizeof msg); msg.msg_iov = iovs; msg.msg_iovlen = n; do { error = sendmsg(sock->fd, &msg, 0) < 0 ? errno : 0; } while (error == EINTR); for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; log_nlmsg(__func__, error, txn->request->data, txn->request->size, sock->protocol); } if (!error) { COVERAGE_ADD(netlink_sent, n); } if (error) { return error; } ofpbuf_use_stub(&tmp_reply, tmp_reply_stub, sizeof tmp_reply_stub); tmp_txn.request = NULL; tmp_txn.reply = &tmp_reply; tmp_txn.error = 0; while (n > 0) { struct nl_transaction *buf_txn, *txn; uint32_t seq; /* Find a transaction whose buffer we can use for receiving a reply. * If no such transaction is left, use tmp_txn. */ buf_txn = &tmp_txn; for (i = 0; i < n; i++) { if (transactions[i]->reply) { buf_txn = transactions[i]; break; } } /* Receive a reply. */ error = nl_sock_recv__(sock, buf_txn->reply, false); if (error) { if (error == EAGAIN) { nl_sock_record_errors__(transactions, n, 0); *done += n; error = 0; } break; } /* Match the reply up with a transaction. */ seq = nl_msg_nlmsghdr(buf_txn->reply)->nlmsg_seq; if (seq < base_seq || seq >= base_seq + n) { VLOG_DBG_RL(&rl, "ignoring unexpected seq %#"PRIx32, seq); continue; } i = seq - base_seq; txn = transactions[i]; /* Fill in the results for 'txn'. */ if (nl_msg_nlmsgerr(buf_txn->reply, &txn->error)) { if (txn->reply) { ofpbuf_clear(txn->reply); } if (txn->error) { VLOG_DBG_RL(&rl, "received NAK error=%d (%s)", error, ovs_strerror(txn->error)); } } else { txn->error = 0; if (txn->reply && txn != buf_txn) { /* Swap buffers. */ struct ofpbuf *reply = buf_txn->reply; buf_txn->reply = txn->reply; txn->reply = reply; } } /* Fill in the results for transactions before 'txn'. (We have to do * this after the results for 'txn' itself because of the buffer swap * above.) */ nl_sock_record_errors__(transactions, i, 0); /* Advance. */ *done += i + 1; transactions += i + 1; n -= i + 1; base_seq += i + 1; } ofpbuf_uninit(&tmp_reply); return error; }