[ovs-dev] [PATCH v3] netdev-linux: Replace sendmsg with sendmmsg in netdev_linux_send
Zhenyu Gao
sysugaozhenyu at gmail.com
Mon Jul 17 02:39:57 UTC 2017
sendmmsg can reduce the CPU cycles spent sending packets to the kernel.
Replace sendmsg with sendmmsg in netdev_linux_send so that packets are
sent in batches when sendmmsg is available.
If the kernel does not support sendmmsg, fall back to sendmsg.
   netserver
|------------|
|            |
| container  |
|----veth----|
      |
      |        |------------|
      |---veth-|  dpdk-ovs  |             netperf
               |            |         |--------------|
               |----dpdk----|         |  bare-metal  |
                     |                |--------------|
                     |                       |
                     |                       |
                    pnic-------------------pnic
Netperf was used to test the performance:
1) cmd: netperf -H remote-container -t UDP_STREAM -l 60 -- -m 1400
result: netserver received 2383.21Mb (sendmsg) / 2551.64Mb (sendmmsg)
2) cmd: netperf -H remote-container -t UDP_STREAM -l 60 -- -m 60
result: netserver received 109.72Mb (sendmsg) / 115.18Mb (sendmmsg)
sendmmsg shows about a 6% improvement in netperf UDP testing.
Signed-off-by: Zhenyu Gao <sysugaozhenyu at gmail.com>
---
lib/netdev-linux.c | 85 +++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 62 insertions(+), 23 deletions(-)
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e1d8701..d991d05 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1182,6 +1182,54 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
}
}
+static inline int
+netdev_linux_sock_batch_send(int sock, struct msghdr *msg,
+ struct dp_packet_batch *batch)
+{
+ int error = 0;
+ ssize_t retval;
+ uint32_t resend_idx = 0;
+ struct mmsghdr *mmsg;
+ struct iovec *iov;
+
+ mmsg = xmalloc(sizeof(*mmsg) * batch->count);
+ iov = xmalloc(sizeof(*iov) * batch->count);
+
+ for (int i = 0; i < batch->count; i++) {
+ const void *data = dp_packet_data(batch->packets[i]);
+ size_t size = dp_packet_size(batch->packets[i]);
+
+ /* Truncate the packet if it is configured. */
+ size -= dp_packet_get_cutlen(batch->packets[i]);
+
+ iov[i].iov_base = CONST_CAST(void *, data);
+ iov[i].iov_len = size;
+ mmsg[i].msg_hdr = *msg;
+ mmsg[i].msg_hdr.msg_iov = &iov[i];
+ }
+
+resend_batch:
+ retval = sendmmsg(sock, mmsg + resend_idx,
+ batch->count - resend_idx, 0);
+ if (retval < 0) {
+ if (errno == EINTR) {
+ goto resend_batch;
+ }
+ /* The Linux AF_PACKET implementation never blocks waiting for
+ * room for packets, instead returning ENOBUFS. Translate this
+ * into EAGAIN for the caller. */
+ error = errno == ENOBUFS ? EAGAIN : errno;
+ } else if (retval != batch->count - resend_idx) {
+ /* Send remain packets again. */
+ resend_idx += retval;
+ goto resend_batch;
+ }
+
+ free(mmsg);
+ free(iov);
+ return error;
+}
+
/* Sends 'buffer' on 'netdev'. Returns 0 if successful, otherwise a positive
* errno value. Returns EAGAIN without blocking if the packet cannot be queued
* immediately. Returns EMSGSIZE if a partial packet was transmitted or if
@@ -1226,6 +1274,9 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
+
+ error = netdev_linux_sock_batch_send(sock, &msg, batch);
+ goto check_error;
}
/* 'i' is incremented only if there's no error */
@@ -1236,34 +1287,21 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
/* Truncate the packet if it is configured. */
size -= dp_packet_get_cutlen(batch->packets[i]);
+ /* Use the tap fd to send to this device. This is essential for
+ * tap devices, because packets sent to a tap device with an
+ * AF_PACKET socket will loop back to be *received* again on the
+ * tap device. This doesn't occur on other interface types
+ * because we attach a socket filter to the rx socket. */
+ struct netdev_linux *netdev = netdev_linux_cast(netdev_);
- if (!is_tap_netdev(netdev_)) {
- /* Use our AF_PACKET socket to send to this device. */
- struct iovec iov;
-
- iov.iov_base = CONST_CAST(void *, data);
- iov.iov_len = size;
-
- msg.msg_iov = &iov;
-
- retval = sendmsg(sock, &msg, 0);
- } else {
- /* Use the tap fd to send to this device. This is essential for
- * tap devices, because packets sent to a tap device with an
- * AF_PACKET socket will loop back to be *received* again on the
- * tap device. This doesn't occur on other interface types
- * because we attach a socket filter to the rx socket. */
- struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-
- retval = write(netdev->tap_fd, data, size);
- }
+ retval = write(netdev->tap_fd, data, size);
if (retval < 0) {
if (errno == EINTR) {
/* The send was interrupted by a signal. Retry the packet by
* continuing without incrementing 'i'.*/
continue;
- } else if (errno == EIO && is_tap_netdev(netdev_)) {
+ } else if (errno == EIO) {
/* The Linux tap driver returns EIO if the device is not up.
* From the OVS side this is not an error, so ignore it. */
} else {
@@ -1285,9 +1323,10 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
i++;
}
+check_error:
if (error && error != EAGAIN) {
- VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
- netdev_get_name(netdev_), ovs_strerror(error));
+ VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+ netdev_get_name(netdev_), ovs_strerror(error));
}
free_batch:
--
1.8.3.1
More information about the dev
mailing list