[ovs-dev] [PATCH v1] netdev-linux: Replace sendmsg with sendmmsg in netdev_linux_send

Zhenyu Gao sysugaozhenyu at gmail.com
Tue Jun 6 01:27:28 UTC 2017


sendmmsg() can reduce the CPU cycles spent sending packets to the kernel.
Replace sendmsg() with sendmmsg() in netdev_linux_send() so that the packets
in a batch are sent together rather than one at a time.

Signed-off-by: Zhenyu Gao <sysugaozhenyu at gmail.com>
---
 lib/netdev-linux.c | 122 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 73 insertions(+), 49 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 8ae740a..bc975cd 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1194,10 +1194,14 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
 {
     int error = 0;
     int sock = 0;
+    ssize_t retval;
 
-    struct sockaddr_ll sll;
-    struct msghdr msg;
     if (!is_tap_netdev(netdev_)) {
+        uint32_t resend_idx = 0;
+        struct mmsghdr *msg;
+        struct iovec *iov;
+        struct sockaddr_ll sll;
+
         sock = af_packet_sock();
         if (sock < 0) {
             error = -sock;
@@ -1216,34 +1220,56 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
         sll.sll_family = AF_PACKET;
         sll.sll_ifindex = ifindex;
 
-        msg.msg_name = &sll;
-        msg.msg_namelen = sizeof sll;
-        msg.msg_iovlen = 1;
-        msg.msg_control = NULL;
-        msg.msg_controllen = 0;
-        msg.msg_flags = 0;
-    }
+        msg = xmalloc(sizeof(*msg) * batch->count);
+        iov = xmalloc(sizeof(*iov) * batch->count);
 
-    /* 'i' is incremented only if there's no error */
-    for (int i = 0; i < batch->count; ) {
-        const void *data = dp_packet_data(batch->packets[i]);
-        size_t size = dp_packet_size(batch->packets[i]);
-        ssize_t retval;
+        for (int i = 0; i < batch->count; i++) {
+            const void *data = dp_packet_data(batch->packets[i]);
+            size_t size = dp_packet_size(batch->packets[i]);
 
-        /* Truncate the packet if it is configured. */
-        size -= dp_packet_get_cutlen(batch->packets[i]);
+            /* Truncate the packet if it is configured. */
+            size -= dp_packet_get_cutlen(batch->packets[i]);
 
-        if (!is_tap_netdev(netdev_)) {
             /* Use our AF_PACKET socket to send to this device. */
-            struct iovec iov;
 
-            iov.iov_base = CONST_CAST(void *, data);
-            iov.iov_len = size;
+            msg[i].msg_hdr.msg_name = &sll;
+            msg[i].msg_hdr.msg_namelen = sizeof sll;
+            msg[i].msg_hdr.msg_iovlen = 1;
+            msg[i].msg_hdr.msg_control = NULL;
+            msg[i].msg_hdr.msg_controllen = 0;
+            msg[i].msg_hdr.msg_flags = 0;
+            iov[i].iov_base = CONST_CAST(void *, data);
+            iov[i].iov_len = size;
+            msg[i].msg_hdr.msg_iov = &iov[i];
+        }
 
-            msg.msg_iov = &iov;
+resend_batch: 
+        retval = sendmmsg(sock, msg + resend_idx,
+                          batch->count - resend_idx, 0);
+        if (retval < 0) {
+            if (errno == EINTR) {
+                goto resend_batch;
+            }
+            /* The Linux AF_PACKET implementation never blocks waiting for
+             * room for packets, instead returning ENOBUFS.  Translate this
+             * into EAGAIN for the caller. */
+            error = errno == ENOBUFS ? EAGAIN : errno;
+        } else if(retval != batch->count - resend_idx) {
+            resend_idx += retval;
+            goto resend_batch;
+        } 
+
+        free(msg);
+        free(iov);
+    } else {
+        /* 'i' is incremented only if there's no error */
+        for (int i = 0; i < batch->count; ) {
+            const void *data = dp_packet_data(batch->packets[i]);
+            size_t size = dp_packet_size(batch->packets[i]);
+
+            /* Truncate the packet if it is configured. */
+            size -= dp_packet_get_cutlen(batch->packets[i]);
 
-            retval = sendmsg(sock, &msg, 0);
-        } else {
             /* Use the tap fd to send to this device.  This is essential for
              * tap devices, because packets sent to a tap device with an
              * AF_PACKET socket will loop back to be *received* again on the
@@ -1252,45 +1278,43 @@ netdev_linux_send(struct netdev *netdev_, int qid OVS_UNUSED,
             struct netdev_linux *netdev = netdev_linux_cast(netdev_);
 
             retval = write(netdev->tap_fd, data, size);
-        }
 
-        if (retval < 0) {
-            if (errno == EINTR) {
-                /* The send was interrupted by a signal.  Retry the packet by
-                 * continuing without incrementing 'i'.*/
-                continue;
-            } else if (errno == EIO && is_tap_netdev(netdev_)) {
-                /* The Linux tap driver returns EIO if the device is not up.
-                 * From the OVS side this is not an error, so ignore it. */
-            } else {
-                /* The Linux AF_PACKET implementation never blocks waiting for
-                 * room for packets, instead returning ENOBUFS.  Translate this
-                 * into EAGAIN for the caller. */
-                error = errno == ENOBUFS ? EAGAIN : errno;
+            if (retval < 0) {
+                if (errno == EINTR) {
+                    /* The send was interrupted by a signal.
+                     * Retry the packet by continuing without
+                     * incrementing 'i'.*/
+                    continue;
+                } else if (errno == EIO) {
+                    /* The Linux tap driver returns EIO if the device
+                     * is not up. From the OVS side this is not an error,
+                     * so ignore it. */
+                } else {
+                    break;
+                }
+            } else if (retval != size) {
+                VLOG_WARN_RL(&rl, "sent partial Ethernet packet"
+                                  " (%"PRIuSIZE" bytes"
+                                  " of %"PRIuSIZE") on %s", retval, size,
+                             netdev_get_name(netdev_));
+                error = EMSGSIZE;
                 break;
             }
-        } else if (retval != size) {
-            VLOG_WARN_RL(&rl, "sent partial Ethernet packet (%"PRIuSIZE" bytes"
-                              " of %"PRIuSIZE") on %s", retval, size,
-                         netdev_get_name(netdev_));
-            error = EMSGSIZE;
-            break;
-        }
 
-        /* Process the next packet in the batch */
-        i++;
+            /* Process the next packet in the batch */
+            i++;
+        }
     }
 
     if (error && error != EAGAIN) {
-            VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
-                         netdev_get_name(netdev_), ovs_strerror(error));
+        VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
+                     netdev_get_name(netdev_), ovs_strerror(error));
     }
 
 free_batch:
     dp_packet_delete_batch(batch, may_steal);
 
     return error;
-
 }
 
 /* Registers with the poll loop to wake up from the next call to poll_block()
-- 
1.8.3.1



More information about the dev mailing list