[ovs-dev] [PATCH 3/5 v2] lib/netlink-socket.c: add support for nl_transact() on Windows

Nithin Raju nithin at vmware.com
Fri Sep 19 21:30:54 UTC 2014


In this patch, we add support for nl_transact() on Windows using
the OVS_IOCTL_TRANSACT ioctl that sends down the request and gets
the reply in the same call to the kernel.

This is obviously a departure from the way it is implemented in
Linux where all the sends are done at once using sendmsg() and
replies are received one at a time.

The initial implementation followed the Linux approach, using multiple
writes followed by reads, but we decided against it since it is not
efficient and it also complicates the state machine in the kernel.

The Windows implementation has equivalent code for handling corner
cases and error conditions similar to Linux. Some of it is not
applicable yet. E.g., the Windows kernel does not embed an error
in the netlink message itself. There's userspace code nevertheless
for this.

Signed-off-by: Nithin Raju <nithin at vmware.com>
Acked-by: Samuel Ghinet <sghinet at cloudbasesolutions.com>
Acked-by: Eitan Eliahu <eliahue at vmware.com>
---
 lib/netlink-socket.c |  101 ++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 81 insertions(+), 20 deletions(-)

diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 1dd7a24..1717ab8 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -438,8 +438,8 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg,
         DWORD bytes;
 
         if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
-                            ofpbuf_data(msg), ofpbuf_size(msg), NULL, 0,
-                            &bytes, NULL)) {
+                             ofpbuf_data(msg), ofpbuf_size(msg), NULL, 0,
+                             &bytes, NULL)) {
             retval = -1;
             /* XXX: Map to a more appropriate error based on GetLastError(). */
             errno = EINVAL;
@@ -659,34 +659,19 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
         iovs[i].iov_len = ofpbuf_size(txn->request);
     }
 
+#ifndef _WIN32
     memset(&msg, 0, sizeof msg);
     msg.msg_iov = iovs;
     msg.msg_iovlen = n;
     do {
-#ifdef _WIN32
-    DWORD last_error = 0;
-    bool result = FALSE;
-    for (i = 0; i < n; i++) {
-        result = WriteFile((HANDLE)sock->handle, iovs[i].iov_base, iovs[i].iov_len,
-                           &error, NULL);
-        last_error = GetLastError();
-        if (last_error != ERROR_SUCCESS && !result) {
-            error = EAGAIN;
-            errno = EAGAIN;
-        } else {
-            error = 0;
-        }
-    }
-#else
         error = sendmsg(sock->fd, &msg, 0) < 0 ? errno : 0;
-#endif
     } while (error == EINTR);
 
     for (i = 0; i < n; i++) {
         struct nl_transaction *txn = transactions[i];
 
-        log_nlmsg(__func__, error, ofpbuf_data(txn->request), ofpbuf_size(txn->request),
-                  sock->protocol);
+        log_nlmsg(__func__, error, ofpbuf_data(txn->request),
+                  ofpbuf_size(txn->request), sock->protocol);
     }
     if (!error) {
         COVERAGE_ADD(netlink_sent, n);
@@ -765,6 +750,82 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
         base_seq += i + 1;
     }
     ofpbuf_uninit(&tmp_reply);
+#else
+    error = 0;
+    for (i = 0; i < n; i++) {
+        DWORD reply_len;
+        uint8_t tail[65536];
+        struct nl_transaction *txn = transactions[i];
+        struct nlmsghdr *request_nlmsg, *reply_nlmsg;
+
+        if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
+                             ofpbuf_data(txn->request),
+                             ofpbuf_size(txn->request),
+                             txn->reply ? tail : 0,
+                             txn->reply ? sizeof tail : 0,
+                             &reply_len, NULL)) {
+            /* XXX: Map to a more appropriate error. */
+            error = EINVAL;
+            break;
+        }
+
+        if (txn->reply) {
+            if (reply_len < sizeof *reply_nlmsg) {
+                VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32,
+                            reply_len);
+                break;
+            }
+
+            /* Validate the sequence number in the reply. */
+            request_nlmsg = nl_msg_nlmsghdr(txn->request);
+            reply_nlmsg = (struct nlmsghdr *)tail;
+
+            if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
+                ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
+                VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
+                    ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
+                    reply_nlmsg->nlmsg_seq);
+                break;
+            }
+
+            /* If reply was expected, verify if there was indeed a reply
+             * received. */
+            if (reply_len == 0) {
+                nl_sock_record_errors__(transactions, n, 0);
+                VLOG_DBG_RL(&rl, "reply not seen when expected seq %#"PRIx32,
+                            request_nlmsg->nlmsg_seq);
+                break;
+            }
+
+            /* Copy the reply to the buffer specified by the caller. */
+            if (reply_len > txn->reply->allocated) {
+                ofpbuf_reinit(txn->reply, reply_len);
+            }
+            memcpy(ofpbuf_data(txn->reply), tail, reply_len);
+            ofpbuf_set_size(txn->reply, reply_len);
+
+            /* Handle errors embedded within the netlink message. */
+            if (nl_msg_nlmsgerr(txn->reply, &txn->error)) {
+                if (txn->reply) {
+                    ofpbuf_clear(txn->reply);
+                }
+                if (txn->error) {
+                    VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+                                error, ovs_strerror(txn->error));
+                }
+            } else {
+                txn->error = 0;
+            }
+        }
+
+        /* Count the number of successful transactions. */
+        (*done)++;
+    }
+
+    if (!error) {
+        COVERAGE_ADD(netlink_sent, n);
+    }
+#endif
 
     return error;
 }
-- 
1.7.4.1




More information about the dev mailing list