[ovs-dev] [optimize 11/26] netlink: Postpone choosing sequence numbers until send time.

Ben Pfaff blp at nicira.com
Tue Apr 17 00:18:51 UTC 2012


Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 lib/netlink-socket.c |   30 +++++++++++++++++++++++-------
 lib/netlink.c        |   29 +++++------------------------
 2 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 15a800b..90734c7 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -54,6 +54,7 @@ COVERAGE_DEFINE(netlink_sent);
  * information.  Also, at high logging levels we log *all* Netlink messages. */
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 600);
 
+static uint32_t nl_sock_allocate_seq(struct nl_sock *, unsigned int n);
 static void log_nlmsg(const char *function, int error,
                       const void *message, size_t size, int protocol);
 
@@ -62,6 +63,7 @@ static void log_nlmsg(const char *function, int error,
 struct nl_sock
 {
     int fd;
+    uint32_t next_seq;
     uint32_t pid;
     int protocol;
     struct nl_dump *dump;
@@ -122,6 +124,7 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
     }
     sock->protocol = protocol;
     sock->dump = NULL;
+    sock->next_seq = 1;
 
     rcvbuf = 1024 * 1024;
     if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
@@ -256,6 +259,7 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, bool wait)
     int error;
 
     nlmsg->nlmsg_len = msg->size;
+    nlmsg->nlmsg_seq = nl_sock_allocate_seq(sock, 1);
     nlmsg->nlmsg_pid = sock->pid;
     do {
         int retval;
@@ -522,7 +526,6 @@ nl_sock_transact_multiple__(struct nl_sock *sock,
  *
  * Before sending each message, this function will finalize nlmsg_len in each
  * 'request' to match the ofpbuf's size, and set nlmsg_pid to 'sock''s pid.
- * NLM_F_ACK will be added to some requests' nlmsg_flags.
  *
  * Bare Netlink is an unreliable transport protocol.  This function layers
  * reliable delivery and reply semantics on top of bare Netlink.  See
@@ -597,9 +600,9 @@ nl_sock_transact_multiple(struct nl_sock *sock,
  * on failure '*replyp' is set to NULL.  If 'replyp' is null, then the kernel's
  * reply, if any, is discarded.
  *
- * nlmsg_len in 'msg' will be finalized to match msg->size, and nlmsg_pid will
- * be set to 'sock''s pid, before the message is sent.  NLM_F_ACK will be set
- * in nlmsg_flags.
+ * Before the message is sent, nlmsg_len in 'request' will be finalized to
+ * match msg->size, nlmsg_pid will be set to 'sock''s pid, and nlmsg_seq will
+ * be initialized, NLM_F_ACK will be set in nlmsg_flags.
  *
  * The caller is responsible for destroying 'request'.
  *
@@ -721,9 +724,6 @@ void
 nl_dump_start(struct nl_dump *dump,
               struct nl_sock *sock, const struct ofpbuf *request)
 {
-    struct nlmsghdr *nlmsghdr = nl_msg_nlmsghdr(request);
-    nlmsghdr->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
-    dump->seq = nlmsghdr->nlmsg_seq;
     dump->buffer = NULL;
     if (sock->dump) {
         /* 'sock' already has an ongoing dump.  Clone the socket because
@@ -737,7 +737,10 @@ nl_dump_start(struct nl_dump *dump,
         dump->sock = sock;
         dump->status = 0;
     }
+
+    nl_msg_nlmsghdr(request)->nlmsg_flags |= NLM_F_DUMP | NLM_F_ACK;
     dump->status = nl_sock_send__(sock, request, true);
+    dump->seq = nl_msg_nlmsghdr(request)->nlmsg_seq;
 }
 
 /* Helper function for nl_dump_next(). */
@@ -1057,6 +1060,19 @@ nl_lookup_genl_family(const char *name, int *number)
     return *number > 0 ? 0 : -*number;
 }
 
+static uint32_t
+nl_sock_allocate_seq(struct nl_sock *sock, unsigned int n)
+{
+    uint32_t seq = sock->next_seq;
+
+    sock->next_seq += n;
+    if (sock->next_seq >= UINT32_MAX / 2) {
+        sock->next_seq = 1;
+    }
+
+    return seq;
+}
+
 static void
 nlmsghdr_to_string(const struct nlmsghdr *h, int protocol, struct ds *ds)
 {
diff --git a/lib/netlink.c b/lib/netlink.c
index 6445049..6e0701c 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -88,26 +88,6 @@ nl_msg_reserve(struct ofpbuf *msg, size_t size)
     ofpbuf_prealloc_tailroom(msg, NLMSG_ALIGN(size));
 }
 
-static uint32_t
-get_nlmsg_seq(void)
-{
-    /* Next nlmsghdr sequence number.
-     *
-     * This implementation uses sequence numbers that are unique process-wide,
-     * to avoid a hypothetical race: send request, close socket, open new
-     * socket that reuses the old socket's PID value, send request on new
-     * socket, receive reply from kernel to old socket but with same PID and
-     * sequence number.  (This race could be avoided other ways, e.g. by
-     * preventing PIDs from being quickly reused). */
-    static uint32_t next_seq;
-
-    if (next_seq == 0) {
-        /* Pick initial sequence number. */
-        next_seq = getpid() ^ time_wall();
-    }
-    return next_seq++;
-}
-
 /* Puts a nlmsghdr at the beginning of 'msg', which must be initially empty.
  * Uses the given 'type' and 'flags'.  'expected_payload' should be
  * an estimate of the number of payload bytes to be supplied; if the size of
@@ -121,8 +101,9 @@ get_nlmsg_seq(void)
  * is often NLM_F_REQUEST indicating that a request is being made, commonly
  * or'd with NLM_F_ACK to request an acknowledgement.
  *
- * Sets the new nlmsghdr's nlmsg_pid field to 0 for now.  nl_sock_send() will
- * fill it in just before sending the message.
+ * Sets the new nlmsghdr's nlmsg_len, nlmsg_seq, and nlmsg_pid fields to 0 for
+ * now.  Functions that send Netlink messages will fill these in just before
+ * sending the message.
  *
  * nl_msg_put_genlmsghdr() is more convenient for composing a Generic Netlink
  * message. */
@@ -139,7 +120,7 @@ nl_msg_put_nlmsghdr(struct ofpbuf *msg,
     nlmsghdr->nlmsg_len = 0;
     nlmsghdr->nlmsg_type = type;
     nlmsghdr->nlmsg_flags = flags;
-    nlmsghdr->nlmsg_seq = get_nlmsg_seq();
+    nlmsghdr->nlmsg_seq = 0;
     nlmsghdr->nlmsg_pid = 0;
 }
 
-- 
1.7.9




More information about the dev mailing list