[ovs-dev] [PATCHv4 1/7] netlink: Remove buffer from 'struct nl_dump'.

Joe Stringer joestringer at nicira.com
Thu Feb 27 22:13:05 UTC 2014


This patch makes all of the users of 'struct nl_dump' allocate their own
buffers to pass down to nl_dump_next(). This paves the way for allowing
multithreaded flow dumping.

Signed-off-by: Joe Stringer <joestringer at nicira.com>
---
v3: Rebase.
v2: Add NL_DUMP_BUFSIZE.
    Add buffer to dpif_linux_flow_state.
    Fold "netlink: Drain sockets with separate ofpbuf." into this patch.
    Fold "netlink: Explicitly pass buffers to nl_dump_next()." in.
    Use ofpbuf_use_stub() in most situations.
---
 lib/dpif-linux.c     |   41 ++++++++++++++++++++++++++++-------------
 lib/netdev-linux.c   |   42 ++++++++++++++++++++++++++++--------------
 lib/netlink-socket.c |   37 +++++++++++++++++++++----------------
 lib/netlink-socket.h |    5 +++--
 lib/route-table.c    |   14 ++++++++++----
 5 files changed, 90 insertions(+), 49 deletions(-)

diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c
index 18de118..723de23 100644
--- a/lib/dpif-linux.c
+++ b/lib/dpif-linux.c
@@ -190,7 +190,8 @@ static int
 dpif_linux_enumerate(struct sset *all_dps)
 {
     struct nl_dump dump;
-    struct ofpbuf msg;
+    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf msg, buf;
     int error;
 
     error = dpif_linux_init();
@@ -198,14 +199,16 @@ dpif_linux_enumerate(struct sset *all_dps)
         return error;
     }
 
+    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
     dpif_linux_dp_dump_start(&dump);
-    while (nl_dump_next(&dump, &msg)) {
+    while (nl_dump_next(&dump, &msg, &buf)) {
         struct dpif_linux_dp dp;
 
         if (!dpif_linux_dp_from_ofpbuf(&dp, &msg)) {
             sset_add(all_dps, dp.name);
         }
     }
+    ofpbuf_uninit(&buf);
     return nl_dump_done(&dump);
 }
 
@@ -708,6 +711,7 @@ dpif_linux_flow_flush(struct dpif *dpif_)
 
 struct dpif_linux_port_state {
     struct nl_dump dump;
+    struct ofpbuf buf;
 };
 
 static void
@@ -735,18 +739,20 @@ dpif_linux_port_dump_start(const struct dpif *dpif, void **statep)
     *statep = state = xmalloc(sizeof *state);
     dpif_linux_port_dump_start__(dpif, &state->dump);
 
+    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
     return 0;
 }
 
 static int
 dpif_linux_port_dump_next__(const struct dpif *dpif_, struct nl_dump *dump,
-                            struct dpif_linux_vport *vport)
+                            struct dpif_linux_vport *vport,
+                            struct ofpbuf *buffer)
 {
     struct dpif_linux *dpif = dpif_linux_cast(dpif_);
     struct ofpbuf buf;
     int error;
 
-    if (!nl_dump_next(dump, &buf)) {
+    if (!nl_dump_next(dump, &buf, buffer)) {
         return EOF;
     }
 
@@ -766,7 +772,8 @@ dpif_linux_port_dump_next(const struct dpif *dpif OVS_UNUSED, void *state_,
     struct dpif_linux_vport vport;
     int error;
 
-    error = dpif_linux_port_dump_next__(dpif, &state->dump, &vport);
+    error = dpif_linux_port_dump_next__(dpif, &state->dump, &vport,
+                                        &state->buf);
     if (error) {
         return error;
     }
@@ -782,6 +789,7 @@ dpif_linux_port_dump_done(const struct dpif *dpif_ OVS_UNUSED, void *state_)
     struct dpif_linux_port_state *state = state_;
     int error = nl_dump_done(&state->dump);
 
+    ofpbuf_uninit(&state->buf);
     free(state);
     return error;
 }
@@ -987,7 +995,8 @@ struct dpif_linux_flow_state {
     struct nl_dump dump;
     struct dpif_linux_flow flow;
     struct dpif_flow_stats stats;
-    struct ofpbuf *buf;
+    struct ofpbuf buffer;         /* Always used to store flows. */
+    struct ofpbuf *tmp;           /* Used if kernel does not supply actions. */
 };
 
 static int
@@ -1009,7 +1018,8 @@ dpif_linux_flow_dump_start(const struct dpif *dpif_, void **statep)
     nl_dump_start(&state->dump, NETLINK_GENERIC, buf);
     ofpbuf_delete(buf);
 
-    state->buf = NULL;
+    ofpbuf_init(&state->buffer, NL_DUMP_BUFSIZE);
+    state->tmp = NULL;
 
     return 0;
 }
@@ -1026,10 +1036,10 @@ dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
     int error;
 
     do {
-        ofpbuf_delete(state->buf);
-        state->buf = NULL;
+        ofpbuf_delete(state->tmp);
+        state->tmp = NULL;
 
-        if (!nl_dump_next(&state->dump, &buf)) {
+        if (!nl_dump_next(&state->dump, &buf, &state->buffer)) {
             return EOF;
         }
 
@@ -1041,7 +1051,7 @@ dpif_linux_flow_dump_next(const struct dpif *dpif_, void *state_,
         if (actions && !state->flow.actions) {
             error = dpif_linux_flow_get__(dpif_, state->flow.key,
                                           state->flow.key_len,
-                                          &state->flow, &state->buf);
+                                          &state->flow, &state->tmp);
             if (error == ENOENT) {
                 VLOG_DBG("dumped flow disappeared on get");
             } else if (error) {
@@ -1075,7 +1085,8 @@ dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_)
 {
     struct dpif_linux_flow_state *state = state_;
     int error = nl_dump_done(&state->dump);
-    ofpbuf_delete(state->buf);
+    ofpbuf_uninit(&state->buffer);
+    ofpbuf_delete(state->tmp);
     free(state);
     return error;
 }
@@ -1287,6 +1298,8 @@ dpif_linux_refresh_channels(struct dpif *dpif_)
     struct dpif_linux_vport vport;
     size_t keep_channels_nbits;
     struct nl_dump dump;
+    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf buf;
     int retval = 0;
     size_t i;
 
@@ -1303,8 +1316,9 @@ dpif_linux_refresh_channels(struct dpif *dpif_)
 
     dpif->n_events = dpif->event_offset = 0;
 
+    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
     dpif_linux_port_dump_start__(dpif_, &dump);
-    while (!dpif_linux_port_dump_next__(dpif_, &dump, &vport)) {
+    while (!dpif_linux_port_dump_next__(dpif_, &dump, &vport, &buf)) {
         uint32_t port_no = odp_to_u32(vport.port_no);
         struct nl_sock *sock = (port_no < dpif->uc_array_size
                                 ? dpif->channels[port_no].sock
@@ -1370,6 +1384,7 @@ dpif_linux_refresh_channels(struct dpif *dpif_)
         nl_sock_destroy(sock);
     }
     nl_dump_done(&dump);
+    ofpbuf_uninit(&buf);
 
     /* Discard any saved channels that we didn't reuse. */
     for (i = 0; i < keep_channels_nbits; i++) {
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index e756d88..828540d 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2184,8 +2184,13 @@ netdev_linux_get_queue_stats(const struct netdev *netdev_,
     return error;
 }
 
+struct queue_dump_state {
+    struct nl_dump dump;
+    struct ofpbuf buf;
+};
+
 static bool
-start_queue_dump(const struct netdev *netdev, struct nl_dump *dump)
+start_queue_dump(const struct netdev *netdev, struct queue_dump_state *state)
 {
     struct ofpbuf request;
     struct tcmsg *tcmsg;
@@ -2195,11 +2200,20 @@ start_queue_dump(const struct netdev *netdev, struct nl_dump *dump)
         return false;
     }
     tcmsg->tcm_parent = 0;
-    nl_dump_start(dump, NETLINK_ROUTE, &request);
+    nl_dump_start(&state->dump, NETLINK_ROUTE, &request);
     ofpbuf_uninit(&request);
+
+    ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
     return true;
 }
 
+static int
+finish_queue_dump(struct queue_dump_state *state)
+{
+    ofpbuf_uninit(&state->buf);
+    return nl_dump_done(&state->dump);
+}
+
 struct netdev_linux_queue_state {
     unsigned int *queues;
     size_t cur_queue;
@@ -2283,17 +2297,17 @@ netdev_linux_dump_queue_stats(const struct netdev *netdev_,
     ovs_mutex_lock(&netdev->mutex);
     error = tc_query_qdisc(netdev_);
     if (!error) {
-        struct nl_dump dump;
+        struct queue_dump_state state;
 
         if (!netdev->tc->ops->class_dump_stats) {
             error = EOPNOTSUPP;
-        } else if (!start_queue_dump(netdev_, &dump)) {
+        } else if (!start_queue_dump(netdev_, &state)) {
             error = ENODEV;
         } else {
             struct ofpbuf msg;
             int retval;
 
-            while (nl_dump_next(&dump, &msg)) {
+            while (nl_dump_next(&state.dump, &msg, &state.buf)) {
                 retval = netdev->tc->ops->class_dump_stats(netdev_, &msg,
                                                            cb, aux);
                 if (retval) {
@@ -2301,7 +2315,7 @@ netdev_linux_dump_queue_stats(const struct netdev *netdev_,
                 }
             }
 
-            retval = nl_dump_done(&dump);
+            retval = finish_queue_dump(&state);
             if (retval) {
                 error = retval;
             }
@@ -3079,7 +3093,7 @@ static int
 htb_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
 {
     struct ofpbuf msg;
-    struct nl_dump dump;
+    struct queue_dump_state state;
     struct htb_class hc;
 
     /* Get qdisc options. */
@@ -3088,17 +3102,17 @@ htb_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
     htb_install__(netdev, hc.max_rate);
 
     /* Get queues. */
-    if (!start_queue_dump(netdev, &dump)) {
+    if (!start_queue_dump(netdev, &state)) {
         return ENODEV;
     }
-    while (nl_dump_next(&dump, &msg)) {
+    while (nl_dump_next(&state.dump, &msg, &state.buf)) {
         unsigned int queue_id;
 
         if (!htb_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) {
             htb_update_queue__(netdev, queue_id, &hc);
         }
     }
-    nl_dump_done(&dump);
+    finish_queue_dump(&state);
 
     return 0;
 }
@@ -3579,18 +3593,18 @@ static int
 hfsc_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
 {
     struct ofpbuf msg;
-    struct nl_dump dump;
+    struct queue_dump_state state;
     struct hfsc_class hc;
 
     hc.max_rate = 0;
     hfsc_query_class__(netdev, tc_make_handle(1, 0xfffe), 0, &hc, NULL);
     hfsc_install__(netdev, hc.max_rate);
 
-    if (!start_queue_dump(netdev, &dump)) {
+    if (!start_queue_dump(netdev, &state)) {
         return ENODEV;
     }
 
-    while (nl_dump_next(&dump, &msg)) {
+    while (nl_dump_next(&state.dump, &msg, &state.buf)) {
         unsigned int queue_id;
 
         if (!hfsc_parse_tcmsg__(&msg, &queue_id, &hc, NULL)) {
@@ -3598,7 +3612,7 @@ hfsc_tc_load(struct netdev *netdev, struct ofpbuf *nlmsg OVS_UNUSED)
         }
     }
 
-    nl_dump_done(&dump);
+    finish_queue_dump(&state);
     return 0;
 }
 
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index 8cb1b8e..b5eac0f 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -690,7 +690,6 @@ nl_sock_drain(struct nl_sock *sock)
 void
 nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf *request)
 {
-    ofpbuf_init(&dump->buffer, 4096);
     dump->status = nl_pool_alloc(protocol, &dump->sock);
     if (dump->status) {
         return;
@@ -704,24 +703,24 @@ nl_dump_start(struct nl_dump *dump, int protocol, const struct ofpbuf *request)
 
 /* Helper function for nl_dump_next(). */
 static int
-nl_dump_recv(struct nl_dump *dump)
+nl_dump_recv(struct nl_dump *dump, struct ofpbuf *buffer)
 {
     struct nlmsghdr *nlmsghdr;
     int retval;
 
-    retval = nl_sock_recv__(dump->sock, &dump->buffer, true);
+    retval = nl_sock_recv__(dump->sock, buffer, true);
     if (retval) {
         return retval == EINTR ? EAGAIN : retval;
     }
 
-    nlmsghdr = nl_msg_nlmsghdr(&dump->buffer);
+    nlmsghdr = nl_msg_nlmsghdr(buffer);
     if (dump->nl_seq != nlmsghdr->nlmsg_seq) {
         VLOG_DBG_RL(&rl, "ignoring seq %#"PRIx32" != expected %#"PRIx32,
                     nlmsghdr->nlmsg_seq, dump->nl_seq);
         return EAGAIN;
     }
 
-    if (nl_msg_nlmsgerr(&dump->buffer, &retval)) {
+    if (nl_msg_nlmsgerr(buffer, &retval)) {
         VLOG_INFO_RL(&rl, "netlink dump request error (%s)",
                      ovs_strerror(retval));
         return retval && retval != EAGAIN ? retval : EPROTO;
@@ -730,12 +729,14 @@ nl_dump_recv(struct nl_dump *dump)
     return 0;
 }
 
-/* Attempts to retrieve another reply from 'dump', which must have been
- * initialized with nl_dump_start().
+/* Attempts to retrieve another reply from 'dump' into 'buffer'. 'dump' must
+ * have been initialized with nl_dump_start(), and 'buffer' must have been
+ * initialized. 'buffer' should be at least NL_DUMP_BUFSIZE bytes long.
  *
  * If successful, returns true and points 'reply->data' and 'reply->size' to
- * the message that was retrieved.  The caller must not modify 'reply' (because
- * it points into the middle of a larger buffer).
+ * the message that was retrieved. The caller must not modify 'reply' (because
+ * it points within 'buffer', which will be used by future calls to this
+ * function).
  *
  * On failure, returns false and sets 'reply->data' to NULL and 'reply->size'
  * to 0.  Failure might indicate an actual error or merely the end of replies.
@@ -743,7 +744,7 @@ nl_dump_recv(struct nl_dump *dump)
  * completed by calling nl_dump_done().
  */
 bool
-nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply)
+nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply, struct ofpbuf *buffer)
 {
     struct nlmsghdr *nlmsghdr;
 
@@ -753,10 +754,10 @@ nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply)
         return false;
     }
 
-    while (!dump->buffer.size) {
-        int retval = nl_dump_recv(dump);
+    while (!buffer->size) {
+        int retval = nl_dump_recv(dump, buffer);
         if (retval) {
-            ofpbuf_clear(&dump->buffer);
+            ofpbuf_clear(buffer);
             if (retval != EAGAIN) {
                 dump->status = retval;
                 return false;
@@ -764,7 +765,7 @@ nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply)
         }
     }
 
-    nlmsghdr = nl_msg_next(&dump->buffer, reply);
+    nlmsghdr = nl_msg_next(buffer, reply);
     if (!nlmsghdr) {
         VLOG_WARN_RL(&rl, "netlink dump reply contains message fragment");
         dump->status = EPROTO;
@@ -783,18 +784,22 @@ nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply)
 int
 nl_dump_done(struct nl_dump *dump)
 {
+    uint64_t tmp_reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf buf;
+
     /* Drain any remaining messages that the client didn't read.  Otherwise the
      * kernel will continue to queue them up and waste buffer space.
      *
      * XXX We could just destroy and discard the socket in this case. */
+    ofpbuf_use_stub(&buf, tmp_reply_stub, sizeof tmp_reply_stub);
     while (!dump->status) {
         struct ofpbuf reply;
-        if (!nl_dump_next(dump, &reply)) {
+        if (!nl_dump_next(dump, &reply, &buf)) {
             ovs_assert(dump->status);
         }
     }
     nl_pool_release(dump->sock);
-    ofpbuf_uninit(&dump->buffer);
+    ofpbuf_uninit(&buf);
     return dump->status == EOF ? 0 : dump->status;
 }
 
diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h
index 5fedfe9..211c132 100644
--- a/lib/netlink-socket.h
+++ b/lib/netlink-socket.h
@@ -96,16 +96,17 @@ int nl_transact(int protocol, const struct ofpbuf *request,
 void nl_transact_multiple(int protocol, struct nl_transaction **, size_t n);
 
 /* Table dumping. */
+#define NL_DUMP_BUFSIZE         4096
+
 struct nl_dump {
     struct nl_sock *sock;       /* Socket being dumped. */
     uint32_t nl_seq;            /* Expected nlmsg_seq for replies. */
-    struct ofpbuf buffer;       /* Receive buffer currently being iterated. */
     int status;                 /* 0=OK, EOF=done, or positive errno value. */
 };
 
 void nl_dump_start(struct nl_dump *, int protocol,
                    const struct ofpbuf *request);
-bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply);
+bool nl_dump_next(struct nl_dump *, struct ofpbuf *reply, struct ofpbuf *buf);
 int nl_dump_done(struct nl_dump *);
 
 /* Miscellaneous */
diff --git a/lib/route-table.c b/lib/route-table.c
index 1afc01d..fdc21e8 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -223,7 +223,8 @@ route_table_reset(void)
 {
     struct nl_dump dump;
     struct rtgenmsg *rtmsg;
-    struct ofpbuf request, reply;
+    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf request, reply, buf;
 
     route_map_clear();
     route_table_valid = true;
@@ -238,13 +239,15 @@ route_table_reset(void)
     nl_dump_start(&dump, NETLINK_ROUTE, &request);
     ofpbuf_uninit(&request);
 
-    while (nl_dump_next(&dump, &reply)) {
+    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
+    while (nl_dump_next(&dump, &reply, &buf)) {
         struct route_table_msg msg;
 
         if (route_table_parse(&reply, &msg)) {
             route_table_handle_msg(&msg);
         }
     }
+    ofpbuf_uninit(&buf);
 
     return nl_dump_done(&dump);
 }
@@ -407,7 +410,8 @@ name_table_reset(void)
 {
     struct nl_dump dump;
     struct rtgenmsg *rtmsg;
-    struct ofpbuf request, reply;
+    uint64_t reply_stub[NL_DUMP_BUFSIZE / 8];
+    struct ofpbuf request, reply, buf;
 
     name_table_valid = true;
     name_map_clear();
@@ -420,7 +424,8 @@ name_table_reset(void)
     nl_dump_start(&dump, NETLINK_ROUTE, &request);
     ofpbuf_uninit(&request);
 
-    while (nl_dump_next(&dump, &reply)) {
+    ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub);
+    while (nl_dump_next(&dump, &reply, &buf)) {
         struct rtnetlink_link_change change;
 
         if (rtnetlink_link_parse(&reply, &change)
@@ -434,6 +439,7 @@ name_table_reset(void)
             hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0));
         }
     }
+    ofpbuf_uninit(&buf);
     return nl_dump_done(&dump);
 }
 
-- 
1.7.9.5




More information about the dev mailing list