[ovs-dev] [PATCH v3 05/11] dpif-netdev: Wait an RCU grace period before freeing ports.

Daniele Di Proietto diproiettod at vmware.com
Tue Mar 15 22:29:57 UTC 2016


The dpif-netdev datapath keeps ports in a cmap which is written only by
the main thread (holding port_mutex), but which is read concurrently by
many threads (most notably the pmd threads).

When removing ports from the datapath we should postpone the deletion,
otherwise another thread might access invalid memory while reading the
cmap.

This commit splits do_port_del() in do_port_remove() and
do_port_destroy(): the former removes the port from the cmap, while the
latter reclaims the memory and drops the reference to the underlying
netdev.

dpif_netdev_del_port() now uses ovsrcu_synchronize() before calling
do_port_destroy(), to avoid memory corruption in concurrent readers.

I've not been able to reproduce the bug in practice, since when the
datapath modifies its ports its stops (or locks out) most of the threads
than may access the cmap.  This change is done for two reasons:
* Using RCU is more in line with other cmap users.
* We might want to allow port removal and queue reconfiguration without
  stopping completely all the pmd threads.

Signed-off-by: Daniele Di Proietto <diproiettod at vmware.com>
---
 lib/dpif-netdev.c | 120 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 80 insertions(+), 40 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8c8fce5..3e07ddf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -469,8 +469,9 @@ static void dp_netdev_free(struct dp_netdev *)
 static int do_add_port(struct dp_netdev *dp, const char *devname,
                        const char *type, odp_port_t port_no)
     OVS_REQUIRES(dp->port_mutex);
-static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *)
+static void do_remove_port(struct dp_netdev *dp, struct dp_netdev_port *)
     OVS_REQUIRES(dp->port_mutex);
+static void do_destroy_port(struct dp_netdev_port *);
 static int dpif_netdev_open(const struct dpif_class *, const char *name,
                             bool create, struct dpif **);
 static void dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,
@@ -495,6 +496,7 @@ static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
 static struct dp_netdev_pmd_thread *
 dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
 static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
+static bool has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
@@ -969,7 +971,8 @@ static void
 dp_netdev_free(struct dp_netdev *dp)
     OVS_REQUIRES(dp_netdev_mutex)
 {
-    struct dp_netdev_port *port;
+    struct dp_netdev_port *port, **port_list;
+    size_t n_ports, k;
 
     shash_find_and_delete(&dp_netdevs, dp->name);
 
@@ -978,11 +981,25 @@ dp_netdev_free(struct dp_netdev *dp)
     ovsthread_key_delete(dp->per_pmd_key);
 
     ovs_mutex_lock(&dp->port_mutex);
+    n_ports = cmap_count(&dp->ports);
+    port_list = xcalloc(n_ports, sizeof *port_list);
+
+    k = 0;
     CMAP_FOR_EACH (port, node, &dp->ports) {
-        /* PMD threads are destroyed here. do_del_port() cannot quiesce */
-        do_del_port(dp, port);
+        do_remove_port(dp, port);
+        ovs_assert(k < n_ports);
+        port_list[k++] = port;
     }
     ovs_mutex_unlock(&dp->port_mutex);
+
+    ovsrcu_synchronize();
+
+    for (size_t i = 0; i < k; i++) {
+        do_destroy_port(port_list[k]);
+    }
+
+    free(port_list);
+
     cmap_destroy(&dp->poll_threads);
 
     seq_destroy(dp->port_seq);
@@ -1226,22 +1243,49 @@ static int
 dpif_netdev_port_del(struct dpif *dpif, odp_port_t port_no)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
+    struct dp_netdev_port *port = NULL;
     int error;
 
     ovs_mutex_lock(&dp->port_mutex);
     if (port_no == ODPP_LOCAL) {
         error = EINVAL;
     } else {
-        struct dp_netdev_port *port;
 
         error = get_port_by_number(dp, port_no, &port);
         if (!error) {
-            do_del_port(dp, port);
+            do_remove_port(dp, port);
         }
     }
     ovs_mutex_unlock(&dp->port_mutex);
 
-    return error;
+    if (!port) {
+        return error;
+    }
+
+    if (netdev_is_pmd(port->netdev)) {
+        int numa_id = netdev_get_numa_id(port->netdev);
+
+        /* PMD threads can not be on invalid numa node. */
+        ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
+        /* If there is no netdev on the numa node, deletes the pmd threads
+         * for that numa.  Else, deletes the queues from polling lists. */
+        if (!has_pmd_port_for_numa(dp, numa_id)) {
+            dp_netdev_del_pmds_on_numa(dp, numa_id);
+        } else {
+            dp_netdev_del_port_from_all_pmds(dp, port);
+        }
+    }
+
+    /* 'port' is RCU protected, we need to wait before freeing it.
+     * Postponing is not acceptable because the above layer assumes that
+     * we have dropped every reference to the netdev before returning, so
+     * that, for example, when a vsctl command returns the netdev will
+     * effectively be removed and ready to be eventually readded. */
+    ovsrcu_synchronize();
+
+    do_destroy_port(port);
+
+    return 0;
 }
 
 static bool
@@ -1276,25 +1320,6 @@ get_port_by_number(struct dp_netdev *dp,
     }
 }
 
-static void
-port_destroy(struct dp_netdev_port *port)
-{
-    if (!port) {
-        return;
-    }
-
-    netdev_close(port->netdev);
-    netdev_restore_flags(port->sf);
-
-    for (unsigned i = 0; i < port->n_rxq; i++) {
-        netdev_rxq_close(port->rxq[i]);
-    }
-
-    free(port->rxq);
-    free(port->type);
-    free(port);
-}
-
 static int
 get_port_by_name(struct dp_netdev *dp,
                  const char *devname, struct dp_netdev_port **portp)
@@ -1350,28 +1375,43 @@ has_pmd_port_for_numa(struct dp_netdev *dp, int numa_id)
     return false;
 }
 
-
+/* Port removal is a multi step process:
+ * 1. If the device is a pmd netdev, its queues must be removed from the pmd
+ *    threads.
+ * 2. The port must be removed from the 'dp->ports' cmap with do_remove_port().
+ * 3. The port memory must be released and the netdev reference must be dropped
+ *    with do_destroy_port().
+ *
+ * Order between step 1 and 2 is not important.
+ *
+ * Step 2 and 3 must be split by an RCU grace period: this can be accomplished
+ * via postponing step 3 or, if the thread really needs to wait for the netdev
+ * references to be dropped, via an ovsrcu_synchronize() call */
 static void
-do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
+do_remove_port(struct dp_netdev *dp, struct dp_netdev_port *port)
     OVS_REQUIRES(dp->port_mutex)
 {
     cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
     seq_change(dp->port_seq);
-    if (netdev_is_pmd(port->netdev)) {
-        int numa_id = netdev_get_numa_id(port->netdev);
+}
 
-        /* PMD threads can not be on invalid numa node. */
-        ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
-        /* If there is no netdev on the numa node, deletes the pmd threads
-         * for that numa.  Else, deletes the queues from polling lists. */
-        if (!has_pmd_port_for_numa(dp, numa_id)) {
-            dp_netdev_del_pmds_on_numa(dp, numa_id);
-        } else {
-            dp_netdev_del_port_from_all_pmds(dp, port);
-        }
+static void
+do_destroy_port(struct dp_netdev_port *port)
+{
+    if (!port) {
+        return;
     }
 
-    port_destroy(port);
+    netdev_close(port->netdev);
+    netdev_restore_flags(port->sf);
+
+    for (unsigned i = 0; i < port->n_rxq; i++) {
+        netdev_rxq_close(port->rxq[i]);
+    }
+
+    free(port->rxq);
+    free(port->type);
+    free(port);
 }
 
 static void
-- 
2.1.4




More information about the dev mailing list