[ovs-dev] [PATCH v3] ovn-northd: Add the option to pause and resume

nusiddiq at redhat.com nusiddiq at redhat.com
Wed Jul 17 00:56:09 UTC 2019


From: Numan Siddique <nusiddiq at redhat.com>

This patch adds 3 unixctl socket comments - pause, resume and is-paused.

Usage: ovs-appctl -t ovn-northd pause/resume/is-paused

This feature will be useful if the CMS wants to
  - deploy OVN DB servers in active/passive mode and
  - run ovn-northd on all these nodes and use unix ctl sockets to
    connect to the local OVN DB servers.

On the nodes where OVN Db ovsdb-servers are in passive mode, the local ovn-northds
will process the DB changes and compute logical flows to be thrown out later,
because write transactions are not allowed by these ovsdb-servers. It results in
unncessary CPU usage.

With these commands, CMS can pause ovn-northd on these node. A node
which becomes master, can resume the ovn-northd.

One use case is to use this feature in ovn-kubernetes with the above deployment model.

Signed-off-by: Numan Siddique <nusiddiq at redhat.com>
---

v2 -> v3
=======
  * Resolved merge conflicts.

v1 -> v2
=======
  * Addressed the review comments from Ben and add more documentation
    about the runtime options added by this patch.
  * v1 had an issue - When paused, it was not even waking up to process
    the IDL updates. In v2, the main thread, wakes up to process any
    IDL updates, but doesn't do any logical flow computations.

 ovn/northd/ovn-northd.8.xml |  48 ++++++++++++++
 ovn/northd/ovn-northd.c     | 121 +++++++++++++++++++++++++++---------
 tests/ovn-northd.at         |  38 +++++++++++
 3 files changed, 179 insertions(+), 28 deletions(-)

diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml
index d2267de0e..1d0243656 100644
--- a/ovn/northd/ovn-northd.8.xml
+++ b/ovn/northd/ovn-northd.8.xml
@@ -70,6 +70,23 @@
       <dd>
         Causes <code>ovn-northd</code> to gracefully terminate.
       </dd>
+
+      <dt><code>pause</code></dt>
+      <dd>
+        Pauses the ovn-northd operation from processing any Northbound and
+        Southbound database changes.
+      </dd>
+
+      <dt><code>resume</code></dt>
+      <dd>
+        Resumes the ovn-northd operation to process Northbound and
+        Southbound database contents and generate logical flows.
+      </dd>
+
+      <dt><code>is-paused</code></dt>
+      <dd>
+        Returns "true" if ovn-northd is currently paused, "false" otherwise.
+      </dd>
       </dl>
     </p>
 
@@ -82,6 +99,37 @@
       of <code>ovn-northd</code> will automatically take over.
     </p>
 
+    <h2> Active-Standby with multiple OVN DB servers</h2>
+    <p>
+      You may run multiple OVN DB servers in an OVN deployment with:
+      <ul>
+        <li>
+          OVN DB servers deployed in active/passive mode with one active
+          and multiple passive ovsdb-servers.
+        </li>
+
+        <li>
+          <code>ovn-northd</code> also deployed on all these nodes,
+          using unix ctl sockets to connect to the local OVN DB servers.
+        </li>
+      </ul>
+    </p>
+
+    <p>
+      In such deployments, the ovn-northds on the passive nodes will process
+      the DB changes and compute logical flows to be thrown out later,
+      because write transactions are not allowed by the passive ovsdb-servers.
+      It results in unnecessary CPU usage.
+    </p>
+
+    <p>
+      With the help of runtime management command <code>pause</code>, you can
+      pause <code>ovn-northd</code> on these nodes. When a passive node
+      becomes master, you can use the runtime management command
+      <code>resume</code> to resume the <code>ovn-northd</code> to process the
+      DB changes.
+    </p>
+
     <h1>Logical Flow Table Structure</h1>
 
     <p>
diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c
index eb6c47cad..8e13901e0 100644
--- a/ovn/northd/ovn-northd.c
+++ b/ovn/northd/ovn-northd.c
@@ -52,6 +52,9 @@
 VLOG_DEFINE_THIS_MODULE(ovn_northd);
 
 static unixctl_cb_func ovn_northd_exit;
+static unixctl_cb_func ovn_northd_pause;
+static unixctl_cb_func ovn_northd_resume;
+static unixctl_cb_func ovn_northd_is_paused;
 
 struct northd_context {
     struct ovsdb_idl *ovnnb_idl;
@@ -9182,6 +9185,7 @@ main(int argc, char *argv[])
     struct unixctl_server *unixctl;
     int retval;
     bool exiting;
+    bool paused;
 
     fatal_ignore_sigpipe();
     ovs_cmdl_proctitle_init(argc, argv);
@@ -9196,6 +9200,10 @@ main(int argc, char *argv[])
         exit(EXIT_FAILURE);
     }
     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
+    unixctl_command_register("pause", "", 0, 0, ovn_northd_pause, &paused);
+    unixctl_command_register("resume", "", 0, 0, ovn_northd_resume, &paused);
+    unixctl_command_register("is-paused", "", 0, 0, ovn_northd_is_paused,
+                             &paused);
 
     daemonize_complete();
 
@@ -9384,34 +9392,51 @@ main(int argc, char *argv[])
 
     /* Main loop. */
     exiting = false;
+    paused = false;
     while (!exiting) {
-        struct northd_context ctx = {
-            .ovnnb_idl = ovnnb_idl_loop.idl,
-            .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
-            .ovnsb_idl = ovnsb_idl_loop.idl,
-            .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
-            .sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name,
-            .sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp,
-            .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp,
-        };
-
-        if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
-            VLOG_INFO("ovn-northd lock acquired. "
-                      "This ovn-northd instance is now active.");
-            had_lock = true;
-        } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
-            VLOG_INFO("ovn-northd lock lost. "
-                      "This ovn-northd instance is now on standby.");
-            had_lock = false;
-        }
-
-        if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
-            ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
-            if (ctx.ovnsb_txn) {
-                check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
-                check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
-                check_and_update_rbac(&ctx);
+        /* unixctl_server_run could modify the value of 'paused'.
+         * So store the value in local 'paused_' so that we run
+         * 'ovsdb_idl_loop_commit_and_wait() at the end of the loop. */
+        bool paused_ = paused;
+
+        if (!paused_) {
+            struct northd_context ctx = {
+                .ovnnb_idl = ovnnb_idl_loop.idl,
+                .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
+                .ovnsb_idl = ovnsb_idl_loop.idl,
+                .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
+                .sbrec_ha_chassis_grp_by_name = sbrec_ha_chassis_grp_by_name,
+                .sbrec_mcast_group_by_name_dp = sbrec_mcast_group_by_name_dp,
+                .sbrec_ip_mcast_by_dp = sbrec_ip_mcast_by_dp,
+            };
+
+            if (!had_lock && ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+                VLOG_INFO("ovn-northd lock acquired. "
+                        "This ovn-northd instance is now active.");
+                had_lock = true;
+            } else if (had_lock && !ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+                VLOG_INFO("ovn-northd lock lost. "
+                        "This ovn-northd instance is now on standby.");
+                had_lock = false;
             }
+
+            if (ovsdb_idl_has_lock(ovnsb_idl_loop.idl)) {
+                ovn_db_run(&ctx, sbrec_chassis_by_name, &ovnsb_idl_loop);
+                if (ctx.ovnsb_txn) {
+                    check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
+                    check_and_add_supported_dhcpv6_opts_to_sb_db(&ctx);
+                    check_and_update_rbac(&ctx);
+                }
+            }
+        } else {
+            /* ovn-northd is paused
+             *    - we still want to handle any db updates and update the
+             *      local IDL. Otherwise, when it is resumed, the local IDL
+             *      copy will be out of sync.
+             *    - but we don't want to create any txns.
+             * */
+            ovsdb_idl_run(ovnnb_idl_loop.idl);
+            ovsdb_idl_run(ovnsb_idl_loop.idl);
         }
 
         unixctl_server_run(unixctl);
@@ -9419,8 +9444,16 @@ main(int argc, char *argv[])
         if (exiting) {
             poll_immediate_wake();
         }
-        ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
-        ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
+
+        if (!paused_) {
+            ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
+            ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
+        } else {
+            /* ovn-northd is paused, but we still want to wake up for any db
+             * updates. */
+            ovsdb_idl_wait(ovnnb_idl_loop.idl);
+            ovsdb_idl_wait(ovnsb_idl_loop.idl);
+        }
 
         poll_block();
         if (should_service_stop()) {
@@ -9445,3 +9478,35 @@ ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
 
     unixctl_command_reply(conn, NULL);
 }
+
+static void
+ovn_northd_pause(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                const char *argv[] OVS_UNUSED, void *pause_)
+{
+    bool *pause = pause_;
+    *pause = true;
+
+    unixctl_command_reply(conn, NULL);
+}
+
+static void
+ovn_northd_resume(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                  const char *argv[] OVS_UNUSED, void *pause_)
+{
+    bool *pause = pause_;
+    *pause = false;
+
+    unixctl_command_reply(conn, NULL);
+}
+
+static void
+ovn_northd_is_paused(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                     const char *argv[] OVS_UNUSED, void *paused_)
+{
+    bool *paused = paused_;
+    if (*paused) {
+        unixctl_command_reply(conn, "true");
+    } else {
+        unixctl_command_reply(conn, "false");
+    }
+}
diff --git a/tests/ovn-northd.at b/tests/ovn-northd.at
index 62e58fd0e..0dea04edc 100644
--- a/tests/ovn-northd.at
+++ b/tests/ovn-northd.at
@@ -898,3 +898,41 @@ as northd
 OVS_APP_EXIT_AND_WAIT([ovn-northd])
 
 AT_CLEANUP
+
+AT_SETUP([ovn -- ovn-northd pause and resume])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+AT_CHECK([test xfalse = x`as northd ovs-appctl -t ovn-northd is-paused`])
+
+ovn-nbctl ls-add sw0
+
+OVS_WAIT_UNTIL([
+    ovn-sbctl lflow-list sw0
+    test 0 = $?])
+
+ovn-nbctl ls-del sw0
+OVS_WAIT_UNTIL([
+    ovn-sbctl lflow-list sw0
+    test 1 = $?])
+
+# Now pause the ovn-northd
+as northd ovs-appctl -t ovn-northd pause
+AT_CHECK([test xtrue = x`as northd ovs-appctl -t ovn-northd is-paused`])
+
+ovn-nbctl ls-add sw0
+
+# There should be no logical flows for sw0 datapath.
+OVS_WAIT_UNTIL([
+    ovn-sbctl lflow-list sw0
+    test 1 = $?])
+
+# Now resume ovn-northd
+as northd ovs-appctl -t ovn-northd resume
+AT_CHECK([test xfalse = x`as northd ovs-appctl -t ovn-northd is-paused`])
+
+OVS_WAIT_UNTIL([
+    ovn-sbctl lflow-list sw0
+    test 0 = $?])
+
+AT_CLEANUP
-- 
2.21.0



More information about the dev mailing list