[ovs-dev] [PATCH] ovn-controller: Add unix reconnect command

Zhen Wang zhewang at nvidia.com
Thu Aug 13 20:57:43 UTC 2020


OVN SB in clustered mode, all the ovn-controller clients connect
across all the nodes in a balanced state. When one raft node down and
online. All the ovn-controller clients will not migirate back which
cause RAFT DB clients unbalanced state.
RAFT clients in an unbalanced state would trigger more stress to the SB.
This commit introduce one unix command reconnect remote which let user
trigger a force reconnect to desisred RAFT node which can adddress the
problem.
Note: this patch requires ovsdb-idl function ovsdb_idl_set_next_remote.

Reported-at:https://mail.openvswitch.org/pipermail/ovs-discuss/2020-August/050518.html
Signed-off-by: Zhen Wang <zhewang at nvidia.com>
---
 controller/ovn-controller.8.xml | 12 ++++++++++++
 controller/ovn-controller.c     | 27 +++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)

diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index 66877314c..66f521398 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -507,6 +507,18 @@
         local index so that it can interact with the southbound database again.
       </p>
       </dd>
+
+      <dt><code>reconnect</code></dt>
+      <dd>
+      <p>
+        Trigger a force reconnect to one specific remote in Open_vSwitch table
+        external_ids:ovn-remote.
+      </p>
+      <p>
+        This command is intended to use in the event of clustered SB DB has
+        unbalanced clients across the raft nodes.
+      </p>
+      </dd>
       </dl>
     </p>
 
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index ea6a436c0..46ed90492 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -68,6 +68,7 @@
 VLOG_DEFINE_THIS_MODULE(main);
 
 static unixctl_cb_func ovn_controller_exit;
+static unixctl_cb_func ovn_controller_reconnect;
 static unixctl_cb_func ct_zone_list;
 static unixctl_cb_func extend_table_list;
 static unixctl_cb_func inject_pkt;
@@ -2078,6 +2079,11 @@ struct ovn_controller_exit_args {
     bool *restart;
 };
 
+struct ovn_controller_reconnect_args {
+    bool trigger;
+    char *ovn_remote;
+};
+
 int
 main(int argc, char *argv[])
 {
@@ -2085,6 +2091,7 @@ main(int argc, char *argv[])
     bool exiting;
     bool restart;
     struct ovn_controller_exit_args exit_args = {&exiting, &restart};
+    struct ovn_controller_reconnect_args reconnect = {false, NULL};
     int retval;
 
     ovs_cmdl_proctitle_init(argc, argv);
@@ -2103,6 +2110,8 @@ main(int argc, char *argv[])
     }
     unixctl_command_register("exit", "", 0, 1, ovn_controller_exit,
                              &exit_args);
+    unixctl_command_register("reconnect", "", 1, 1, ovn_controller_reconnect,
+                             &reconnect);
 
     daemonize_complete();
 
@@ -2511,6 +2520,13 @@ main(int argc, char *argv[])
                                                sb_monitor_all);
                         }
                     }
+                    if (reconnect.trigger && ovsdb_idl_is_connected(ovnsb_idl_loop.idl)) {
+                        VLOG_INFO("User triggered force reconnect to %s", reconnect.ovn_remote);
+                        ovsdb_idl_set_next_remote(ovnsb_idl_loop.idl, reconnect.ovn_remote);
+                        ovsdb_idl_force_reconnect(ovnsb_idl_loop.idl);
+                        free(reconnect.ovn_remote);
+                        reconnect.trigger = false;
+                    }
                 }
 
             }
@@ -2674,6 +2690,7 @@ main(int argc, char *argv[])
     ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
 
     free(ovs_remote);
+    free(reconnect.ovn_remote);
     service_stop();
 
     exit(retval);
@@ -2780,6 +2797,16 @@ ovn_controller_exit(struct unixctl_conn *conn, int argc,
     unixctl_command_reply(conn, NULL);
 }
 
+static void
+ovn_controller_reconnect(struct unixctl_conn *conn, int argc OVS_UNUSED,
+             const char *argv[], void *reconnect_args_)
+{
+    struct ovn_controller_reconnect_args *reconnect_args = reconnect_args_;
+    reconnect_args->trigger = true;
+    reconnect_args->ovn_remote = xstrdup(argv[1]);
+    unixctl_command_reply(conn, NULL);
+}
+
 static void
 ct_zone_list(struct unixctl_conn *conn, int argc OVS_UNUSED,
              const char *argv[] OVS_UNUSED, void *ct_zones_)
-- 
2.20.1



More information about the dev mailing list