[ovs-dev] [PATCH ovn 3/4] ovn-nbctl: Support option --print-wait-time for latency measurement.

Han Zhou hzhou at ovn.org
Tue Sep 1 23:26:44 UTC 2020


This patch adds option --print-wait-time, which can be used together
with --wait=sb/hv to print the time spent on respective processing.

For example:
$ ovn-nbctl --print-wait-time --wait=hv sync
Time spent on processing nb_cfg 392:
    ovn-northd delay before processing: 1ms
    ovn-northd completion:              32ms
    ovn-controller(s) completion:       39ms

Without this, we had to use "time" command to measure the total time
spent by the ovn-nbctl command. However, it doesn't refect the actual
control plane latency, because the nb_cfg updates from all hypervisors
to SB DB contributes a significant part to the time spent on waiting
by the ovn-nbctl command. This doesn't reflect the real production
scenario. With this option (and the timestamp support from the previous
commit), we can measure the exact time when hypervisors completed
SB DB change handling and enforcing to OVS.

Signed-off-by: Han Zhou <hzhou at ovn.org>
---
 tests/ovn.at              |  3 +++
 utilities/ovn-nbctl.8.xml | 17 +++++++++++++++++
 utilities/ovn-nbctl.c     | 38 +++++++++++++++++++++++++++++++++-----
 3 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/tests/ovn.at b/tests/ovn.at
index 55257c3..f238b66 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -21451,6 +21451,9 @@ hv3_ts=$(ovn-sbctl --bare --columns=nb_cfg_timestamp find chassis_private name=h
 hv_cfg_ts=$(ovn-nbctl get nb_global . hv_cfg_timestamp)
 AT_CHECK([test x$hv3_ts == x$hv_cfg_ts])
 
+AT_CHECK([test x$(ovn-nbctl --print-wait-time --wait=sb sync | grep ms | wc -l) == x2])
+AT_CHECK([test x$(ovn-nbctl --print-wait-time --wait=hv sync | grep ms | wc -l) == x3])
+
 for i in $(seq 2 $n); do
     OVN_CLEANUP_SBOX([hv$i])
 done
diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index 07447d2..fcc4312 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -1322,6 +1322,23 @@
         </p>
       </dd>
 
+    <dt><code>--print-wait-time</code></dt>
+    <dd>
+      When <code>--wait</code> is specified, the option
+      <code>--print-wait-time</code> can be used to print the time spent on
+      waiting, depending on the value specified in <code> --wait</code> option.
+      If <code>--wait=sb</code> is specified, it prints "ovn-northd delay
+      before processing", which is the time between the Northbound DB update by
+      the command and the moment when <code> ovn-northd</code> starts
+      processing the update, and "ovn-northd completion", which is the time
+      between the Northbound DB update and the moment when
+      <code>ovn-northd</code> completes the Southbound DB updating
+      successfully. If <code>--wait=hv</code> is specified, in addition to the
+      above information, it also prints "ovn-controller(s) completion", which
+      is the time between the Northbound DB update and the moment when the
+      slowest hypervisor finishes processing the update.
+    </dd>
+
     <dt><code>--db</code> <var>database</var></dt>
     <dd>
       The OVSDB database remote to contact.  If the <env>OVN_NB_DB</env>
diff --git a/utilities/ovn-nbctl.c b/utilities/ovn-nbctl.c
index d7bb4b4..57dfaa1 100644
--- a/utilities/ovn-nbctl.c
+++ b/utilities/ovn-nbctl.c
@@ -63,6 +63,8 @@ enum nbctl_wait_type {
 };
 static enum nbctl_wait_type wait_type = NBCTL_WAIT_NONE;
 
+static bool print_wait_time = false;
+
 /* Should we wait (if specified by 'wait_type') even if the commands don't
  * change the database at all? */
 static bool force_wait = false;
@@ -302,14 +304,16 @@ main_loop(const char *args, struct ctl_command *commands, size_t n_commands,
 #define MAIN_LOOP_OPTION_ENUMS                  \
         OPT_NO_WAIT,                            \
         OPT_WAIT,                               \
+        OPT_PRINT_WAIT_TIME,                    \
         OPT_DRY_RUN,                            \
         OPT_ONELINE
 
-#define MAIN_LOOP_LONG_OPTIONS                           \
-        {"no-wait", no_argument, NULL, OPT_NO_WAIT},     \
-        {"wait", required_argument, NULL, OPT_WAIT},     \
-        {"dry-run", no_argument, NULL, OPT_DRY_RUN},     \
-        {"oneline", no_argument, NULL, OPT_ONELINE},     \
+#define MAIN_LOOP_LONG_OPTIONS                                          \
+        {"no-wait", no_argument, NULL, OPT_NO_WAIT},                    \
+        {"wait", required_argument, NULL, OPT_WAIT},                    \
+        {"print-wait-time", no_argument, NULL, OPT_PRINT_WAIT_TIME},    \
+        {"dry-run", no_argument, NULL, OPT_DRY_RUN},                    \
+        {"oneline", no_argument, NULL, OPT_ONELINE},                    \
         {"timeout", required_argument, NULL, 't'}
 
 enum {
@@ -358,6 +362,10 @@ handle_main_loop_option(int opt, const char *arg, bool *handled)
         }
         break;
 
+    case OPT_PRINT_WAIT_TIME:
+        print_wait_time = true;
+        break;
+
     case OPT_DRY_RUN:
         dry_run = true;
         break;
@@ -777,6 +785,7 @@ Options:\n\
   --no-shuffle-remotes        do not shuffle the order of remotes\n\
   --wait=sb                   wait for southbound database update\n\
   --wait=hv                   wait for all chassis to catch up\n\
+  --print-wait-time           print time spent on waiting\n\
   -t, --timeout=SECS          wait at most SECS seconds\n\
   --dry-run                   do not commit changes to database\n\
   --oneline                   print exactly one line of output per command\n",
@@ -5992,8 +6001,10 @@ run_prerequisites(struct ctl_command *commands, size_t n_commands,
     ovsdb_idl_add_table(idl, &nbrec_table_nb_global);
     if (wait_type == NBCTL_WAIT_SB) {
         ovsdb_idl_add_column(idl, &nbrec_nb_global_col_sb_cfg);
+        ovsdb_idl_add_column(idl, &nbrec_nb_global_col_sb_cfg_timestamp);
     } else if (wait_type == NBCTL_WAIT_HV) {
         ovsdb_idl_add_column(idl, &nbrec_nb_global_col_hv_cfg);
+        ovsdb_idl_add_column(idl, &nbrec_nb_global_col_hv_cfg_timestamp);
     }
 
     for (struct ctl_command *c = commands; c < &commands[n_commands]; c++) {
@@ -6065,6 +6076,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands,
     struct shash_node *node;
     int64_t next_cfg = 0;
     char *error = NULL;
+    int64_t start_time = 0;
 
     ovs_assert(retry);
 
@@ -6132,6 +6144,7 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands,
         }
     }
 
+    start_time = time_wall_msec();
     status = ovsdb_idl_txn_commit_block(txn);
     if (wait_type != NBCTL_WAIT_NONE && status == TXN_SUCCESS) {
         next_cfg = ovsdb_idl_txn_get_increment_new_value(txn);
@@ -6203,6 +6216,21 @@ do_nbctl(const char *args, struct ctl_command *commands, size_t n_commands,
                                    ? nb->sb_cfg
                                    : nb->hv_cfg);
                 if (cur_cfg >= next_cfg) {
+                    if (print_wait_time) {
+                        printf("Time spent on processing nb_cfg %"PRId64":\n",
+                               next_cfg);
+                        printf("\tovn-northd delay before processing:"
+                               "\t%"PRId64"ms\n",
+                               nb->nb_cfg_timestamp - start_time);
+                        printf("\tovn-northd completion:"
+                               "\t\t\t%"PRId64"ms\n",
+                               nb->sb_cfg_timestamp - start_time);
+                        if (wait_type == NBCTL_WAIT_HV) {
+                            printf("\tovn-controller(s) completion:"
+                                   "\t\t%"PRId64"ms\n",
+                                   nb->hv_cfg_timestamp - start_time);
+                        }
+                    }
                     goto done;
                 }
             }
-- 
2.1.0



More information about the dev mailing list