[ovs-dev] [PATCH V1 1/3] coverage: Reimplement the "ovs-appclt coverage/show" command

Alex Wang alexw at nicira.com
Mon Jun 17 18:23:07 UTC 2013


[PATCH V2 1/3] coverage: Reimplement the "ovs-appclt coverage/show" command

This commit changes the "ovs-appclt coverage/show" command to show the
per-second, per-minute and per-hour rates of function invocation. More
importantly, this makes using coverage counter an easy way to monitor
the execution of specific functions.

Signed-off-by: Alex Wang <alexw at nicira.com>
---

v1 -> v2:
- abandon the wakeup timer and coverage_wait() function

---
 lib/coverage-unixctl.man |    3 +-
 lib/coverage.c           |   95 ++++++++++++++++++++++++++++++
+++++++++-------
 lib/coverage.h           |   17 +++++++--
 lib/timeval.c            |    1 -
 vswitchd/ovs-vswitchd.c  |    2 +
 5 files changed, 99 insertions(+), 19 deletions(-)

diff --git a/lib/coverage-unixctl.man b/lib/coverage-unixctl.man
index 9718894..2d283dc 100644
--- a/lib/coverage-unixctl.man
+++ b/lib/coverage-unixctl.man
@@ -8,4 +8,5 @@ main loop takes unusually long to run.
 Coverage counters are useful mainly for performance analysis and
 debugging.
 .IP "\fBcoverage/show\fR"
-Displays the values of all of the coverage counters.
+Displays the per-second, per-minute and per-hour rates, and total count
+of all of the coverage counters.
diff --git a/lib/coverage.c b/lib/coverage.c
index f152474..e3e7610 100644
--- a/lib/coverage.c
+++ b/lib/coverage.c
@@ -47,9 +47,21 @@ struct coverage_counter *coverage_counters[] = {
 #define n_coverage_counters ARRAY_SIZE(coverage_counters)
 #endif  /* !USE_LINKER_SECTIONS */

-static unsigned int epoch;
+/* Makes coverage_run run every 5 seconds. */
+#define COVERAGE_RUN_INTERVAL    5000
+static long long int coverage_run_time = LLONG_MIN;
+
+/* Defines the moving average array index variables. */
+static unsigned int min_idx = 0;
+static unsigned int hr_idx = 0;
+
+/* Index counter used to compute the moving average array's index. */
+static unsigned int idx_count = 0;

 static void coverage_read(struct svec *);
+static unsigned int coverage_array_sum(const unsigned int *arr,
+                                       const unsigned int len);
+

 static void
 coverage_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
@@ -170,7 +182,7 @@ coverage_log(void)
         uint32_t hash = coverage_hash();
         if (coverage_hit(hash)) {
             VLOG_INFO("Skipping details of duplicate event coverage for "
-                      "hash=%08"PRIx32" in epoch %u", hash, epoch);
+                      "hash=%08"PRIx32, hash);
         } else {
             struct svec lines;
             const char *line;
@@ -189,8 +201,16 @@ coverage_log(void)
 static void
 coverage_read_counter(struct svec *lines, const struct coverage_counter *c)
 {
-    svec_add_nocopy(lines, xasprintf("%-24s %5u / %9llu",
-                                     c->name, c->count, c->count +
c->total));
+    /* The per second rate is calculated via averaging the counts in
+     * last 5-second interval. */
+    svec_add_nocopy(lines,
+            xasprintf("%-24s %5.1f/sec %7u/min "
+                      "%9u/hr   total: %llu",
+                      c->name,
+                      c->sec_to_min[(min_idx + 11) % MIN_LEN] / 5.0,
+                      coverage_array_sum(c->sec_to_min, MIN_LEN),
+                      coverage_array_sum(c->min_to_hr,  HR_LEN),
+                      c->count + c->total));
 }

 /* Adds coverage counter information to 'lines'. */
@@ -204,8 +224,8 @@ coverage_read(struct svec *lines)
     hash = coverage_hash();

     n_never_hit = 0;
-    svec_add_nocopy(lines, xasprintf("Event coverage (epoch %u/entire
run), "
-                                     "hash=%08"PRIx32":", epoch, hash));
+    svec_add_nocopy(lines, xasprintf("Event coverage (frequency), "
+                                     "hash=%08"PRIx32":", hash));
     for (i = 0; i < n_coverage_counters; i++) {
         struct coverage_counter *c = coverage_counters[i];
         if (c->count) {
@@ -225,16 +245,65 @@ coverage_read(struct svec *lines)
     svec_add_nocopy(lines, xasprintf("%zu events never hit", n_never_hit));
 }

-/* Advances to the next epoch of coverage, resetting all the counters to
0. */
+/* Runs every second to update the coverage counters'
+ * sec_to_min array and min_to_hr array. */
 void
-coverage_clear(void)
+coverage_run(void)
 {
+    long long int cur_time = time_msec();
+
+    /* Initialize the coverage_run_time. */
+    if (coverage_run_time == LLONG_MIN) {
+        coverage_run_time = time_msec();
+    }
+
+    if (cur_time >= coverage_run_time) {
+        size_t i, j;
+        /* Computes the number of 5-second slots. */
+        int slots = (cur_time - coverage_run_time) / COVERAGE_RUN_INTERVAL
+                    + 1;
+
+        /* This loop is necessary, since it is possible that the actual
+         * run interval is multiple of 5 seconds. */
+        for (i = 0; i < slots; i++) {
+            /* Updates the index variables. */
+            /* The min_idx is increased from 0 to 11. Every time the
+             * min_idx finishes a cycle (a cycle is one minute),
+             * the hr_idx is incremented by 1. */
+            min_idx = idx_count % MIN_LEN;
+            hr_idx  = idx_count / MIN_LEN;
+
+            for (j = 0; j < n_coverage_counters; j++) {
+                struct coverage_counter *c = coverage_counters[j];
+                /* Calculates the portion to be added, which is the average
+                 * of counts over the time. Note, the last loop will also
+                 * include the remainder. */
+                int portion = c->count / slots + (i == slots - 1 ?
+                              c->count % slots : 0);
+
+                c->total += portion;
+                c->count = (i == slots -1 ? 0 : c->count) ;
+                c->sec_to_min[min_idx] = portion;
+                /* Note, only adds up the latest count portion here. */
+                c->min_to_hr[hr_idx] = min_idx == 0 ?
+                     portion : (c->min_to_hr[hr_idx] + portion);
+            }
+            /* This is to guarantee that mov_avg_hr_idx ranges from 0 to
59. */
+            idx_count = (idx_count + 1) % 720;
+            /* Updates the run time. */
+            coverage_run_time += COVERAGE_RUN_INTERVAL;
+        }
+    }
+}
+
+static unsigned int
+coverage_array_sum(const unsigned int *arr, const unsigned int len)
+{
+    unsigned int sum = 0;
     size_t i;

-    epoch++;
-    for (i = 0; i < n_coverage_counters; i++) {
-        struct coverage_counter *c = coverage_counters[i];
-        c->total += c->count;
-        c->count = 0;
+    for (i = 0; i < len; i++) {
+        sum += arr[i];
     }
+    return sum;
 }
diff --git a/lib/coverage.h b/lib/coverage.h
index 968c489..26837ed 100644
--- a/lib/coverage.h
+++ b/lib/coverage.h
@@ -29,11 +29,19 @@

 #include "vlog.h"

+/* Defines the moving average array length. */
+#define MIN_LEN 12   /* 12 * 5 seconds = 1 minute */
+#define HR_LEN  60   /* 60 * 1 minute = 1 hour */
+
 /* A coverage counter. */
 struct coverage_counter {
     const char *name;           /* Textual name. */
-    unsigned int count;         /* Count within the current epoch. */
-    unsigned long long int total; /* Total count over all epochs. */
+    unsigned int count;         /* Count within the current second. */
+    unsigned long long int total; /* Total count over all time. */
+
+    /* The moving average arrays. */
+    unsigned int sec_to_min[MIN_LEN];
+    unsigned int min_to_hr[HR_LEN];
 };

 /* Defines COUNTER.  There must be exactly one such definition at file
scope
@@ -57,11 +65,12 @@ struct coverage_counter {

 void coverage_init(void);
 void coverage_log(void);
-void coverage_clear(void);
+void coverage_run(void);

 /* Implementation detail. */
 #define COVERAGE_DEFINE__(COUNTER)                              \
         extern struct coverage_counter counter_##COUNTER;       \
-        struct coverage_counter counter_##COUNTER = { #COUNTER, 0, 0 }
+        struct coverage_counter counter_##COUNTER = { #COUNTER, 0, 0, \
+                                                      {0}, {0} }

 #endif /* coverage.h */
diff --git a/lib/timeval.c b/lib/timeval.c
index 5ff59bc..fe4f2a7 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -333,7 +333,6 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long
long int timeout_when,
     if (last_wakeup) {
         log_poll_interval(last_wakeup);
     }
-    coverage_clear();
     start = time_msec();
     blocked = false;

diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index e11febd..5194d97 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -28,6 +28,7 @@
 #include "bridge.h"
 #include "command-line.h"
 #include "compiler.h"
+#include "coverage.h"
 #include "daemon.h"
 #include "dirs.h"
 #include "dpif.h"
@@ -126,6 +127,7 @@ main(int argc, char *argv[])
         bridge_run_fast();
         unixctl_server_run(unixctl);
         netdev_run();
+        coverage_run();

         worker_wait();
         signal_wait(sighup);

--
1.7.9.5


On Mon, Jun 17, 2013 at 11:11 AM, Alex Wang <alexw at nicira.com> wrote:

> Thanks Ben,
>
>
> On Wed, Jun 12, 2013 at 4:20 PM, Ben Pfaff <blp at nicira.com> wrote:
>
>> On Wed, Jun 12, 2013 at 11:38:41AM -0700, Alex Wang wrote:
>> > This commit changes the "ovs-appclt coverage/show" command to show the
>> > per-second, per-minute and per-hour rates of function invocation. More
>> > importantly, this makes using coverage counter an easy way to monitor
>> > the execution of specific functions.
>> >
>> > Signed-off-by: Alex Wang <alexw at nicira.com>
>> > ---
>> > Possible Issue:
>> >
>> > The "coverage_run" runs every second. And the number of addition
>> operations is
>> > not very large. If it is still considered to be time-consuming, I'll
>> adjust
>> > further.
>> >
>>
>> I have two high-level design questions.
>>
>> First is the cost, not of the periodic summations (I doubt that cost
>> matters) but of forcing a wakeup every second.  This probably doesn't
>> matter in ovs-vswitchd, but other daemons that don't wake up so
>> frequently also use the poll_loop library, and it would be a shame to
>> drive up system load unnecessarily.  So I would prefer, if possible,
>> for the coverage code not to use a timer but instead to just do work
>> in coverage_run() whenever the process happens to wake up.
>>
>
>
> This is really important to know.
>
>
>
>> Second is accuracy.  I believe that the current code considers any
>> interval greater than or equal to a second as exactly a second.
>> Because wakeups aren't precise, that tends to stretch out the
>> definition of a second to somewhat longer.  If you take my suggestion
>> about cost, above, that makes the problem worse.  So I think that it
>> may be worthwhile to measure the length of the actual interval for a
>> "second" and divide by its length.  For example, if there were 100
>> events of some type over 1.5 seconds, we would count that as 100 / 1.5
>> == 66.67 events per second, and use that in the average.  That might
>> require using "double"s for the average counters, or we could use
>> fixed-point arithmetic; I think either solution could be OK.
>>
>
>
> Yes, this will achieve the per-second rate. But will not be very easy to
> compute the
> per-minute and hourly rate. So far, I cannot find a very efficient way to
> calculate
> these (maybe use long linked-lists that count to 1 minute and 1 hour?).
>
> I'll put detailed comments in v2.
>
>
> I didn't do a detailed review yet because I'd like to hear your
>> thoughts on these points first.  I did notice that
>> coverage_array_sum() is only used in coverage.c, so I would define it
>> there (and not "inline"), not in coverage.h.
>>
>
>
> Thanks for pointing this out.
>
>
>
>> Thanks,
>>
>> Ben.
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openvswitch.org/pipermail/ovs-dev/attachments/20130617/1c55546a/attachment-0003.html>


More information about the dev mailing list