[ovs-dev] [PATCH 2/2] timeval: Add new "backtrace" appctl command.

Ethan Jackson ethan at nicira.com
Sat Oct 13 02:40:23 UTC 2012


Often, it can be quite difficult to debug performance issues in
Open vSwitch.  Typically one needs to run something like gprof, but
that requires rebuilding and installing on the affected system
which is often problematic.  This patch adds a light weight
profiling solution which can be used in these situations.  The
ovs-appctl backtrace command prints out backtraces taken at 100
millisecond intervals over a 5 second period of time.  It is
currently only supported on systems which have the execinfo library
and enable time caching.

Signed-off-by: Ethan Jackson <ethan at nicira.com>
---
 configure.ac  |  2 +-
 lib/timeval.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 6fb30fd..ef513aa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -61,7 +61,7 @@ OVS_CHECK_STRTOK_R
 AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
   [], [], [[#include <sys/stat.h>]])
 AC_CHECK_FUNCS([mlockall strnlen strsignal getloadavg statvfs setmntent])
-AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h])
+AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h execinfo.h])
 
 OVS_CHECK_PKIDIR
 OVS_CHECK_RUNDIR
diff --git a/lib/timeval.c b/lib/timeval.c
index d3b6685..345c9ae 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -18,6 +18,9 @@
 #include "timeval.h"
 #include <assert.h>
 #include <errno.h>
+#if HAVE_EXECINFO_H
+#include <execinfo.h>
+#endif
 #include <poll.h>
 #include <signal.h>
 #include <stdlib.h>
@@ -27,12 +30,18 @@
 #include <unistd.h>
 #include "coverage.h"
 #include "dummy.h"
+#include "dynamic-string.h"
 #include "fatal-signal.h"
+#include "list.h"
 #include "signals.h"
 #include "unixctl.h"
 #include "util.h"
 #include "vlog.h"
 
+#ifndef HAVE_EXECINFO_H
+#define HAVE_EXECINFO_H 0
+#endif
+
 VLOG_DEFINE_THIS_MODULE(timeval);
 
 /* The clock to use for measuring time intervals.  This is CLOCK_MONOTONIC by
@@ -61,6 +70,16 @@ static bool time_stopped;           /* Disables real-time updates, if true. */
 /* Time in milliseconds at which to die with SIGALRM (if not LLONG_MAX). */
 static long long int deadline = LLONG_MAX;
 
+static struct unixctl_conn *backtrace_conn = NULL;
+static struct list traces = LIST_INITIALIZER(&traces);
+static size_t n_traces OVS_UNUSED = 0;
+
+struct trace {
+    struct list node;    /* In 'traces' list. */
+    void *backtrace[32]; /* Populated by backtrace(). */
+    size_t n_frames;     /* Number of frames in 'backtrace'. */
+};
+
 static void set_up_timer(void);
 static void set_up_signal(int flags);
 static void sigalrm_handler(int);
@@ -73,17 +92,29 @@ static struct rusage *get_recent_rusage(void);
 static void refresh_rusage(void);
 static void timespec_add(struct timespec *sum,
                          const struct timespec *a, const struct timespec *b);
+static void trace_run(void);
+static unixctl_cb_func backtrace_cb;
 
 /* Initializes the timetracking module, if not already initialized. */
 static void
 time_init(void)
 {
     static bool inited;
+
+    /* The best place to do this is probably a timeval_run() function.
+     * However, none exists and this function is usually so fast that doing it
+     * here seems fine for now. */
+    trace_run();
+
     if (inited) {
         return;
     }
     inited = true;
 
+    if (HAVE_EXECINFO_H && CACHE_TIME) {
+        unixctl_command_register("backtrace", "", 0, 0, backtrace_cb, NULL);
+    }
+
     coverage_init();
 
     if (!clock_gettime(CLOCK_MONOTONIC, &monotonic_time)) {
@@ -364,6 +395,17 @@ sigalrm_handler(int sig_nr OVS_UNUSED)
 {
     wall_tick = true;
     monotonic_tick = true;
+
+#if HAVE_EXECINFO_H
+    if (backtrace_conn) {
+        struct trace *trace = xmalloc(sizeof *trace);
+
+        n_traces++;
+        list_push_back(&traces, &trace->node);
+        trace->n_frames = backtrace(trace->backtrace,
+                                    ARRAY_SIZE(trace->backtrace));
+    }
+#endif
 }
 
 static void
@@ -565,6 +607,40 @@ get_cpu_usage(void)
 {
     return cpu_usage;
 }
+
+static void
+trace_run(void)
+{
+#if HAVE_EXECINFO_H
+    if (backtrace_conn && n_traces >= 50) {
+        struct ds ds = DS_EMPTY_INITIALIZER;
+        struct trace *trace, *next;
+        size_t i;
+
+        i = 1;
+        LIST_FOR_EACH_SAFE (trace, next, node, &traces) {
+            char **frame_strs;
+            size_t j;
+
+            frame_strs = backtrace_symbols(trace->backtrace, trace->n_frames);
+
+            ds_put_format(&ds, "Backtrace %zu\n", i);
+            for (j = 0; j < trace->n_frames; j++) {
+                ds_put_format(&ds, "%s\n", frame_strs[j]);
+            }
+            ds_put_cstr(&ds, "\n");
+
+            free(frame_strs);
+            list_remove(&trace->node);
+            free(trace);
+            i++;
+        }
+        n_traces = 0;
+        unixctl_command_reply(backtrace_conn, ds_cstr(&ds));
+        backtrace_conn = NULL;
+    }
+#endif
+}
 
 /* Unixctl interface. */
 
@@ -604,6 +680,15 @@ timeval_warp_cb(struct unixctl_conn *conn,
     unixctl_command_reply(conn, "warped");
 }
 
+static void
+backtrace_cb(struct unixctl_conn *conn,
+             int argc OVS_UNUSED, const char *argv[] OVS_UNUSED,
+             void *aux OVS_UNUSED)
+{
+    assert(HAVE_EXECINFO_H && CACHE_TIME);
+    backtrace_conn = conn;
+}
+
 void
 timeval_dummy_register(void)
 {
-- 
1.7.12




More information about the dev mailing list