[ovs-dev] [PATCH v3 9/9] ovn-trace: New utility.

Ben Pfaff blp at ovn.org
Mon Aug 8 18:21:52 UTC 2016


This new utility is intended to fulfill for OVN the purpose that
"ofproto/trace" has for Open vSwitch.  First, it's meant to be a useful
tool for troubleshooting and diagnosis and in general for improving one's
understanding of the emergent properties of a flow table.  Second, it
simplifies and increases the practical scope of testing, as well as making
testing more reliable and repeatable and failures easier to interpret.

This commit adds only a single test that uses the new utility, based on the
oldest OVN end-to-end test "ovn -- 3 HVs, 1 LS, 3 lports/HV".  The
differences between the old and the new test illustrate properties of
tracing.  First, the new test does not start any ovn-controller processes
or simulate any hypervisors in a nontrivial way.  This is because ovn-trace
does not actually forward packets or rely on the physical structure of the
system.  Second, because the old test tested not just the logical but also
the physical structure of the system, it needed to have several logical
ports, a total of 9 (3 on each of 3 HVs), whereas since this test only
tests the logical network implementation it can use a smaller number.  This
property also means that the new test runs significantly faster than the old
one (less than a second on my laptop).

In my opinion this approach points the way toward the future of OVN
testing.  Certainly, we need end-to-end tests.  However, I believe that the
bulk of our tests can be broken into ones that test the logical network
implementation (using tracing) and ones that test physical/logical
translation.

Signed-off-by: Ben Pfaff <blp at ovn.org>
---
 include/openvswitch/meta-flow.h |    8 +
 include/ovn/actions.h           |    8 +
 lib/automake.mk                 |    1 +
 lib/daemon.xml                  |  121 ++++
 ovn/lib/actions.c               |   23 +-
 ovn/utilities/automake.mk       |   11 +-
 ovn/utilities/ovn-trace.8.xml   |  287 ++++++++
 ovn/utilities/ovn-trace.c       | 1429 +++++++++++++++++++++++++++++++++++++++
 tests/ovn.at                    |  216 ++++++
 9 files changed, 2096 insertions(+), 8 deletions(-)
 create mode 100644 lib/daemon.xml
 create mode 100644 ovn/utilities/ovn-trace.8.xml
 create mode 100644 ovn/utilities/ovn-trace.c

diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index f209fc2..23f9916 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -1808,6 +1808,14 @@ struct mf_bitmap {
 #error "Need to update CASE_MFF_XXREGS to match FLOW_N_XXREGS"
 #endif
 
+static inline bool
+mf_is_register(enum mf_field_id id)
+{
+    return ((id >= MFF_REG0   && id < MFF_REG0   + FLOW_N_REGS) ||
+            (id >= MFF_XREG0  && id < MFF_XREG0  + FLOW_N_XREGS) ||
+            (id >= MFF_XXREG0 && id < MFF_XXREG0 + FLOW_N_XXREGS));
+}
+
 /* Use this macro as CASE_MFF_TUN_METADATA: in a switch statement to choose
  * all of the MFF_TUN_METADATAn cases. */
 #define CASE_MFF_TUN_METADATA                         \
diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 9ff7b27..0a0f1b2 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -26,6 +26,7 @@
 #include "util.h"
 
 struct lexer;
+struct ofpact_set_field;
 struct ofpbuf;
 struct shash;
 struct simap;
@@ -148,6 +149,13 @@ struct ovnact_load {
     union expr_constant imm;
 };
 
+void ovnact_load_to_ofpact_set_field(const struct ovnact_load *,
+                                     bool (*lookup_port)(const void *aux,
+                                                         const char *port_name,
+                                                         unsigned int *portp),
+                                     const void *aux,
+                                     struct ofpact_set_field *);
+
 /* OVNACT_MOVE, OVNACT_EXCHANGE. */
 struct ovnact_move {
     struct ovnact ovnact;
diff --git a/lib/automake.mk b/lib/automake.mk
index 2faaeac..cda9648 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -434,6 +434,7 @@ EXTRA_DIST += \
 	lib/dh2048.pem \
 	lib/dh4096.pem \
 	lib/common.xml \
+	lib/daemon.xml \
 	lib/dirs.c.in \
 	lib/db-ctl-base.xml \
 	lib/ssl.xml \
diff --git a/lib/daemon.xml b/lib/daemon.xml
new file mode 100644
index 0000000..d752e99
--- /dev/null
+++ b/lib/daemon.xml
@@ -0,0 +1,121 @@
+<?xml version="1.0" encoding="utf-8"?>
+<dl>
+  <dt><code>--pidfile</code>[<code>=</code><var>pidfile</var>]</dt>
+  <dd>
+    <p>
+      Causes a file (by default, <code><var>program</var>.pid</code>) to be
+      created indicating the PID of the running process.  If the
+      <var>pidfile</var> argument is not specified, or if it does not begin
+      with <code>/</code>, then it is created in <code>@RUNDIR@</code>.
+    </p>
+
+    <p>
+      If <code>--pidfile</code> is not specified, no pidfile is created.
+    </p>
+  </dd>
+
+  <dt><code>--overwrite-pidfile</code></dt>
+  <dd>
+    <p>
+      By default, when <code>--pidfile</code> is specified and the specified
+      pidfile already exists and is locked by a running process, the daemon
+      refuses to start.  Specify <code>--overwrite-pidfile</code> to cause it
+      to instead overwrite the pidfile.
+    </p>
+
+    <p>
+      When <code>--pidfile</code> is not specified, this option has no effect.
+    </p>
+  </dd>
+
+  <dt><code>--detach</code></dt>
+  <dd>
+    Runs this program as a background process.  The process forks, and in the
+    child it starts a new session, closes the standard file descriptors (which
+    has the side effect of disabling logging to the console), and changes its
+    current directory to the root (unless <code>--no-chdir</code> is
+    specified).  After the child completes its initialization, the parent
+    exits.
+  </dd>
+
+  <dt><code>--monitor</code></dt>
+  <dd>
+    <p>
+      Creates an additional process to monitor this program.  If it dies due to
+      a signal that indicates a programming error (<code>SIGABRT</code>,
+      <code>SIGALRM</code>, <code>SIGBUS</code>, <code>SIGFPE</code>,
+      <code>SIGILL</code>, <code>SIGPIPE</code>, <code>SIGSEGV</code>,
+      <code>SIGXCPU</code>, or <code>SIGXFSZ</code>) then the monitor process
+      starts a new copy of it.  If the daemon dies or exits for another reason,
+      the monitor process exits.
+    </p>
+
+    <p>
+      This option is normally used with <code>--detach</code>, but it also
+      functions without it.
+    </p>
+  </dd>
+
+  <dt><code>--no-chdir</code></dt>
+  <dd>
+    <p>
+      By default, when <code>--detach</code> is specified, the daemon changes
+      its current working directory to the root directory after it detaches.
+      Otherwise, invoking the daemon from a carelessly chosen directory would
+      prevent the administrator from unmounting the file system that holds that
+      directory.
+    </p>
+
+    <p>
+      Specifying <code>--no-chdir</code> suppresses this behavior, preventing
+      the daemon from changing its current working directory.  This may be
+      useful for collecting core files, since it is common behavior to write
+      core dumps into the current working directory and the root directory is
+      not a good directory to use.
+    </p>
+
+    <p>
+      This option has no effect when <code>--detach</code> is not specified.
+    </p>
+  </dd>
+
+  <dt><code>--no-self-confinement</code></dt>
+  <dd>
+    By default this daemon will try to confine itself to work with files
+    under well-known directories whitelisted at build time.  It is better to
+    stick with this default behavior and not to use this flag unless some other
+    Access Control is used to confine the daemon.  Note that in contrast to
+    other access control implementations that are typically enforced from
+    kernel-space (e.g. DAC or MAC), self-confinement is imposed from the
+    user-space daemon itself and hence should not be considered as a full
+    confinement strategy, but instead should be viewed as an additional layer
+    of security.
+  </dd>
+
+  <dt><code>--user=</code><var>user</var><code>:</code><var>group</var></dt>
+  <dd>
+    <p>
+      Causes this program to run as a different user specified in
+      <var>user</var><code>:</code><var>group</var>, thus dropping most of the
+      root privileges. Short forms <var>user</var> and
+      <code>:</code><var>group</var> are also allowed, with current user or
+      group assumed, respectively.  Only daemons started by the root user
+      accept this argument.
+    </p>
+
+    <p>
+      On Linux, daemons will be granted <code>CAP_IPC_LOCK</code> and
+      <code>CAP_NET_BIND_SERVICE</code> before dropping root privileges.
+      Daemons that interact with the datapath, such as
+      <code>ovs-vswitchd</code>, will be granted two additional capabilities,
+      namely <code>CAP_NET_ADMIN</code> and <code>CAP_NET_RAW</code>.  The
+      capability change will apply even if the new user is root.
+    </p>
+
+    <p>
+      On Windows, this option is not currently supported.  For security
+      reasons, specifying this option will cause the daemon process not to
+      start.
+    </p>
+  </dd>
+</dl>
diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c
index 95ee0c6..d3cc79d 100644
--- a/ovn/lib/actions.c
+++ b/ovn/lib/actions.c
@@ -340,15 +340,17 @@ format_LOAD(const struct ovnact_load *load, struct ds *s)
     ds_put_char(s, ';');
 }
 
-static void
-encode_LOAD(const struct ovnact_load *load,
-            const struct ovnact_encode_params *ep,
-            struct ofpbuf *ofpacts)
+void
+ovnact_load_to_ofpact_set_field(const struct ovnact_load *load,
+                                bool (*lookup_port)(const void *aux,
+                                                    const char *port_name,
+                                                    unsigned int *portp),
+                                const void *aux,
+                                struct ofpact_set_field *sf)
 {
     const union expr_constant *c = &load->imm;
     struct mf_subfield dst = expr_resolve_field(&load->dst);
 
-    struct ofpact_set_field *sf = ofpact_put_SET_FIELD(ofpacts);
     sf->field = dst.field;
 
     if (load->dst.symbol->width) {
@@ -365,7 +367,7 @@ encode_LOAD(const struct ovnact_load *load,
         }
     } else {
         uint32_t port;
-        if (!ep->lookup_port(ep->aux, load->imm.string, &port)) {
+        if (!lookup_port(aux, load->imm.string, &port)) {
             port = 0;
         }
         bitwise_put(port, &sf->value,
@@ -375,6 +377,15 @@ encode_LOAD(const struct ovnact_load *load,
 }
 
 static void
+encode_LOAD(const struct ovnact_load *load,
+            const struct ovnact_encode_params *ep,
+            struct ofpbuf *ofpacts)
+{
+    ovnact_load_to_ofpact_set_field(load, ep->lookup_port, ep->aux,
+                                    ofpact_put_SET_FIELD(ofpacts));
+}
+
+static void
 free_LOAD(struct ovnact_load *load)
 {
     expr_constant_destroy(&load->imm, load_type(load));
diff --git a/ovn/utilities/automake.mk b/ovn/utilities/automake.mk
index d84368c..aaed9c0 100644
--- a/ovn/utilities/automake.mk
+++ b/ovn/utilities/automake.mk
@@ -4,7 +4,8 @@ scripts_SCRIPTS += \
 man_MANS += \
     ovn/utilities/ovn-ctl.8 \
     ovn/utilities/ovn-nbctl.8 \
-    ovn/utilities/ovn-sbctl.8
+    ovn/utilities/ovn-sbctl.8 \
+    ovn/utilities/ovn-trace.8
 
 MAN_ROOTS += ovn/utilities/ovn-sbctl.8.in
 
@@ -18,7 +19,8 @@ EXTRA_DIST += \
     ovn/utilities/ovn-ctl.8.xml \
     ovn/utilities/ovn-docker-overlay-driver \
     ovn/utilities/ovn-docker-underlay-driver \
-    ovn/utilities/ovn-nbctl.8.xml
+    ovn/utilities/ovn-nbctl.8.xml \
+    ovn/utilities/ovn-trace.8.xml
 
 DISTCLEANFILES += \
     ovn/utilities/ovn-ctl.8 \
@@ -35,4 +37,9 @@ bin_PROGRAMS += ovn/utilities/ovn-sbctl
 ovn_utilities_ovn_sbctl_SOURCES = ovn/utilities/ovn-sbctl.c
 ovn_utilities_ovn_sbctl_LDADD = ovn/lib/libovn.la ovsdb/libovsdb.la lib/libopenvswitch.la
 
+# ovn-trace
+bin_PROGRAMS += ovn/utilities/ovn-trace
+ovn_utilities_ovn_trace_SOURCES = ovn/utilities/ovn-trace.c
+ovn_utilities_ovn_trace_LDADD = ovn/lib/libovn.la ovsdb/libovsdb.la lib/libopenvswitch.la
+
 include ovn/utilities/bugtool/automake.mk
diff --git a/ovn/utilities/ovn-trace.8.xml b/ovn/utilities/ovn-trace.8.xml
new file mode 100644
index 0000000..411bf1c
--- /dev/null
+++ b/ovn/utilities/ovn-trace.8.xml
@@ -0,0 +1,287 @@
+<?xml version="1.0" encoding="utf-8"?>
+<manpage program="ovn-trace" section="8" title="ovn-trace">
+  <h1>Name</h1>
+  <p>ovn-trace -- Open Virtual Network logical network tracing utility</p>
+
+  <h1>Synopsis</h1>
+  <p><code>ovn-trace</code> [<var>options</var>] <var>datapath</var> <var>microflow</var></p>
+  <p><code>ovn-trace</code> [<var>options</var>] <code>--detach</code></p>
+  
+  <h1>Description</h1>
+  <p>
+    This utility simulates packet forwarding within an OVN logical network.
+    It can be used to run through ``what-if'' scenarios: if a packet
+    originates at a logical port, what will happen to it and where will it
+    ultimately end up?  Users already familiar with the Open vSwitch
+    <code>ofproto/trace</code> command described in
+    <code>ovs-vswitchd</code>(8) will find <code>ovn-trace</code> to be a
+    similar tool for logical networks.
+  </p>
+
+  <p>
+    <code>ovn-trace</code> works by reading the <code>Logical_Flow</code> and
+    other tables from the OVN southbound database (see
+    <code>ovn-sb</code>(5)).  It simulates a packet's path through logical
+    networks by repeatedly looking it up in the logical flow table, following
+    the entire tree of possibilities.
+  </p>
+
+  <p>
+    <code>ovn-trace</code> simulates only the OVN logical network.  It does
+    not simulate the physical elements on which the logical network is
+    layered.  This means that, for example, it is unimportant how VMs are
+    distributed among hypervisors, or whether their hypervisors are
+    functioning and reachable, so <code>ovn-trace</code> will yield the same
+    results regardless.  There is one important exception:
+    <code>ovn-northd</code>, the daemon that generates the logical flows that
+    <code>ovn-trace</code> simulates, treats logical ports differently based
+    on whether they are up or down.  Thus, if you see surprising results,
+    ensure that the ports involved in a simulation are up.
+  </p>
+
+  <p>
+    The simplest way to use <code>ovn-trace</code> is to provide
+    <var>datapath</var> and <var>microflow</var> arguments on the command
+    line.  In this case, it simulates the behavior of a single packet and
+    exits.  For an alternate usage model, see <code>Daemon Mode</code> below.
+  </p>
+
+  <p>
+    The <var>datapath</var> argument specifies the name of a logical
+    datapath.  Acceptable names are the <code>name</code> from the northbound
+    <code>Logical_Switch</code> or <code>Logical_Router</code> table, the
+    UUID of a record from one of those tables, or the UUID of a record from
+    the southbound <code>Datapath_Binding</code> table.
+  </p>
+
+  <p>
+    The <var>microflow</var> argument describes the packet whose forwarding
+    is to be simulated, in the syntax of an OVN logical expression, as
+    described in <code>ovn-sb</code>(5), to express constraints.  The parser
+    understands prerequisites; for example, if the expression refers to
+    <code>ip4.src</code>, there is no need to explicitly state
+    <code>ip4</code> or <code>eth.type == 0x800</code>.
+  </p>
+
+  <p>
+    For reasonable L2 behavior, the microflow should include at least
+    <code>inport</code> and <code>eth.dst</code>, plus <code>eth.src</code>
+    if port security is enabled.  For example:
+  </p>
+  <pre>
+    inport == "lp11" &amp;&amp; eth.src == 00:01:02:03:04:05 &amp;&amp; eth.dst == ff:ff:ff:ff:ff:ff
+  </pre>
+
+  <p>
+    For reasonable L3 behavior, <var>microflow</var> should also include
+    <code>ip4.src</code> and <code>ip4.dst</code> (or <code>ip6.src</code>
+    and <code>ip6.dst</code>) and <code>ip.ttl</code>.  For example:
+  </p>
+  <pre>
+    inport == "lp111" &amp;&amp; eth.src == f0:00:00:00:01:11 &amp;&amp; eth.dst == 00:00:00:00:ff:11
+    &amp;&amp; ip4.src == 192.168.11.1 &amp;&amp; ip4.dst == 192.168.22.2 &amp;&amp; ip.ttl == 64
+  </pre>
+
+  <p>Here's an ARP microflow example:</p>
+  <pre>
+    inport == "lp123"
+    &amp;&amp; eth.dst == ff:ff:ff:ff:ff:ff &amp;&amp; eth.src == f0:00:00:00:01:11
+    &amp;&amp; arp.op == 1 &amp;&amp; arp.sha == f0:00:00:00:01:11 &amp;&amp; arp.spa == 192.168.1.11
+    &amp;&amp; arp.tha == ff:ff:ff:ff:ff:ff &amp;&amp; arp.tpa == 192.168.2.22
+  </pre>
+
+  <p>
+    <code>ovn-trace</code> will reject erroneous microflow expressions, which
+    beyond syntax errors fall into two categories.  First, they can be
+    ambiguous.  For example, <code>tcp.src == 80</code> is ambiguous because
+    it does not state IPv4 or IPv6 as the Ethernet type.  <code>ip4
+    &amp;&amp; tcp.src > 1024</code> is also ambiguous because it does not
+    constrain bits of <code>tcp.src</code> to particular values.  Second,
+    they can be contradictory, e.g. <code>ip4 &amp;&amp; ip6</code>.
+  </p>
+
+  <h1>Output</h1>
+
+  <p>
+    <code>ovn-trace</code> supports three different forms of output, each
+    described in a separate section below.  Regardless of the selected output
+    format, <code>ovn-trace</code> starts the output with a line that shows
+    the microflow being traced in OpenFlow syntax.
+  </p>
+
+  <h2>Detailed Output</h2>
+
+  <p>
+    The detailed form of output is also the default form.  This form groups
+    output into sections headed up by the ingress or egress pipeline being
+    traversed.  Each pipeline lists each table that was visited (by number
+    and name), the match expression and priority of the logical flow that was
+    matched, and the actions that were executed.
+  </p>
+
+  <p>
+    The execution of OVN logical actions naturally forms a ``control stack''
+    that resembles that of a program in conventional programming languages
+    such as C or Java.  Because the <code>next</code> action that calls into
+    another logical flow table for a lookup is a recursive construct, OVN
+    ``programs'' in practice tend to form deep control stacks that, displayed
+    in the obvious way using additional indentation for each level, quickly
+    use up the horizontal space on all but the widest displays.  To make
+    detailed output more readable, without loss of generality,
+    <code>ovn-trace</code> omits indentation for ``tail recursion,'' that is,
+    when <code>next</code> is the last action in a logical flow, it does not
+    indent details of the next table lookup more deeply.  Output still uses
+    indentation when it is needed for clarity.
+  </p>
+
+  <p>
+    Traces of OVN ``programs'' also tend to encounter long strings of logical
+    flows with match expression <code>1</code> (which matches every packet)
+    and the single action <code>next;</code>.  These are uninteresting and
+    merely clutter output, so <code>ovn-trace</code> omits them entirely even
+    from detailed output.
+  </p>
+
+  <p>
+    The following excerpt from detailed <code>ovn-trace</code> output shows a
+    section for a packet traversing the ingress pipeline of logical datapath
+    <code>ls1</code> with ingress logical port <code>lp111</code>.  The
+    packet matches a logical flow in table 0 (aka
+    <code>ls_in_port_sec_l2</code>) with priority 50 and executes
+    <code>next(1);</code> to pass to table 1.  Tables 1 through 11 are
+    trivial and omitted.  In table 12 (aka <code>ls_in_l2_lkup</code>), the
+    packet matches a flow with priority 50 based on its Ethernet destination
+    address and the flow's actions output the packet to the
+    <code>lrp11-attachment</code> logical port.
+  </p>
+
+  <pre fixed="yes">
+    ingress(dp="ls1", inport="lp111")
+    ---------------------------------
+    0. ls_in_port_sec_l2: inport == "lp111", priority 50
+    next(1);
+    12. ls_in_l2_lkup: eth.dst == 00:00:00:00:ff:11, priority 50
+    outport = "lrp11-attachment";
+    output;
+  </pre>
+
+  <h2>Summary Output</h2>
+
+  <p>
+    Summary output includes the logical pipelines visited by a packet and the
+    logical actions executed on it.  Compared to the detailed output,
+    however, it removes details of tables and logical flows traversed by a
+    packet.  It uses a format closer to that of a programming language and
+    does not attempt to avoid indentation.  The summary output equivalent to
+    the above detailed output fragment is:
+  </p>
+
+  <pre fixed="yes">
+    ingress(dp="ls1", inport="lp111") {
+    outport = "lrp11-attachment";
+    output;
+    ...
+    };
+  </pre>
+
+  <h2>Minimal Output</h2>
+
+  <p>
+    Minimal output includes only actions that modify packet data (not
+    including OVN registers or metadata such as <code>outport</code>) and
+    <code>output</code> actions that actually deliver a packet to a logical
+    port (excluding patch ports).  The operands of actions that modify packet
+    data are displayed reduced to constants, e.g. <code>ip4.dst =
+    reg0;</code> might be shown as <code>ip4.dst = 192.168.0.1;</code> if that
+    was the value actually loaded.  This yields output even simpler than the
+    summary format.  (Users familiar with Open vSwitch may recognize this as
+    similar in spirit to the datapath actions listed at the bottom of
+    <code>ofproto/trace</code> output.)
+  </p>
+
+  <p>
+    The minimal output format reflects the externally seen behavior of the
+    logical networks more than it does the implementation.  This makes this
+    output format the most suitable for use in regression tests, because it
+    is least likely to change when logical flow tables are rearranged without
+    semantic change.
+  </p>
+
+  <h1>Daemon Mode</h1>
+
+  <p>
+    If <code>ovn-trace</code> is invoked with the <code>--detach</code> option
+    (see <code>Daemon Options</code>, below), it runs in the background as a
+    daemon and accepts commands from <code>ovs-appctl</code> (or another
+    JSON-RPC client) indefinitely.  The currently supported commands are
+    described below.
+  </p>
+
+  <p>
+    
+  </p>
+
+  <dl>
+    <dt><code>trace</code> [<var>options</var>] <var>datapath</var> <var>microflow</var></dt>
+    <dd>
+      Traces <var>microflow</var> through <var>datapath</var> and replies with
+      the results of the trace.  Accepts the <var>options</var> described under
+      <code>Trace Options</code> below.
+    </dd>
+
+    <dt><code>exit</code></dt>
+    <dd>Causes <code>ovn-trace</code> to gracefully terminate.</dd>
+  </dl>
+
+  <h1>Options</h1>
+  
+  <h2>Trace Options</h2>
+
+  <dl>
+    <dt><code>--detailed</code></dt>
+    <dt><code>--summary</code></dt>
+    <dt><code>--minimal</code></dt>
+    <dd>
+      These options control the form and level of detail in
+      <code>ovn-trace</code> output.  If more than one of these options is
+      specified, all of the selected forms are output, in the order listed
+      above, each headed by a banner line.  If none of these options is
+      given, <code>--detailed</code> is the default.  See
+      <code>Output</code>, above, for a description of each kind of output.
+    </dd>
+
+    <dt><code>--all</code></dt>
+    <dd>
+      Selects all three forms of output.
+    </dd>
+  </dl>
+
+  <h2>Daemon Options</h2>
+  <xi:include href="lib/daemon.xml" xmlns:xi="http://www.w3.org/2003/XInclude"/>
+
+  <h2>Logging Options</h2>
+  <xi:include href="lib/vlog.xml" xmlns:xi="http://www.w3.org/2003/XInclude"/>
+
+  <h2>PKI Options</h2>
+  <p>
+    PKI configuration is required to use SSL for the connection to the
+    database.
+  </p>
+  <xi:include href="lib/ssl.xml" xmlns:xi="http://www.w3.org/2003/XInclude"/>
+
+  <h2>Other Options</h2>
+
+  <dl>
+    <dt><code>--db</code> <var>database</var></dt>
+    <dd>
+      The OVSDB database remote to contact.  If the <env>OVN_SB_DB</env>
+      environment variable is set, its value is used as the default.
+      Otherwise, the default is <code>unix:@RUNDIR@/db.sock</code>, but this
+      default is unlikely to be useful outside of single-machine OVN test
+      environments.
+    </dd>
+  </dl>
+  
+  <xi:include href="lib/common.xml" xmlns:xi="http://www.w3.org/2003/XInclude"/>
+
+</manpage>
diff --git a/ovn/utilities/ovn-trace.c b/ovn/utilities/ovn-trace.c
new file mode 100644
index 0000000..7f4ed94
--- /dev/null
+++ b/ovn/utilities/ovn-trace.c
@@ -0,0 +1,1429 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <getopt.h>
+
+#include "command-line.h"
+#include "compiler.h"
+#include "daemon.h"
+#include "dirs.h"
+#include "fatal-signal.h"
+#include "flow.h"
+#include "nx-match.h"
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/ofp-actions.h"
+#include "openvswitch/vlog.h"
+#include "ovn/actions.h"
+#include "ovn/expr.h"
+#include "ovn/lex.h"
+#include "ovn/lib/logical-fields.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "ovn/lib/ovn-util.h"
+#include "ovsdb-idl.h"
+#include "poll-loop.h"
+#include "stream-ssl.h"
+#include "stream.h"
+#include "unixctl.h"
+#include "util.h"
+
+VLOG_DEFINE_THIS_MODULE(ovntrace);
+
+/* --db: The database server to contact. */
+static const char *db;
+
+/* --unixctl: Path to use for the unixctl server, which provides the "trace"
+ * and "exit" commands in daemon mode. */
+static char *unixctl_path;
+
+/* The southbound database. */
+static struct ovsdb_idl *ovnsb_idl;
+
+/* --detailed: Show a detailed, table-by-table trace. */
+static bool detailed;
+
+/* --summary: Show a trace that omits table information. */
+static bool summary;
+
+/* --minimal: Show a trace with only minimal information. */
+static bool minimal;
+
+OVS_NO_RETURN static void usage(void);
+static void parse_options(int argc, char *argv[]);
+static char *trace(const char *datapath, const char *flow);
+static void read_db(void);
+static unixctl_cb_func ovntrace_exit;
+static unixctl_cb_func ovntrace_trace;
+
+int
+main(int argc, char *argv[])
+{
+    set_program_name(argv[0]);
+    fatal_ignore_sigpipe();
+    vlog_set_levels_from_string_assert("reconnect:warn");
+    sbrec_init();
+
+    /* Parse command line. */
+    parse_options(argc, argv);
+    argc -= optind;
+    argv += optind;
+
+    if (get_detach()) {
+        if (argc != 0) {
+            ovs_fatal(0, "non-option arguments not supported with --detach "
+                      "(use --help for help)");
+        }
+    } else {
+        if (argc != 2) {
+            ovs_fatal(0, "exactly two non-option arguments are required "
+                      "(use --help for help)");
+        }
+    }
+
+    struct unixctl_server *server = NULL;  /* Stays NULL without --detach. */
+    bool exiting = false;                  /* Aux for the "exit" command. */
+    if (get_detach()) {
+        daemonize_start(false);
+        int error = unixctl_server_create(unixctl_path, &server);
+        if (error) {
+            ovs_fatal(error, "failed to create unixctl server");
+        }
+        unixctl_command_register("exit", "", 0, 0, ovntrace_exit, &exiting);
+        unixctl_command_register("trace", "[OPTIONS] DATAPATH MICROFLOW",
+                                 2, INT_MAX, ovntrace_trace, NULL);
+    }
+    ovnsb_idl = ovsdb_idl_create(db, &sbrec_idl_class, true, false);
+
+    bool already_read = false;  /* Ensures read_db() is called only once. */
+    for (;;) {
+        ovsdb_idl_run(ovnsb_idl);
+        unixctl_server_run(server);
+        if (!ovsdb_idl_is_alive(ovnsb_idl)) {
+            int retval = ovsdb_idl_get_last_error(ovnsb_idl);
+            ovs_fatal(0, "%s: database connection failed (%s)",
+                      db, ovs_retval_to_string(retval));
+        }
+
+        if (ovsdb_idl_has_ever_connected(ovnsb_idl)) {
+            if (!already_read) {
+                already_read = true;
+                read_db();
+            }
+
+            daemonize_complete();
+            if (!get_detach()) {
+                char *output = trace(argv[0], argv[1]);
+                fputs(output, stdout);
+                free(output);
+                return 0;       /* One-shot mode: trace once and exit. */
+            }
+        }
+
+        if (exiting) {
+            break;
+        }
+        ovsdb_idl_wait(ovnsb_idl);
+        unixctl_server_wait(server);
+        poll_block();
+    }
+}
+
+static void
+parse_options(int argc, char *argv[])
+{
+    enum {
+        OPT_DB = UCHAR_MAX + 1,
+        OPT_UNIXCTL,
+        OPT_DETAILED,
+        OPT_SUMMARY,
+        OPT_MINIMAL,
+        OPT_ALL,
+        DAEMON_OPTION_ENUMS,
+        VLOG_OPTION_ENUMS
+    };
+    static const struct option long_options[] = {
+        {"db", required_argument, NULL, OPT_DB},
+        {"unixctl", required_argument, NULL, OPT_UNIXCTL},
+        {"detailed", no_argument, NULL, OPT_DETAILED},
+        {"summary", no_argument, NULL, OPT_SUMMARY},
+        {"minimal", no_argument, NULL, OPT_MINIMAL},
+        {"all", no_argument, NULL, OPT_ALL},
+        {"help", no_argument, NULL, 'h'},
+        {"version", no_argument, NULL, 'V'},
+        DAEMON_LONG_OPTIONS,
+        VLOG_LONG_OPTIONS,
+        STREAM_SSL_LONG_OPTIONS,
+        {NULL, 0, NULL, 0},
+    };
+    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
+
+    for (;;) {
+        int idx;
+        int c;
+
+        c = getopt_long(argc, argv, short_options, long_options, &idx);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        case OPT_DB:
+            db = optarg;
+            break;
+
+        case OPT_UNIXCTL:
+            unixctl_path = optarg;
+            break;
+
+        case OPT_DETAILED:
+            detailed = true;
+            break;
+
+        case OPT_SUMMARY:
+            summary = true;
+            break;
+
+        case OPT_MINIMAL:
+            minimal = true;
+            break;
+
+        case OPT_ALL:
+            detailed = summary = minimal = true;
+            break;
+
+        case 'h':
+            usage();
+
+        case 'V':
+            ovs_print_version(0, 0);
+            printf("DB Schema %s\n", sbrec_get_db_version());
+            exit(EXIT_SUCCESS);
+
+        DAEMON_OPTION_HANDLERS
+        VLOG_OPTION_HANDLERS
+        STREAM_SSL_OPTION_HANDLERS
+
+        case '?':
+            exit(EXIT_FAILURE);
+
+        default:
+            abort();
+        }
+    }
+    free(short_options);
+
+    if (!db) {
+        db = default_sb_db();
+    }
+
+    if (!detailed && !summary && !minimal) {
+        detailed = true;
+    }
+}
+
+static void
+usage(void)
+{
+    printf("\
+%s: OVN trace utility\n\
+usage: %s [OPTIONS] DATAPATH MICROFLOW\n\
+       %s [OPTIONS] --detach\n\
+\n\
+Options:\n\
+  --db=DATABASE               connect to DATABASE\n\
+                              (default: %s)\n",
+           program_name, program_name, program_name, default_sb_db());
+    daemon_usage();
+    vlog_usage();
+    printf("\n\
+Other options:\n\
+  --unixctl=SOCKET            set control socket name\n\
+  -h, --help                  display this help message\n\
+  -V, --version               display version information\n");
+    stream_usage("database", true, true, false);
+    exit(EXIT_SUCCESS);
+}
+
+/* A logical datapath, built by read_datapaths() from one southbound
+ * Datapath_Binding record. */
+struct ovntrace_datapath {
+    struct hmap_node sb_uuid_node; /* In 'datapaths', hashed on 'sb_uuid'. */
+    struct uuid sb_uuid;        /* UUID of the Datapath_Binding record. */
+    struct uuid nb_uuid;        /* From external_ids "logical-switch" or
+                                 * "logical-router", else same as 'sb_uuid'. */
+    char *name;                 /* external_ids "name", else 'nb_uuid' as a
+                                 * string. */
+    uint32_t tunnel_key;
+
+    struct ovs_list mcgroups;   /* Contains "struct ovntrace_mcgroup"s. */
+
+    /* Logical flows, sorted with compare_flow() by read_flows(). */
+    struct ovntrace_flow **flows;
+    size_t n_flows, allocated_flows;
+
+    struct hmap mac_bindings;   /* Contains "struct ovntrace_mac_binding"s. */
+};
+
+/* A logical port, built by read_ports() from one southbound Port_Binding
+ * record and stored in 'ports' under its logical port name. */
+struct ovntrace_port {
+    struct ovntrace_datapath *dp; /* Datapath that contains the port. */
+    char *name;                 /* Logical port name. */
+    char *type;                 /* Copy of the Port_Binding "type" column. */
+    uint16_t tunnel_key;
+    struct ovntrace_port *peer; /* Patch ports only. */
+};
+
+/* A logical multicast group, built by read_mcgroups() from one southbound
+ * Multicast_Group record. */
+struct ovntrace_mcgroup {
+    struct ovs_list list_node;  /* In struct ovntrace_datapath's 'mcgroups'. */
+
+    struct ovntrace_datapath *dp; /* Datapath that owns the group. */
+    char *name;
+
+    uint16_t tunnel_key;
+
+    /* Member ports, sorted by name with compare_port().  Only the first
+     * 'n_ports' elements are valid. */
+    struct ovntrace_port **ports;
+    size_t n_ports;
+};
+
+/* Which logical pipeline a flow belongs to.  compare_flow() sorts P_INGRESS
+ * flows before P_EGRESS flows. */
+enum ovntrace_pipeline { P_INGRESS, P_EGRESS };
+
+/* A logical flow, built by read_flows() from one southbound Logical_Flow
+ * record. */
+struct ovntrace_flow {
+    enum ovntrace_pipeline pipeline;
+    int table_id;
+    char *stage_name;           /* external_ids "stage-name", or NULL. */
+    int priority;
+    char *match_s;              /* Match as written in the database. */
+    struct expr *match;         /* Parsed match combined with the actions'
+                                 * prerequisites, annotated and simplified. */
+    struct ovnact *ovnacts;     /* Parsed actions. */
+    size_t ovnacts_len;         /* Size of 'ovnacts', in bytes. */
+};
+
+/* An IP-to-Ethernet address binding on a logical port, built by
+ * read_mac_bindings() from one southbound MAC_Binding record. */
+struct ovntrace_mac_binding {
+    struct hmap_node node;      /* In struct ovntrace_datapath's
+                                 * 'mac_bindings', hashed with
+                                 * hash_mac_binding(). */
+    uint16_t port_key;          /* Tunnel key of the logical port. */
+    struct in6_addr ip;         /* IPv4 addresses are stored IPv4-mapped. */
+    struct eth_addr mac;
+};
+
+/* Returns the hash used for a MAC binding keyed on ('port_key', 'ip'): the
+ * bytes of '*ip' hashed with 'port_key' as the basis. */
+static inline uint32_t
+hash_mac_binding(uint16_t port_key, const struct in6_addr *ip)
+{
+    const uint32_t basis = port_key;
+
+    return hash_bytes(ip, sizeof(struct in6_addr), basis);
+}
+
+/* Every ovntrace_datapath, by southbound Datapath_Binding record UUID.
+ * Initialized and populated by read_datapaths(). */
+static struct hmap datapaths;
+
+/* Every ovntrace_port, by name.  Initialized and populated by
+ * read_ports(). */
+static struct shash ports;
+
+/* Symbol table for expressions and actions.  Initialized by read_flows(). */
+static struct shash symtab;
+
+/* Address sets.  Initialized and populated by read_address_sets(). */
+static struct shash address_sets;
+
+/* Searches 'datapaths' for the datapath whose southbound UUID equals
+ * '*sb_uuid'.  Returns the datapath, or NULL if there is none. */
+static struct ovntrace_datapath *
+ovntrace_datapath_find_by_sb_uuid(const struct uuid *sb_uuid)
+{
+    const size_t hash = uuid_hash(sb_uuid);
+    struct ovntrace_datapath *candidate;
+
+    HMAP_FOR_EACH_WITH_HASH (candidate, sb_uuid_node, hash, &datapaths) {
+        if (!uuid_equals(&candidate->sb_uuid, sb_uuid)) {
+            continue;
+        }
+        return candidate;
+    }
+    return NULL;
+}
+
+/* Looks up a datapath by a user-supplied 'name', which may be the datapath's
+ * name or, if it parses as a UUID, its southbound or northbound UUID.
+ * Returns the first match found, or NULL if nothing matches. */
+static const struct ovntrace_datapath *
+ovntrace_datapath_find_by_name(const char *name)
+{
+    struct uuid wanted;
+    const bool name_is_uuid = uuid_from_string(&wanted, name);
+
+    struct ovntrace_datapath *candidate;
+    HMAP_FOR_EACH (candidate, sb_uuid_node, &datapaths) {
+        if (!strcmp(name, candidate->name)) {
+            return candidate;
+        }
+        if (name_is_uuid
+            && (uuid_equals(&wanted, &candidate->sb_uuid)
+                || uuid_equals(&wanted, &candidate->nb_uuid))) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/* Scans every known port for one that belongs to 'dp' and has the given
+ * 'tunnel_key'.  Returns the port, or NULL if there is none. */
+static const struct ovntrace_port *
+ovntrace_port_find_by_key(const struct ovntrace_datapath *dp,
+                          uint16_t tunnel_key)
+{
+    const struct shash_node *entry;
+
+    SHASH_FOR_EACH (entry, &ports) {
+        const struct ovntrace_port *candidate = entry->data;
+
+        if (candidate->dp != dp) {
+            continue;
+        }
+        if (candidate->tunnel_key == tunnel_key) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/* Returns the multicast group in 'dp' whose tunnel key is 'tunnel_key', or
+ * NULL if 'dp' has no such group. */
+static const struct ovntrace_mcgroup *
+ovntrace_mcgroup_find_by_key(const struct ovntrace_datapath *dp,
+                             uint16_t tunnel_key)
+{
+    const struct ovntrace_mcgroup *group;
+
+    LIST_FOR_EACH (group, list_node, &dp->mcgroups) {
+        if (group->tunnel_key != tunnel_key) {
+            continue;
+        }
+        return group;
+    }
+    return NULL;
+}
+
+/* Returns the multicast group in 'dp' named 'name', or NULL if 'dp' has no
+ * such group. */
+static const struct ovntrace_mcgroup *
+ovntrace_mcgroup_find_by_name(const struct ovntrace_datapath *dp,
+                              const char *name)
+{
+    const struct ovntrace_mcgroup *group;
+
+    LIST_FOR_EACH (group, list_node, &dp->mcgroups) {
+        if (strcmp(group->name, name) == 0) {
+            return group;
+        }
+    }
+    return NULL;
+}
+
+/* Searches the MAC bindings of 'dp' for the one that maps '*ip' on the
+ * logical port with tunnel key 'port_key'.  Returns the binding, or NULL if
+ * there is none. */
+static const struct ovntrace_mac_binding *
+ovntrace_mac_binding_find(const struct ovntrace_datapath *dp,
+                          uint16_t port_key, const struct in6_addr *ip)
+{
+    const uint32_t hash = hash_mac_binding(port_key, ip);
+    const struct ovntrace_mac_binding *candidate;
+
+    HMAP_FOR_EACH_WITH_HASH (candidate, node, hash, &dp->mac_bindings) {
+        if (candidate->port_key != port_key) {
+            continue;
+        }
+        if (ipv6_addr_equals(ip, &candidate->ip)) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/* Initializes 'datapaths' and fills it with one ovntrace_datapath per
+ * southbound Datapath_Binding record. */
+static void
+read_datapaths(void)
+{
+    const struct sbrec_datapath_binding *sbdb;
+
+    hmap_init(&datapaths);
+    SBREC_DATAPATH_BINDING_FOR_EACH (sbdb, ovnsb_idl) {
+        struct ovntrace_datapath *dp = xzalloc(sizeof *dp);
+
+        ovs_list_init(&dp->mcgroups);
+        hmap_init(&dp->mac_bindings);
+        dp->tunnel_key = sbdb->tunnel_key;
+        dp->sb_uuid = sbdb->header_.uuid;
+
+        /* Use the northbound UUID recorded in external_ids when there is
+         * one, otherwise fall back to the southbound UUID. */
+        const struct smap *ids = &sbdb->external_ids;
+        bool have_nb_uuid
+            = (smap_get_uuid(ids, "logical-switch", &dp->nb_uuid)
+               || smap_get_uuid(ids, "logical-router", &dp->nb_uuid));
+        if (!have_nb_uuid) {
+            dp->nb_uuid = dp->sb_uuid;
+        }
+
+        /* Unnamed datapaths are known by their northbound UUID. */
+        const char *name = smap_get(ids, "name");
+        if (name) {
+            dp->name = xstrdup(name);
+        } else {
+            dp->name = xasprintf(UUID_FMT, UUID_ARGS(&dp->nb_uuid));
+        }
+
+        hmap_insert(&datapaths, &dp->sb_uuid_node, uuid_hash(&dp->sb_uuid));
+    }
+}
+
+/* Initializes 'ports' and fills it with one ovntrace_port per southbound
+ * Port_Binding record.  Must run after read_datapaths(), because each port
+ * is attached to its datapath by UUID.
+ *
+ * Ports whose datapath is unknown, or whose name duplicates an earlier
+ * port's, are skipped with a warning. */
+static void
+read_ports(void)
+{
+    const struct sbrec_port_binding *sbpb;
+
+    shash_init(&ports);
+    SBREC_PORT_BINDING_FOR_EACH (sbpb, ovnsb_idl) {
+        const char *port_name = sbpb->logical_port;
+        const struct uuid *dp_uuid = &sbpb->datapath->header_.uuid;
+        struct ovntrace_datapath *dp
+            = ovntrace_datapath_find_by_sb_uuid(dp_uuid);
+        if (!dp) {
+            VLOG_WARN("logical port %s missing datapath", port_name);
+            continue;
+        }
+
+        struct ovntrace_port *port = xzalloc(sizeof *port);
+        if (!shash_add_once(&ports, port_name, port)) {
+            VLOG_WARN("duplicate logical port name %s", port_name);
+            free(port);
+            continue;
+        }
+        port->tunnel_key = sbpb->tunnel_key;
+        port->type = xstrdup(sbpb->type);
+        port->name = xstrdup(port_name);
+        port->dp = dp;
+
+        /* Only patch ports have peers. */
+        if (strcmp(sbpb->type, "patch")) {
+            continue;
+        }
+        const char *peer_name = smap_get(&sbpb->options, "peer");
+        if (!peer_name) {
+            continue;
+        }
+
+        /* The peer can only be found if it was read earlier, so link both
+         * directions whenever the second port of a pair shows up. */
+        struct ovntrace_port *peer = shash_find_data(&ports, peer_name);
+        if (peer) {
+            port->peer = peer;
+            peer->peer = port;
+        }
+    }
+}
+
+/* qsort() comparison function for an array of "struct ovntrace_port *".
+ * Orders the ports by name, as strcmp() does. */
+static int
+compare_port(const void *a_, const void *b_)
+{
+    const struct ovntrace_port *a = *(struct ovntrace_port *const *) a_;
+    const struct ovntrace_port *b = *(struct ovntrace_port *const *) b_;
+
+    return strcmp(a->name, b->name);
+}
+
+/* Reads every southbound Multicast_Group record into an ovntrace_mcgroup
+ * appended to its datapath's 'mcgroups' list.  Must run after read_ports(),
+ * because member ports are resolved by name through 'ports'.
+ *
+ * Groups whose datapath is unknown are skipped with a warning.  So are
+ * member ports that are unknown or that belong to a datapath other than the
+ * group's own. */
+static void
+read_mcgroups(void)
+{
+    const struct sbrec_multicast_group *sbmg;
+    SBREC_MULTICAST_GROUP_FOR_EACH (sbmg, ovnsb_idl) {
+        struct ovntrace_datapath *dp
+            = ovntrace_datapath_find_by_sb_uuid(&sbmg->datapath->header_.uuid);
+        if (!dp) {
+            VLOG_WARN("logical multicast group %s missing datapath",
+                      sbmg->name);
+            continue;
+        }
+
+        struct ovntrace_mcgroup *mcgroup = xzalloc(sizeof *mcgroup);
+        ovs_list_push_back(&dp->mcgroups, &mcgroup->list_node);
+        mcgroup->dp = dp;
+        mcgroup->tunnel_key = sbmg->tunnel_key;
+        mcgroup->name = xstrdup(sbmg->name);
+
+        /* Sized for every listed port; 'n_ports' counts only those that are
+         * actually accepted below. */
+        mcgroup->ports = xmalloc(sbmg->n_ports * sizeof *mcgroup->ports);
+        for (size_t i = 0; i < sbmg->n_ports; i++) {
+            const char *port_name = sbmg->ports[i]->logical_port;
+            struct ovntrace_port *p = shash_find_data(&ports, port_name);
+            if (!p) {
+                VLOG_WARN("missing port %s", port_name);
+                continue;
+            }
+
+            /* The port must live in the group's own datapath.  Comparing
+             * the port's datapath with itself, as was done before, can
+             * never detect a stray port. */
+            if (p->dp != dp) {
+                VLOG_WARN("multicast group %s in datapath %s contains "
+                          "port %s outside that datapath",
+                          mcgroup->name, mcgroup->dp->name, port_name);
+                continue;
+            }
+            mcgroup->ports[mcgroup->n_ports++] = p;
+        }
+
+        /* Sort the ports in alphabetical order to make output more
+         * predictable. */
+        qsort(mcgroup->ports, mcgroup->n_ports, sizeof *mcgroup->ports,
+              compare_port);
+    }
+}
+
+/* Initializes 'address_sets' and registers every southbound Address_Set
+ * record in it with expr_macros_add(). */
+static void
+read_address_sets(void)
+{
+    const struct sbrec_address_set *sbas;
+
+    shash_init(&address_sets);
+    SBREC_ADDRESS_SET_FOR_EACH (sbas, ovnsb_idl) {
+        const char *const *addresses = (const char *const *) sbas->addresses;
+
+        expr_macros_add(&address_sets, sbas->name, addresses,
+                        sbas->n_addresses);
+    }
+}
+
+/* qsort() comparison function for an array of "struct ovntrace_flow *".
+ *
+ * Orders flows by pipeline (ingress first), then by increasing table ID,
+ * then by decreasing priority.  Flows equal in all three compare equal. */
+static int
+compare_flow(const void *a_, const void *b_)
+{
+    const struct ovntrace_flow *a = *(struct ovntrace_flow *const *) a_;
+    const struct ovntrace_flow *b = *(struct ovntrace_flow *const *) b_;
+
+    /* Sort P_INGRESS before P_EGRESS. */
+    if (a->pipeline != b->pipeline) {
+        return a->pipeline == P_EGRESS ? 1 : -1;
+    }
+
+    /* Sort in increasing order of table_id. */
+    if (a->table_id != b->table_id) {
+        return a->table_id > b->table_id ? 1 : -1;
+    }
+
+    /* Sort in decreasing order of priority. */
+    if (a->priority != b->priority) {
+        return a->priority > b->priority ? -1 : 1;
+    }
+
+    /* Otherwise who cares. */
+    return 0;
+}
+
+/* Initializes 'symtab', then reads every southbound Logical_Flow record
+ * into an ovntrace_flow attached to its datapath, and finally sorts each
+ * datapath's flows with compare_flow().
+ *
+ * Must run after read_datapaths() and read_address_sets(): flows are
+ * attached to datapaths by UUID and matches may refer to address sets.
+ *
+ * Flows whose datapath is unknown, or whose match or actions cannot be
+ * parsed or annotated, are skipped with a warning. */
+static void
+read_flows(void)
+{
+    ovn_init_symtab(&symtab);
+
+    const struct sbrec_logical_flow *sblf;
+    SBREC_LOGICAL_FLOW_FOR_EACH (sblf, ovnsb_idl) {
+        const struct sbrec_datapath_binding *sbdb = sblf->logical_datapath;
+        struct ovntrace_datapath *dp
+            = ovntrace_datapath_find_by_sb_uuid(&sbdb->header_.uuid);
+        if (!dp) {
+            VLOG_WARN("logical flow missing datapath");
+            continue;
+        }
+
+        char *error;
+        struct expr *match;
+        match = expr_parse_string(sblf->match, &symtab, &address_sets, &error);
+        if (error) {
+            VLOG_WARN("%s: parsing expression failed (%s)",
+                      sblf->match, error);
+            free(error);
+            continue;
+        }
+
+        struct ovnact_parse_params pp = {
+            .symtab = &symtab,
+            .dhcp_opts = NULL /* XXX */,
+            .n_tables = 16,
+            .cur_ltable = sblf->table_id,
+        };
+        uint64_t stub[1024 / 8];
+        struct ofpbuf ovnacts = OFPBUF_STUB_INITIALIZER(stub);
+        struct expr *prereqs;
+        error = ovnacts_parse_string(sblf->actions, &pp, &ovnacts, &prereqs);
+        if (error) {
+            VLOG_WARN("%s: parsing actions failed (%s)", sblf->actions, error);
+            free(error);
+            expr_destroy(match);
+            /* Release the buffer in case parsing grew it beyond the stack
+             * stub, as the annotation failure path below also does. */
+            ofpbuf_uninit(&ovnacts);
+            continue;
+        }
+
+        /* A flow only matches if the prerequisites of its actions hold
+         * too, so fold them into the match. */
+        match = expr_combine(EXPR_T_AND, match, prereqs);
+        match = expr_annotate(match, &symtab, &error);
+        if (error) {
+            VLOG_WARN("match annotation failed (%s)", error);
+            free(error);
+            expr_destroy(match);
+            ovnacts_free(ovnacts.data, ovnacts.size);
+            ofpbuf_uninit(&ovnacts);
+            continue;
+        }
+        if (match) {
+            match = expr_simplify(match);
+        }
+
+        struct ovntrace_flow *flow = xzalloc(sizeof *flow);
+        flow->pipeline = (!strcmp(sblf->pipeline, "ingress")
+                          ? P_INGRESS
+                          : P_EGRESS);
+        flow->table_id = sblf->table_id;
+        flow->stage_name = nullable_xstrdup(smap_get(&sblf->external_ids,
+                                                     "stage-name"));
+        flow->priority = sblf->priority;
+        flow->match_s = xstrdup(sblf->match);
+        flow->match = match;
+        /* Record the size before stealing the data out of the buffer. */
+        flow->ovnacts_len = ovnacts.size;
+        flow->ovnacts = ofpbuf_steal_data(&ovnacts);
+
+        if (dp->n_flows >= dp->allocated_flows) {
+            dp->flows = x2nrealloc(dp->flows, &dp->allocated_flows,
+                                   sizeof *dp->flows);
+        }
+        dp->flows[dp->n_flows++] = flow;
+    }
+
+    const struct ovntrace_datapath *dp;
+    HMAP_FOR_EACH (dp, sb_uuid_node, &datapaths) {
+        qsort(dp->flows, dp->n_flows, sizeof *dp->flows, compare_flow);
+    }
+}
+
+/* Reads every southbound MAC_Binding record into the 'mac_bindings' map of
+ * the datapath that contains the record's logical port.  Must run after
+ * read_ports(), because the port is looked up by name.
+ *
+ * Records that name an unknown port, a port in a different datapath, or an
+ * unparseable IP or Ethernet address are skipped with a warning. */
+static void
+read_mac_bindings(void)
+{
+    const struct sbrec_mac_binding *sbmb;
+
+    SBREC_MAC_BINDING_FOR_EACH (sbmb, ovnsb_idl) {
+        const char *port_name = sbmb->logical_port;
+        const struct ovntrace_port *port = shash_find_data(&ports, port_name);
+        if (!port) {
+            VLOG_WARN("missing port %s", sbmb->logical_port);
+            continue;
+        }
+
+        const struct uuid *record_dp = &sbmb->datapath->header_.uuid;
+        if (!uuid_equals(&port->dp->sb_uuid, record_dp)) {
+            VLOG_WARN("port %s is in wrong datapath", sbmb->logical_port);
+            continue;
+        }
+
+        /* Accept IPv4 or IPv6; IPv4 is stored in IPv4-mapped form. */
+        struct in6_addr addr6;
+        ovs_be32 addr4;
+        if (ip_parse(sbmb->ip, &addr4)) {
+            addr6 = in6_addr_mapped_ipv4(addr4);
+        } else if (!ipv6_parse(sbmb->ip, &addr6)) {
+            VLOG_WARN("%s: bad IP address", sbmb->ip);
+            continue;
+        }
+
+        struct eth_addr ea;
+        if (!eth_addr_from_string(sbmb->mac, &ea)) {
+            VLOG_WARN("%s: bad Ethernet address", sbmb->mac);
+            continue;
+        }
+
+        struct ovntrace_mac_binding *entry = xmalloc(sizeof *entry);
+        entry->mac = ea;
+        entry->ip = addr6;
+        entry->port_key = port->tunnel_key;
+        hmap_insert(&port->dp->mac_bindings, &entry->node,
+                    hash_mac_binding(entry->port_key, &addr6));
+    }
+}
+
+/* Loads the southbound database contents into the in-memory tables used
+ * for tracing.
+ *
+ * The order of the calls matters: ports, multicast groups and flows look up
+ * their datapaths, multicast groups and MAC bindings look up ports, and flow
+ * matches are parsed against the address sets. */
+static void
+read_db(void)
+{
+    read_datapaths();
+    read_ports();
+    read_mcgroups();
+    read_address_sets();
+    read_flows();
+    read_mac_bindings();
+}
+
+/* Port lookup callback passed to expr_evaluate() and
+ * ovnact_load_to_ofpact_set_field().
+ *
+ * 'dp_' is the "struct ovntrace_datapath" in which to resolve 'port_name'.
+ * On success stores the tunnel key in '*portp' and returns true:
+ *
+ *   - The empty string resolves to key 0.
+ *   - Otherwise a logical port named 'port_name' in 'dp_' is preferred.
+ *   - Otherwise a multicast group named 'port_name' in 'dp_' is used.
+ *
+ * Returns false, after logging a warning, if nothing matches. */
+static bool
+ovntrace_lookup_port(const void *dp_, const char *port_name,
+                     unsigned int *portp)
+{
+    const struct ovntrace_datapath *dp = dp_;
+
+    if (port_name[0] == '\0') {
+        *portp = 0;
+        return true;
+    }
+
+    const struct ovntrace_port *port = shash_find_data(&ports, port_name);
+    if (port) {
+        if (port->dp == dp) {
+            *portp = port->tunnel_key;
+            return true;
+        }
+        /* A port of that name exists, but in some other datapath.  Fall
+         * through in case the name is also a multicast group here. */
+        VLOG_WARN("%s: not in datapath %s", port_name, dp->name);
+    }
+
+    const struct ovntrace_mcgroup *mcgroup
+        = ovntrace_mcgroup_find_by_name(dp, port_name);
+    if (mcgroup) {
+        *portp = mcgroup->tunnel_key;
+        return true;
+    }
+
+    /* No trailing newline, matching the file's other log calls. */
+    VLOG_WARN("%s: unknown logical port", port_name);
+    return false;
+}
+
+/* Returns the first flow of 'dp', in its sorted order, that is in 'pipeline'
+ * and table 'table_id' and whose match evaluates true against 'uflow'.
+ * Returns NULL if no flow matches. */
+static const struct ovntrace_flow *
+ovntrace_flow_lookup(const struct ovntrace_datapath *dp,
+                     const struct flow *uflow,
+                     uint8_t table_id, enum ovntrace_pipeline pipeline)
+{
+    size_t i;
+
+    for (i = 0; i < dp->n_flows; i++) {
+        const struct ovntrace_flow *candidate = dp->flows[i];
+
+        if (candidate->pipeline != pipeline
+            || candidate->table_id != table_id) {
+            continue;
+        }
+        if (expr_evaluate(candidate->match, uflow, ovntrace_lookup_port,
+                          dp)) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+
+/* Kinds of node in the trace output tree. */
+enum ovntrace_node_type {
+    OVNTRACE_NODE_OUTPUT,       /* Note about a packet output, or about
+                                 * omitting one. */
+    OVNTRACE_NODE_MODIFY,       /* Externally visible change, e.g. a field
+                                 * assignment or "output(...)". */
+    OVNTRACE_NODE_PIPELINE,     /* Title of an ingress, egress or multicast
+                                 * pipeline run. */
+    OVNTRACE_NODE_TABLE,        /* NOTE(review): presumably one logical table
+                                 * lookup; its creation is outside this
+                                 * view. */
+    OVNTRACE_NODE_ACTION,       /* Text of an executed action, or a comment
+                                 * about one. */
+    OVNTRACE_NODE_ERROR,        /* "*** ..." diagnostic. */
+    OVNTRACE_NODE_TRANSFORMATION /* Nested action such as "arp" or "nd_na". */
+};
+
+/* Returns true for the node types classified here as terminal (output,
+ * modify, action and error) and false for pipeline, table and transformation
+ * nodes.  ovntrace_node_print_details() uses the result when choosing
+ * indentation. */
+static bool
+ovntrace_node_type_is_terminal(enum ovntrace_node_type type)
+{
+    /* No "default" case, so every enumerator has to be listed explicitly. */
+    switch (type) {
+    case OVNTRACE_NODE_OUTPUT:
+    case OVNTRACE_NODE_MODIFY:
+    case OVNTRACE_NODE_ACTION:
+    case OVNTRACE_NODE_ERROR:
+        return true;
+
+    case OVNTRACE_NODE_PIPELINE:
+    case OVNTRACE_NODE_TABLE:
+    case OVNTRACE_NODE_TRANSFORMATION:
+        return false;
+    }
+
+    /* Only reached for a value that is not a valid enumerator. */
+    OVS_NOT_REACHED();
+}
+
+/* One node in the trace output tree. */
+struct ovntrace_node {
+    struct ovs_list node;       /* In parent. */
+
+    enum ovntrace_node_type type;
+    const char *name;           /* Text to print; allocated by
+                                 * ovntrace_node_append(). */
+    bool always_indent;         /* Forces extra indentation in
+                                 * ovntrace_node_print_details(). */
+    struct ovs_list subs;       /* List of children. */
+};
+
+/* Creates a new node of the given 'type' with no children, names it with
+ * the printf-style 'format' and arguments, and appends it to the node list
+ * 'super'.  Returns the new node. */
+static struct ovntrace_node * OVS_PRINTF_FORMAT(3, 4)
+ovntrace_node_append(struct ovs_list *super, enum ovntrace_node_type type,
+                     const char *format, ...)
+{
+    struct ovntrace_node *new_node = xmalloc(sizeof *new_node);
+
+    va_list ap;
+    va_start(ap, format);
+    new_node->name = xvasprintf(format, ap);
+    va_end(ap);
+
+    new_node->type = type;
+    new_node->always_indent = false;
+    ovs_list_init(&new_node->subs);
+    ovs_list_push_back(super, &new_node->node);
+
+    return new_node;
+}
+
+/* Appends to 'new' a deep copy of every node in 'old', children included. */
+static void
+ovntrace_node_clone(const struct ovs_list *old, struct ovs_list *new)
+{
+    const struct ovntrace_node *src;
+
+    LIST_FOR_EACH (src, node, old) {
+        struct ovntrace_node *copy
+            = ovntrace_node_append(new, src->type, "%s", src->name);
+
+        copy->always_indent = src->always_indent;
+        ovntrace_node_clone(&src->subs, &copy->subs);
+    }
+}
+
+/* Appends to 'output' the detailed rendering of 'nodes' and, recursively,
+ * their children, starting at indentation 'level' (in units of 4 spaces).
+ *
+ * MODIFY nodes are left out.  Pipeline and transformation nodes are printed
+ * as titles: preceded by a blank line and underlined with dashes. */
+static void
+ovntrace_node_print_details(struct ds *output,
+                            const struct ovs_list *nodes, int level)
+{
+    const struct ovntrace_node *n;
+
+    LIST_FOR_EACH (n, node, nodes) {
+        if (n->type == OVNTRACE_NODE_MODIFY) {
+            continue;
+        }
+
+        /* Indent one extra step unless this is a non-terminal node at the
+         * very end of its list that has not asked for indentation. */
+        bool is_last = n->node.next == nodes;
+        bool more = (!is_last
+                     || n->always_indent
+                     || ovntrace_node_type_is_terminal(n->type));
+        int indent = (level + more) * 4;
+
+        bool title = (n->type == OVNTRACE_NODE_PIPELINE
+                      || n->type == OVNTRACE_NODE_TRANSFORMATION);
+
+        if (title) {
+            ds_put_char(output, '\n');
+        }
+        ds_put_char_multiple(output, ' ', indent);
+        ds_put_format(output, "%s\n", n->name);
+        if (title) {
+            ds_put_char_multiple(output, ' ', indent);
+            ds_put_char_multiple(output, '-', strlen(n->name));
+            ds_put_char(output, '\n');
+        }
+
+        ovntrace_node_print_details(output, &n->subs, level + 2 * more);
+    }
+}
+
+/* Prepares the tree rooted at 'nodes' for summary output by removing every
+ * MODIFY and TABLE node.  The children of a removed node, already pruned
+ * themselves, take its place in the parent's list.  Removed nodes are
+ * freed. */
+static void
+ovntrace_node_prune_summary(struct ovs_list *nodes)
+{
+    struct ovntrace_node *sub, *next;
+    LIST_FOR_EACH_SAFE (sub, next, node, nodes) {
+        ovntrace_node_prune_summary(&sub->subs);
+        if (sub->type == OVNTRACE_NODE_MODIFY ||
+            sub->type == OVNTRACE_NODE_TABLE) {
+            /* Replace 'sub' by its children, if any, ... */
+            ovs_list_remove(&sub->node);
+            ovs_list_splice(&next->node, sub->subs.next, &sub->subs);
+
+            /* ...then free 'sub', which nothing references any longer. */
+            free(CONST_CAST(char *, sub->name));
+            free(sub);
+        }
+    }
+}
+
+/* Appends to 'output' the summary rendering of 'nodes' and, recursively,
+ * their children, starting at indentation 'level' (in units of 4 spaces).
+ *
+ * Action nodes whose text begins with "next(" are left out.  A node with
+ * children is followed by a brace-enclosed block, and every node other than
+ * an action node is terminated with a semicolon. */
+static void
+ovntrace_node_print_summary(struct ds *output, const struct ovs_list *nodes,
+                            int level)
+{
+    const int indent = level * 4;
+    const struct ovntrace_node *n;
+
+    LIST_FOR_EACH (n, node, nodes) {
+        bool is_action = n->type == OVNTRACE_NODE_ACTION;
+
+        if (is_action && !strncmp(n->name, "next(", 5)) {
+            continue;
+        }
+
+        ds_put_char_multiple(output, ' ', indent);
+        ds_put_cstr(output, n->name);
+        if (!ovs_list_is_empty(&n->subs)) {
+            ds_put_cstr(output, " {\n");
+            ovntrace_node_print_summary(output, &n->subs, level + 1);
+            ds_put_char_multiple(output, ' ', indent);
+            ds_put_char(output, '}');
+        }
+        if (!is_action) {
+            ds_put_char(output, ';');
+        }
+        ds_put_char(output, '\n');
+    }
+}
+
+/* Prunes the tree rooted at 'nodes' more aggressively than
+ * ovntrace_node_prune_summary(): every ACTION, PIPELINE, TABLE and OUTPUT
+ * node is removed.  The children of a removed node, already pruned
+ * themselves, take its place in the parent's list.  Removed nodes are
+ * freed. */
+static void
+ovntrace_node_prune_hard(struct ovs_list *nodes)
+{
+    struct ovntrace_node *sub, *next;
+    LIST_FOR_EACH_SAFE (sub, next, node, nodes) {
+        ovntrace_node_prune_hard(&sub->subs);
+        if (sub->type == OVNTRACE_NODE_ACTION ||
+            sub->type == OVNTRACE_NODE_PIPELINE ||
+            sub->type == OVNTRACE_NODE_TABLE ||
+            sub->type == OVNTRACE_NODE_OUTPUT) {
+            /* Replace 'sub' by its children, if any, ... */
+            ovs_list_remove(&sub->node);
+            ovs_list_splice(&next->node, sub->subs.next, &sub->subs);
+
+            /* ...then free 'sub', which nothing references any longer. */
+            free(CONST_CAST(char *, sub->name));
+            free(sub);
+        }
+    }
+}
+
+/* Executes the "load" action 'load' on 'uflow', resolving port names
+ * within 'dp'.
+ *
+ * If the destination is not a register, a MODIFY node holding the formatted
+ * action, minus its trailing ';', is appended to 'super'.  The value is
+ * written to 'uflow' only if the field's prerequisites hold there. */
+static void
+execute_load(const struct ovnact_load *load,
+             const struct ovntrace_datapath *dp, struct flow *uflow,
+             struct ovs_list *super OVS_UNUSED)
+{
+    struct ofpact_set_field set_field;
+
+    memset(&set_field, 0, sizeof set_field);
+    ovnact_load_to_ofpact_set_field(load, ovntrace_lookup_port, dp,
+                                    &set_field);
+
+    const struct mf_field *field = set_field.field;
+    if (!mf_is_register(field->id)) {
+        struct ds text = DS_EMPTY_INITIALIZER;
+
+        ovnacts_format(&load->ovnact, OVNACT_LOAD_SIZE, &text);
+        ds_chomp(&text, ';');
+        ovntrace_node_append(super, OVNTRACE_NODE_MODIFY, "%s",
+                             ds_cstr(&text));
+        ds_destroy(&text);
+    }
+
+    if (mf_are_prereqs_ok(field, uflow, NULL)) {
+        mf_set_flow_value_masked(field, &set_field.value, &set_field.mask,
+                                 uflow);
+    }
+}
+
+/* Records the externally visible effect of copying subfield 'rsrc' into
+ * 'dst', whose resolved form is 'rdst'.
+ *
+ * Nothing is recorded if the destination is a register.  Otherwise a MODIFY
+ * node of the form "DST = VALUE" is appended to 'super', where VALUE is read
+ * from 'uflow'.  A source that covers its whole field is printed in that
+ * field's own format; any other source is printed in hex. */
+static void
+summarize_move(const struct mf_subfield *rsrc,
+               const struct expr_field *dst, const struct mf_subfield *rdst,
+               const struct flow *uflow, struct ovs_list *super OVS_UNUSED)
+{
+    if (mf_is_register(rdst->field->id)) {
+        return;
+    }
+
+    struct ds text = DS_EMPTY_INITIALIZER;
+    expr_field_format(dst, &text);
+    ds_put_cstr(&text, " = ");
+
+    bool whole_field = rsrc->ofs == 0 && rsrc->n_bits >= rsrc->field->n_bits;
+    if (whole_field) {
+        union mf_value value;
+
+        mf_get_value(rsrc->field, uflow, &value);
+        mf_format(rsrc->field, &value, NULL, &text);
+    } else {
+        union mf_subvalue cst;
+
+        mf_read_subfield(rsrc, uflow, &cst);
+        ds_put_hex(&text, &cst, sizeof cst);
+    }
+
+    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY, "%s", ds_cstr(&text));
+    ds_destroy(&text);
+}
+
+/* Executes the move action 'move' on 'uflow', copying the right-hand field
+ * into the left-hand field.  The change is summarized in 'super' before the
+ * flow is modified. */
+static void
+execute_move(const struct ovnact_move *move, struct flow *uflow,
+             struct ovs_list *super)
+{
+    struct mf_subfield to = expr_resolve_field(&move->lhs);
+    struct mf_subfield from = expr_resolve_field(&move->rhs);
+
+    summarize_move(&from, &move->lhs, &to, uflow, super);
+    mf_subfield_copy(&from, &to, uflow, NULL);
+}
+
+/* Executes the exchange action 'move' on 'uflow', swapping its two fields.
+ * Both halves of the swap are summarized in 'super' from the values the
+ * fields hold before the swap takes place. */
+static void
+execute_exchange(const struct ovnact_move *move, struct flow *uflow,
+             struct ovs_list *super)
+{
+    struct mf_subfield left = expr_resolve_field(&move->lhs);
+    struct mf_subfield right = expr_resolve_field(&move->rhs);
+
+    /* lhs receives the old value of rhs, and vice versa. */
+    summarize_move(&right, &move->lhs, &left, uflow, super);
+    summarize_move(&left, &move->rhs, &right, uflow, super);
+
+    mf_subfield_swap(&left, &right, uflow, NULL);
+}
+
+static void
+trace__(const struct ovntrace_datapath *dp, struct flow *uflow,
+        uint8_t table_id, enum ovntrace_pipeline pipeline,
+        struct ovs_list *super);
+
+static void
+trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
+              const struct ovntrace_datapath *dp, struct flow *uflow,
+              uint8_t table_id, enum ovntrace_pipeline pipeline,
+              struct ovs_list *super);
+/* Executes an "output" action for 'uflow' in 'dp', appending the resulting
+ * trace nodes to 'super'.  The destination is the logical output port
+ * register of 'uflow', which may name a port or a multicast group.
+ *
+ * In the egress pipeline the packet leaves the datapath.  If the output
+ * port has a peer, tracing continues in the peer's ingress pipeline on a
+ * copy of the flow; otherwise the output is recorded as a MODIFY node.
+ *
+ * In the ingress pipeline the packet moves to the egress pipeline on a copy
+ * of the flow in which every register except the logical input and output
+ * ports is zeroed.  A multicast group is traced once per member port.
+ * Output back to the input port is omitted unless MLF_ALLOW_LOOPBACK is set
+ * in the flags register.
+ *
+ * A zero destination is an error and nothing is output.  An unknown
+ * destination is reported as an error, after which tracing proceeds with
+ * the placeholder name "(unnamed)". */
+static void
+execute_output(const struct ovntrace_datapath *dp, struct flow *uflow,
+               enum ovntrace_pipeline pipeline, struct ovs_list *super)
+{
+    uint16_t key = uflow->regs[MFF_LOG_OUTPORT - MFF_REG0];
+    if (!key) {
+        ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+                             "*** output to null logical port");
+        return;
+    }
+
+    const struct ovntrace_port *port = ovntrace_port_find_by_key(dp, key);
+    const struct ovntrace_mcgroup *mcgroup = ovntrace_mcgroup_find_by_key(dp,
+                                                                          key);
+    const char *out_name = (port ? port->name
+                            : mcgroup ? mcgroup->name
+                            : "(unnamed)");
+    if (!port && !mcgroup) {
+        ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+                             "*** unknown port or multicast group %"PRIu16,
+                             key);
+    }
+
+    if (pipeline == P_EGRESS) {
+        ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT,
+                             "/* output to \"%s\", type \"%s\" */",
+                             out_name, port ? port->type : "");
+        if (port && port->peer) {
+            const struct ovntrace_port *peer = port->peer;
+
+            struct ovntrace_node *node = ovntrace_node_append(
+                super, OVNTRACE_NODE_PIPELINE,
+                "ingress(dp=\"%s\", inport=\"%s\")",
+                peer->dp->name, peer->name);
+
+            /* The packet enters the peer's datapath through the peer port
+             * with no output port chosen yet. */
+            struct flow new_uflow = *uflow;
+            new_uflow.regs[MFF_LOG_INPORT - MFF_REG0] = peer->tunnel_key;
+            new_uflow.regs[MFF_LOG_OUTPORT - MFF_REG0] = 0;
+            trace__(peer->dp, &new_uflow, 0, P_INGRESS, &node->subs);
+        } else {
+            ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
+                                 "output(\"%s\")", out_name);
+
+        }
+        return;
+    }
+
+    /* Ingress to egress: only the logical input and output port registers
+     * are carried over. */
+    struct flow egress_uflow = *uflow;
+    for (int i = 0; i < FLOW_N_REGS; i++) {
+        if (i != MFF_LOG_INPORT - MFF_REG0 &&
+            i != MFF_LOG_OUTPORT - MFF_REG0) {
+            egress_uflow.regs[i] = 0;
+        }
+    }
+
+    uint16_t in_key = uflow->regs[MFF_LOG_INPORT - MFF_REG0];
+    const struct ovntrace_port *inport = ovntrace_port_find_by_key(dp, in_key);
+    const char *inport_name = !in_key ? "" : inport ? inport->name : "(unnamed)";
+    uint32_t flags = uflow->regs[MFF_LOG_FLAGS - MFF_REG0];
+    bool allow_loopback = (flags & MLF_ALLOW_LOOPBACK) != 0;
+
+    if (mcgroup) {
+        struct ovntrace_node *mcnode = ovntrace_node_append(
+            super, OVNTRACE_NODE_PIPELINE,
+            "multicast(dp=\"%s\", mcgroup=\"%s\")",
+            dp->name, mcgroup->name);
+        /* NOTE(review): 'egress_uflow' is shared by all member ports, so
+         * changes made while tracing one port stay visible to the next.
+         * Confirm whether each port should get a fresh copy. */
+        for (size_t i = 0; i < mcgroup->n_ports; i++) {
+            const struct ovntrace_port *p = mcgroup->ports[i];
+
+            struct ovntrace_node *node = ovntrace_node_append(
+                &mcnode->subs, OVNTRACE_NODE_PIPELINE,
+                "egress(dp=\"%s\", inport=\"%s\", outport=\"%s\")",
+                dp->name, inport_name, p->name);
+
+            if (p->tunnel_key != in_key || allow_loopback) {
+                node->always_indent = true;
+
+                egress_uflow.regs[MFF_LOG_OUTPORT - MFF_REG0] = p->tunnel_key;
+                trace__(dp, &egress_uflow, 0, P_EGRESS, &node->subs);
+            } else {
+                ovntrace_node_append(&node->subs, OVNTRACE_NODE_OUTPUT,
+                                     "/* omitting output because inport == outport && !flags.loopback */");
+            }
+        }
+    } else if (key != in_key || allow_loopback) {
+        /* 'port' is NULL here when 'key' names no known port, so compare
+         * 'key' itself.  A port found by key has that key as its tunnel
+         * key, so the test is unchanged for known ports. */
+        struct ovntrace_node *node = ovntrace_node_append(
+            super, OVNTRACE_NODE_PIPELINE,
+            "egress(dp=\"%s\", inport=\"%s\", outport=\"%s\")",
+            dp->name, inport_name, out_name);
+
+        trace__(dp, &egress_uflow, 0, P_EGRESS, &node->subs);
+    } else {
+        ovntrace_node_append(super, OVNTRACE_NODE_OUTPUT,
+                             "/* omitting output because inport == outport && !flags.loopback */");
+    }
+}
+
+/* Executes the nested "arp" action 'on': builds an ARP request flow from a
+ * copy of 'uflow' and traces the nested actions against it, under a new
+ * "arp" transformation node appended to 'super'.  'uflow' itself is left
+ * unchanged. */
+static void
+execute_arp(const struct ovnact_nest *on, const struct ovntrace_datapath *dp,
+            const struct flow *uflow, uint8_t table_id,
+            enum ovntrace_pipeline pipeline, struct ovs_list *super)
+{
+    struct flow request = *uflow;
+
+    /* Update fields for ARP. */
+    request.dl_type = htons(ETH_TYPE_ARP);
+    request.nw_proto = ARP_OP_REQUEST;
+    request.arp_sha = request.dl_src;
+    request.arp_tha = eth_addr_zero;
+    /* ARP SPA is already in request.nw_src. */
+    /* ARP TPA is already in request.nw_dst. */
+
+    /* Zero fields that are no longer relevant. */
+    request.tcp_flags = 0;
+    request.nw_ttl = 0;
+    request.nw_tos = 0;
+    request.nw_frag = 0;
+
+    struct ovntrace_node *arp_node = ovntrace_node_append(
+        super, OVNTRACE_NODE_TRANSFORMATION, "arp");
+
+    trace_actions(on->nested, on->nested_len, dp, &request,
+                  table_id, pipeline, &arp_node->subs);
+}
+
+/* Executes the nested "nd_na" action 'on': builds a neighbor advertisement
+ * flow from a copy of 'uflow' and traces the nested actions against it,
+ * under a new "nd_na" transformation node appended to 'super'.  'uflow'
+ * itself is left unchanged. */
+static void
+execute_nd_na(const struct ovnact_nest *on, const struct ovntrace_datapath *dp,
+              const struct flow *uflow, uint8_t table_id,
+              enum ovntrace_pipeline pipeline, struct ovs_list *super)
+{
+    struct flow advert = *uflow;
+
+    /* Update fields for NA.  Every value is taken from the original flow,
+     * so the order of these assignments does not matter. */
+    advert.dl_dst = uflow->dl_src;
+    advert.dl_src = uflow->dl_dst;
+    advert.ipv6_src = uflow->nd_target;
+    advert.ipv6_dst = uflow->ipv6_src;
+    advert.tp_src = htons(136);
+    advert.arp_tha = uflow->dl_dst;
+    advert.arp_sha = eth_addr_zero;
+
+    struct ovntrace_node *na_node = ovntrace_node_append(
+        super, OVNTRACE_NODE_TRANSFORMATION, "nd_na");
+
+    trace_actions(on->nested, on->nested_len, dp, &advert,
+                  table_id, pipeline, &na_node->subs);
+}
+
+/* Executes the get_arp or get_nd action 'bind' on 'uflow' in 'dp'.
+ *
+ * Looks up the MAC binding for the logical port and IP address that 'bind'
+ * takes from 'uflow', then sets the Ethernet destination of 'uflow' to the
+ * bound address, or to all-zeros if there is no binding.  An ACTION comment
+ * and a MODIFY node that describe the result are appended to 'super'. */
+static void
+execute_get_mac_bind(const struct ovnact_get_mac_bind *bind,
+                     const struct ovntrace_datapath *dp,
+                     struct flow *uflow, struct ovs_list *super)
+{
+    /* Get logical port number.*/
+    struct mf_subfield port_sf = expr_resolve_field(&bind->port);
+    ovs_assert(port_sf.n_bits == 32);
+    uint32_t port_key = mf_get_subfield(&port_sf, uflow);
+
+    /* Get IP address. */
+    struct mf_subfield ip_sf = expr_resolve_field(&bind->ip);
+    ovs_assert(ip_sf.n_bits == 32 || ip_sf.n_bits == 128);
+    union mf_subvalue ip_sv;
+    mf_read_subfield(&ip_sf, uflow, &ip_sv);
+    /* Bindings are keyed on IPv6 addresses, with IPv4 in mapped form. */
+    struct in6_addr ip = (ip_sf.n_bits == 32
+                          ? in6_addr_mapped_ipv4(ip_sv.ipv4)
+                          : ip_sv.ipv6);
+
+    const struct ovntrace_mac_binding *binding
+        = ovntrace_mac_binding_find(dp, port_key, &ip);
+
+    const struct eth_addr mac = binding ? binding->mac : eth_addr_zero;
+
+    /* Actually apply the result, so that later actions and table lookups
+     * see the Ethernet destination that is reported below. */
+    uflow->dl_dst = mac;
+
+    if (binding) {
+        ovntrace_node_append(super, OVNTRACE_NODE_ACTION,
+                             "/* MAC binding to "ETH_ADDR_FMT". */",
+                             ETH_ADDR_ARGS(mac));
+    } else {
+        ovntrace_node_append(super, OVNTRACE_NODE_ACTION,
+                             "/* No MAC binding. */");
+    }
+    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
+                         "eth.dst = "ETH_ADDR_FMT, ETH_ADDR_ARGS(mac));
+}
+
+/* Executes the put_dhcp_opts action 'pdo' for tracing purposes: writes the
+ * value 1 to the action's destination field in 'uflow'. */
+static void
+execute_put_dhcp_opts(const struct ovnact_put_dhcp_opts *pdo,
+                      struct flow *uflow)
+{
+    union mf_subvalue one = { .u8_val = 1 };
+    struct mf_subfield dst = expr_resolve_field(&pdo->dst);
+
+    mf_write_subfield_flow(&dst, &one, uflow);
+}
+
+/* Executes the 'ovnacts_len' bytes of actions in 'ovnacts' against 'uflow'
+ * in 'dp', appending trace nodes to 'super'.  'table_id' and 'pipeline'
+ * identify the logical table whose flow supplied the actions.
+ *
+ * An empty action list is traced as "drop;".  Otherwise each action is
+ * first recorded as an ACTION node holding its formatted text and then
+ * executed, which may modify 'uflow' and add further nodes. */
+static void
+trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len,
+              const struct ovntrace_datapath *dp, struct flow *uflow,
+              uint8_t table_id, enum ovntrace_pipeline pipeline,
+              struct ovs_list *super)
+{
+    if (!ovnacts_len) {
+        ovntrace_node_append(super, OVNTRACE_NODE_ACTION, "drop;");
+        return;
+    }
+
+    struct ds s = DS_EMPTY_INITIALIZER;
+    const struct ovnact *a;
+    OVNACT_FOR_EACH (a, ovnacts, ovnacts_len) {
+        /* Format just this one action: its length is the distance to the
+         * next action. */
+        ds_clear(&s);
+        ovnacts_format(a, sizeof *a * (ovnact_next(a) - a), &s);
+        ovntrace_node_append(super, OVNTRACE_NODE_ACTION, "%s", ds_cstr(&s));
+
+        switch (a->type) {
+        case OVNACT_OUTPUT:
+            execute_output(dp, uflow, pipeline, super);
+            break;
+
+        case OVNACT_NEXT:
+            /* Honor the table named by the action, which for "next(N)"
+             * need not be the table right after this one. */
+            trace__(dp, uflow, ovnact_get_NEXT(a)->ltable, pipeline, super);
+            break;
+
+        case OVNACT_LOAD:
+            execute_load(ovnact_get_LOAD(a), dp, uflow, super);
+            break;
+
+        case OVNACT_MOVE:
+            execute_move(ovnact_get_MOVE(a), uflow, super);
+            break;
+
+        case OVNACT_EXCHANGE:
+            execute_exchange(ovnact_get_EXCHANGE(a), uflow, super);
+            break;
+
+        case OVNACT_DEC_TTL:
+            if (is_ip_any(uflow)) {
+                if (uflow->nw_ttl) {
+                    uflow->nw_ttl--;
+                    ovntrace_node_append(super, OVNTRACE_NODE_MODIFY,
+                                         "ip.ttl--");
+                } else {
+                    ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+                                         "*** TTL underflow");
+                }
+            } else {
+                ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+                                     "*** TTL decrement of non-IP packet");
+            }
+            break;
+
+        case OVNACT_CT_NEXT:
+        case OVNACT_CT_COMMIT:
+        case OVNACT_CT_DNAT:
+        case OVNACT_CT_SNAT:
+        case OVNACT_CT_LB:
+            ovntrace_node_append(super, OVNTRACE_NODE_ERROR,
+                                 "*** ct_* actions not implemented");
+            break;
+
+        case OVNACT_ARP:
+            execute_arp(ovnact_get_ARP(a), dp, uflow, table_id, pipeline,
+                        super);
+            break;
+
+        case OVNACT_ND_NA:
+            execute_nd_na(ovnact_get_ND_NA(a), dp, uflow, table_id, pipeline,
+                          super);
+            break;
+
+        case OVNACT_GET_ARP:
+            execute_get_mac_bind(ovnact_get_GET_ARP(a), dp, uflow, super);
+            break;
+
+        case OVNACT_GET_ND:
+            execute_get_mac_bind(ovnact_get_GET_ND(a), dp, uflow, super);
+            break;
+
+        case OVNACT_PUT_ARP:
+        case OVNACT_PUT_ND:
+            /* Nothing to do for tracing. */
+            break;
+
+        case OVNACT_PUT_DHCP_OPTS:
+            execute_put_dhcp_opts(ovnact_get_PUT_DHCP_OPTS(a), uflow);
+            break;
+        }
+
+    }
+    ds_destroy(&s);
+}
+
+/* Returns true if 'f', the flow matched in logical table 'table_id', is
+ * trivial enough to leave out of the trace: its match is the Boolean
+ * constant true, and its actions consist of exactly one "next" action whose
+ * target is table 'table_id' + 1.  Returns false otherwise, including when
+ * 'f' is NULL. */
+static bool
+may_omit_stage(const struct ovntrace_flow *f, uint8_t table_id)
+{
+    if (!f) {
+        return false;
+    }
+
+    /* The match must be unconditionally true. */
+    if (f->match->type != EXPR_T_BOOLEAN || !f->match->boolean) {
+        return false;
+    }
+
+    /* The actions must be a lone "next". */
+    if (f->ovnacts_len != OVNACT_NEXT_SIZE
+        || f->ovnacts->type != OVNACT_NEXT) {
+        return false;
+    }
+
+    return ovnact_get_NEXT(f->ovnacts)->ltable == table_id + 1;
+}
+
+/* Traces 'uflow' through datapath 'dp' starting at logical table 'table_id'
+ * of 'pipeline'.
+ *
+ * Stages for which may_omit_stage() returns true are skipped silently.  For
+ * the first stage that is not skipped, appends an OVNTRACE_NODE_TABLE node
+ * to 'super' that describes the matching flow (or says "no match") and, if
+ * a flow matched, executes its actions with that node's 'subs' list as
+ * their parent. */
+static void
+trace__(const struct ovntrace_datapath *dp, struct flow *uflow,
+        uint8_t table_id, enum ovntrace_pipeline pipeline,
+        struct ovs_list *super)
+{
+    const struct ovntrace_flow *f
+        = ovntrace_flow_lookup(dp, uflow, table_id, pipeline);
+    while (may_omit_stage(f, table_id)) {
+        table_id++;
+        f = ovntrace_flow_lookup(dp, uflow, table_id, pipeline);
+    }
+
+    /* Build the heading for this table's node. */
+    struct ds heading = DS_EMPTY_INITIALIZER;
+    ds_put_format(&heading, "%2d. ", table_id);
+    if (!f) {
+        ds_put_cstr(&heading, "no match");
+    } else {
+        if (f->stage_name) {
+            ds_put_format(&heading, "%s: ", f->stage_name);
+        }
+        ds_put_format(&heading, "%s, priority %d", f->match_s, f->priority);
+    }
+
+    struct ovntrace_node *table_node = ovntrace_node_append(
+        super, OVNTRACE_NODE_TABLE, "%s", ds_cstr(&heading));
+    ds_destroy(&heading);
+
+    if (f) {
+        trace_actions(f->ovnacts, f->ovnacts_len, dp, uflow, table_id,
+                      pipeline, &table_node->subs);
+    }
+}
+
+/* Traces the microflow described by 'flow_s' through the ingress pipeline of
+ * the logical datapath named 'dp_s'.
+ *
+ * Returns the trace as a string that the caller must free.  It begins with
+ * a "# " line that shows the parsed flow, followed by each of the formats
+ * selected by the 'detailed', 'summary', and 'minimal' flags; when more than
+ * one format is selected, each is introduced by a "# ... trace." heading.
+ *
+ * Calls ovs_fatal() if 'dp_s' does not name a known datapath or if 'flow_s'
+ * cannot be parsed.
+ *
+ * NOTE(review): ovs_fatal() presumably terminates the process, which would
+ * also take down a daemonized ovn-trace serving unixctl requests -- confirm.
+ * NOTE(review): nothing here frees the node trees under 'root' or the
+ * summary clone -- confirm whether that leak matters for a daemon. */
+static char *
+trace(const char *dp_s, const char *flow_s)
+{
+    const struct ovntrace_datapath *dp = ovntrace_datapath_find_by_name(dp_s);
+    if (!dp) {
+        ovs_fatal(0, "unknown datapath \"%s\"", dp_s);
+    }
+
+    struct flow uflow;
+    char *err = expr_parse_microflow(flow_s, &symtab, &address_sets,
+                                     ovntrace_lookup_port, dp, &uflow);
+    if (err) {
+        ovs_fatal(0, "error parsing flow: %s", err);
+    }
+
+    /* Find the logical ingress port, if the microflow names one. */
+    uint32_t in_key = uflow.regs[MFF_LOG_INPORT - MFF_REG0];
+    if (!in_key) {
+        VLOG_WARN("microflow does not specify ingress port");
+    }
+    const struct ovntrace_port *inport = ovntrace_port_find_by_key(dp, in_key);
+
+    /* First output line: the flow being traced. */
+    struct ds out = DS_EMPTY_INITIALIZER;
+    ds_put_cstr(&out, "# ");
+    flow_format(&out, &uflow);
+    ds_put_char(&out, '\n');
+
+    /* Run the trace, collecting nodes under a single pipeline node. */
+    struct ovs_list root = OVS_LIST_INITIALIZER(&root);
+    struct ovntrace_node *ingress = ovntrace_node_append(
+        &root, OVNTRACE_NODE_PIPELINE, "ingress(dp=\"%s\", inport=\"%s\")",
+        dp->name, inport ? inport->name : "(unnamed)");
+    trace__(dp, &uflow, 0, P_INGRESS, &ingress->subs);
+
+    /* Headings are only needed to tell multiple formats apart. */
+    const bool with_headings = (detailed + summary + minimal) > 1;
+
+    if (detailed) {
+        if (with_headings) {
+            ds_put_cstr(&out, "# Detailed trace.\n");
+        }
+        ovntrace_node_print_details(&out, &root, 0);
+    }
+
+    if (summary) {
+        if (with_headings) {
+            ds_put_cstr(&out, "# Summary trace.\n");
+        }
+
+        /* Prune a copy, so that 'root' stays intact for the minimal trace. */
+        struct ovs_list pruned = OVS_LIST_INITIALIZER(&pruned);
+        ovntrace_node_clone(&root, &pruned);
+        ovntrace_node_prune_summary(&pruned);
+        ovntrace_node_print_summary(&out, &pruned, 0);
+    }
+
+    if (minimal) {
+        if (with_headings) {
+            ds_put_cstr(&out, "# Minimal trace.\n");
+        }
+
+        /* This prunes 'root' itself, so it has to be the last format. */
+        ovntrace_node_prune_hard(&root);
+        ovntrace_node_print_summary(&out, &root, 0);
+    }
+
+    return ds_steal_cstr(&out);
+}
+
+/* unixctl callback that requests shutdown: stores true into the bool that
+ * 'exiting_' points to and sends an empty reply on 'conn'. */
+static void
+ovntrace_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
+              const char *argv[] OVS_UNUSED, void *exiting_)
+{
+    *(bool *) exiting_ = true;
+    unixctl_command_reply(conn, NULL);
+}
+
+/* unixctl callback that runs a trace.
+ *
+ * Arguments after the command name are any number of the options
+ * --detailed, --summary, --minimal, or --all, followed by exactly two
+ * non-option arguments: a datapath name and a microflow.  The options
+ * overwrite the global 'detailed', 'summary', and 'minimal' flags; if none
+ * is given, only 'detailed' is set.
+ *
+ * Replies on 'conn' with the trace output, or with an error for an unknown
+ * option or a wrong number of non-option arguments. */
+static void
+ovntrace_trace(struct unixctl_conn *conn, int argc,
+               const char *argv[], void *aux OVS_UNUSED)
+{
+    detailed = summary = minimal = false;
+
+    /* Consume leading options, shifting them out of argv[1]. */
+    for (; argc > 1 && argv[1][0] == '-'; argc--, argv++) {
+        const char *opt = argv[1];
+
+        if (!strcmp(opt, "--all")) {
+            detailed = summary = minimal = true;
+        } else if (!strcmp(opt, "--detailed")) {
+            detailed = true;
+        } else if (!strcmp(opt, "--summary")) {
+            summary = true;
+        } else if (!strcmp(opt, "--minimal")) {
+            minimal = true;
+        } else {
+            unixctl_command_reply_error(conn, "unknown option");
+            return;
+        }
+    }
+
+    /* With no format requested, fall back to the detailed trace. */
+    if (!(detailed || summary || minimal)) {
+        detailed = true;
+    }
+
+    if (argc != 3) {
+        unixctl_command_reply_error(
+            conn, "exactly 2 non-option arguments are required");
+        return;
+    }
+
+    char *reply = trace(argv[1], argv[2]);
+    unixctl_command_reply(conn, reply);
+    free(reply);
+}
diff --git a/tests/ovn.at b/tests/ovn.at
index 2f91838..5add79a 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -1085,6 +1085,7 @@ ip_to_hex() {
 # 9. ARP requests to known IPs are responded directly.
 #
 # 10. No response to ARP requests for unknown IPs.
+
 for is in 1 2 3; do
     for js in 1 2 3; do
         s=$is$js
@@ -1207,6 +1208,221 @@ OVN_CLEANUP([hv1],[hv2],[hv3])
 
 AT_CLEANUP
 
+AT_SETUP([ovn -- trace 1 LS, 3 LSPs])
+AT_SKIP_IF([test $HAVE_PYTHON = no])
+ovn_start
+
+# Create a logical switch and some logical ports.
+# Turn on port security on all lports except lp1.
+# Make lp1 a destination for unknown MACs.
+# Add some ACLs for Ethertypes 1234, 1235, 1236, 1237.
+ovn-nbctl ls-add lsw0
+ovn-sbctl chassis-add hv0 geneve 127.0.0.1
+for i in 1 2 3; do
+    ovn-nbctl lsp-add lsw0 lp$i
+    ovn-sbctl lsp-bind lp$i hv0
+    if test $i = 1; then
+	ovn-nbctl lsp-set-addresses lp$i "f0:00:00:00:00:0$i 192.168.0.$i" unknown
+    else
+	# lp3 gets an IPv6 address and a second IPv4 address in addition to
+	# its 192.168.0.x address.
+	if test $i = 3; then
+	    ip_addrs="192.168.0.$i fe80::ea2a:eaff:fe28:$i/64 192.169.0.$i"
+	else
+	    ip_addrs="192.168.0.$i"
+	fi
+	ovn-nbctl lsp-set-addresses lp$i "f0:00:00:00:00:$i $ip_addrs"
+	ovn-nbctl lsp-set-port-security lp$i f0:00:00:00:00:$i
+    fi
+done
+ovn-nbctl acl-add lsw0 from-lport 1000 'eth.type == 0x1234' drop
+ovn-nbctl acl-add lsw0 from-lport 1000 'eth.type == 0x1235 && inport == "lp1"' drop
+ovn-nbctl acl-add lsw0 to-lport 1000 'eth.type == 0x1236 && outport == "lp3"' drop
+ovn-nbctl create Address_Set name=set1 addresses=\"f0:00:00:00:00:01\",\"f0:00:00:00:00:02\"
+ovn-nbctl acl-add lsw0 to-lport 1000 'eth.type == 0x1237 && eth.src == $set1 && outport == "lp3"' drop
+
+ovn-nbctl --wait=sb sync
+# Run ovn-trace as a daemon that the test functions below query with
+# ovs-appctl, and kill it when the test exits.
+on_exit 'kill `cat ovn-trace.pid`'
+ovn-trace --detach --pidfile --no-chdir
+
+# test_packet INPORT DST SRC [-vlan] [-eth TYPE] OUTPORT...
+#
+# This shell function traces a packet received on INPORT.  The packet has
+# Ethernet destination DST and source SRC, each written in colon-separated
+# xx:xx:xx:xx:xx:xx form.  With -vlan, the packet carries VLAN ID 1234.  With
+# -eth TYPE, the packet has Ethernet type TYPE (4 hex digits).  The OUTPORTs
+# (zero or more) list the logical ports to which the trace is expected to
+# output the packet, in order.  INPORT and the OUTPORTs are specified as
+# logical switch port numbers, e.g. 1 for lp1.
+test_packet() {
+    local inport=$1 eth_dst=$2 eth_src=$3; shift; shift; shift
+    uflow="inport==\"lp$inport\" && eth.dst==$eth_dst && eth.src==$eth_src"
+    # Consume the leading options; whatever remains is the OUTPORT list.
+    while :; do
+    	case $1 in # (
+	    -vlan) uflow="$uflow && vlan.vid == 1234"; shift ;; # (
+	    -eth) uflow="$uflow && eth.type == 0x$2"; shift; shift ;; # (
+	    *) break ;;
+	esac
+    done
+    # The expected minimal trace is one output action per OUTPORT.
+    for outport; do
+	echo "output(\"lp$outport\");"
+    done > expout
+
+    # Save the whole trace for debugging, but compare only the lines that
+    # follow the minimal trace heading.
+    AT_CAPTURE_FILE([trace])
+    AT_CHECK([ovs-appctl -t ovn-trace trace --all lsw0 "$uflow" | tee trace | sed '1,/Minimal trace/d'], [0], [expout])
+}
+
+# test_arp INPORT SHA SPA TPA [REPLY_HA]
+#
+# Traces an ARP request received on INPORT, with SHA, SPA, and TPA as
+# specified.  If REPLY_HA is provided, then it is the hardware address that
+# the ARP reply sent back to INPORT is expected to carry for the target.
+# Otherwise no reply is expected, and the request should instead be output
+# to every logical port other than INPORT.
+#
+# INPORT is a logical switch port number, e.g. 1 for lp1.
+# SHA and REPLY_HA are Ethernet addresses in xx:xx:xx:xx:xx:xx form.
+# SPA and TPA are IPv4 addresses in dotted-quad form.
+test_arp() {
+    local inport=$1 sha=$2 spa=$3 tpa=$4 reply_ha=$5
+
+    local request="inport == \"lp$inport\"
+                   && eth.dst == ff:ff:ff:ff:ff:ff && eth.src == $sha
+                   && arp.op == 1 && arp.sha == $sha && arp.spa == $spa
+		   && arp.tha == ff:ff:ff:ff:ff:ff && arp.tpa == $tpa"
+
+    if test -z "$reply_ha"; then
+        # No responder expected: the broadcast goes to the other ports.
+        reply=
+	local i
+	for i in 1 2 3; do
+	    if test $i != $inport; then
+		reply="${reply}output(\"lp$i\");
+"
+	    fi
+	done
+    else
+        # Responder expected: the request is rewritten into a reply and sent
+        # back out the port it arrived on.
+        reply="\
+eth.dst = $sha;
+eth.src = $reply_ha;
+arp.op = 2;
+arp.tha = $sha;
+arp.sha = $reply_ha;
+arp.tpa = $spa;
+arp.spa = $tpa;
+output(\"lp$inport\");
+"
+    fi
+
+    # Save the whole trace for debugging, but compare only the lines that
+    # follow the minimal trace heading.
+    AT_CAPTURE_FILE([trace])
+    AT_CHECK_UNQUOTED([ovs-appctl -t ovn-trace trace --all lsw0 "$request" | tee trace | sed '1,/Minimal trace/d'], [0], [$reply])
+}
+
+# Send packets between all pairs of source and destination ports:
+#
+# 1. Unicast packets are delivered to exactly one logical switch port
+#    (except that packets destined to their input ports are dropped).
+#
+# 2. Broadcast and multicast are delivered to all logical switch ports
+#    except the input port.
+#
+# 3. When port security is turned on, the switch drops packets from the wrong
+#    MAC address.
+#
+# 4. The switch drops all packets with a VLAN tag.
+#
+# 5. The switch drops all packets with a multicast source address.  (This only
+#    affects behavior when port security is turned off, since otherwise port
+#    security would drop the packet anyway.)
+#
+# 6. The switch delivers packets with an unknown destination to logical
+#    switch ports with "unknown" among their MAC addresses (and port
+#    security disabled).
+#
+# 7. The switch drops unicast packets that violate an ACL.
+#
+# 8. The switch drops multicast and broadcast packets that violate an ACL.
+#
+# 9. ARP requests to known IPs are responded directly.
+#
+# 10. No response to ARP requests for unknown IPs.
+
+for s in 1 2 3; do
+    # Per-source lists of expected output ports, accumulated over the
+    # destinations visited by the inner loop and used by the broadcast,
+    # multicast, and unknown-destination checks after it.
+    bcast=
+    unknown=
+    bacl2=
+    bacl3=
+    for d in 1 2 3; do
+	echo
+	echo "lp$s -> lp$d"
+	if test $d != $s; then unicast=$d; else unicast=; fi
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:0$s $unicast      #1
+
+	# Only lp1 has port security turned off, so only lp1 may send from a
+	# MAC address other than its own.
+	if test $d != $s && test $s = 1; then
+	    impersonate=$d
+	else
+	    impersonate=
+	fi
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:55 $impersonate   #3
+
+	# acl2: Ethertype 1235 is dropped when it comes from lp1.
+	# acl3: Ethertype 1236 is dropped when it goes to lp3.
+	if test $d != $s && test $s != 1; then acl2=$d; else acl2=; fi
+	if test $d != $s && test $d != 3; then acl3=$d; else acl3=; fi
+	if test $d = $s || ( (test $s = 1 || test $s = 2) && test $d = 3); then
+	    # Source of 1 or 2 and dest of 3 should be dropped
+	    # due to the 4th ACL that uses address_set(set1).
+	    acl4=
+	else
+	    acl4=$d
+	fi
+
+	#7, acl1 to acl4:
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:0$s -eth 1234
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:0$s -eth 1235 $acl2
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:0$s -eth 1236 $acl3
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:0$s -eth 1237 $acl4
+
+	test_packet $s f0:00:00:00:00:0$d f0:00:00:00:00:55 -vlan          #4
+	test_packet $s f0:00:00:00:00:0$d 01:00:00:00:00:0$s               #5
+
+	# lp1 is the only port with "unknown" among its addresses, so it is
+	# the only possible recipient of unknown-destination packets.
+	if test $d != $s && test $d = 1; then
+	    unknown="$unknown $d"
+	fi
+	bcast="$bcast $unicast"
+	bacl2="$bacl2 $acl2"
+	bacl3="$bacl3 $acl3"
+
+	sip=192.168.0.$s
+	tip=192.168.0.$d
+	tip_unknown=11.11.11.11
+	test_arp $s f0:00:00:00:00:0$s $sip $tip f0:00:00:00:00:0$d        #9
+	test_arp $s f0:00:00:00:00:0$s $sip $tip_unknown                   #10
+
+	if test $d = 3; then
+	    # lp3 has an additional IP address, 192.169.0.3.
+	    tip=192.169.0.$d
+	    test_arp $s f0:00:00:00:00:0$s $sip $tip f0:00:00:00:00:0$d    #9
+	fi
+    done
+
+    # Broadcast and multicast.
+    test_packet $s ff:ff:ff:ff:ff:ff f0:00:00:00:00:0$s $bcast             #2
+    test_packet $s 01:00:00:00:00:00 f0:00:00:00:00:0$s $bcast             #2
+    if test $s = 1; then
+	bcast_impersonate=$bcast
+    else
+	bcast_impersonate=
+    fi
+    test_packet $s 01:00:00:00:00:00 f0:00:00:00:00:44 $bcast_impersonate  #3
+
+    test_packet $s f0:00:00:00:ff:ff f0:00:00:00:00:0$s $unknown           #6
+
+    #8, acl1 to acl3:
+    test_packet $s ff:ff:ff:ff:ff:ff f0:00:00:00:00:0$s -eth 1234
+    test_packet $s ff:ff:ff:ff:ff:ff f0:00:00:00:00:0$s -eth 1235 $bacl2
+    test_packet $s ff:ff:ff:ff:ff:ff f0:00:00:00:00:0$s -eth 1236 $bacl3
+
+    #8, acl1 to acl3:
+    test_packet $s 01:00:00:00:00:00 f0:00:00:00:00:0$s -eth 1234
+    test_packet $s 01:00:00:00:00:00 f0:00:00:00:00:0$s -eth 1235 $bacl2
+    test_packet $s 01:00:00:00:00:00 f0:00:00:00:00:0$s -eth 1236 $bacl3
+done
+
+AT_CLEANUP
+
 # 2 hypervisors, 4 logical ports per HV
 # 2 locally attached networks (one flat, one vlan tagged over same device)
 # 2 ports per HV on each network
-- 
2.1.3




More information about the dev mailing list