[ovs-dev] [PATCH] ofproto-dpif-governor: Improve performance when most flows get set up.

Ben Pfaff blp at nicira.com
Mon Jun 25 16:49:27 UTC 2012


The "flow setup governor" was introduced to avoid the cost of setting up
short flows when there are many of them.  It works very well for short
flows, in fact.  However, when the bulk of flows are short, but still long
enough to be set up by the governor, we end up with the worst of both
worlds: OVS processes the first 5 packets of every flow "by hand" and then
it still has to set up a flow.

This commit refines the flow setup governor so that, when most of the flows
that go through it actually get set up, it in turn starts setting up most
flows at the first packet.  When it does this, it continues to sample a
small fraction of the flows in the governor's usual manner, so that if the
behavior changes it can react to it.

This increases netperf TCP_CRR transactions per second by about 25% in my
test setup, without affecting "ovs-benchmark rate" performance.

(I found that to get relatively stable performance for TCP_CRR, regardless
of whether Open vSwitch or any kind of bridging was involved, I had to pin
the netperf processes on each side of the link to a single core.  I found
that my NIC's interrupts were already pinned.  Thanks to Luca Giraudo
<lgiraudo at nicira.com> for these hints.)

Bug #12080.
Reported-by: Gurucharan Shetty <gshetty at nicira.com>
Signed-off-by: Ben Pfaff <blp at nicira.com>
---
 ofproto/ofproto-dpif-governor.c |   43 ++++++++++++++++++++++++++++++--------
 ofproto/ofproto-dpif-governor.h |    5 ++++
 2 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/ofproto/ofproto-dpif-governor.c b/ofproto/ofproto-dpif-governor.c
index 458f8d7..506dadb 100644
--- a/ofproto/ofproto-dpif-governor.c
+++ b/ofproto/ofproto-dpif-governor.c
@@ -116,9 +116,9 @@ governor_is_idle(struct governor *g)
 bool
 governor_should_install_flow(struct governor *g, uint32_t hash, int n)
 {
+    int old_count, new_count;
     bool install_flow;
     uint8_t *e;
-    int count;
 
     assert(n > 0);
 
@@ -135,19 +135,38 @@ governor_should_install_flow(struct governor *g, uint32_t hash, int n)
         governor_new_generation(g, new_size);
     }
 
+    /* If we've set up most of the flows we've seen, then we're wasting time
+     * handling most packets one at a time, so in this case instead set up most
+     * flows directly and use the remaining flows as a sample set to adjust our
+     * criteria later.
+     *
+     * The definition of "most" is conservative, but the sample size is tuned
+     * based on a few experiments with TCP_CRR mode in netperf. */
+    if (g->n_setups >= g->n_flows - g->n_flows / 16
+        && g->n_flows >= 64
+        && hash & 0x3f) {
+        g->n_shortcuts++;
+        return true;
+    }
+
     /* Do hash table processing.
      *
      * Even-numbered hash values use high-order nibbles.
      * Odd-numbered hash values use low-order nibbles. */
     e = &g->table[(hash >> 1) & (g->size - 1)];
-    count = n + (hash & 1 ? *e >> 4 : *e & 0x0f);
-    if (count >= FLOW_SETUP_THRESHOLD) {
+    old_count = (hash & 1 ? *e >> 4 : *e & 0x0f);
+    if (!old_count) {
+        g->n_flows++;
+    }
+    new_count = n + old_count;
+    if (new_count >= FLOW_SETUP_THRESHOLD) {
+        g->n_setups++;
         install_flow = true;
-        count = 0;
+        new_count = 0;
     } else {
         install_flow = false;
     }
-    *e = hash & 1 ? (count << 4) | (*e & 0x0f) : (*e & 0xf0) | count;
+    *e = hash & 1 ? (new_count << 4) | (*e & 0x0f) : (*e & 0xf0) | new_count;
 
     return install_flow;
 }
@@ -167,24 +186,30 @@ governor_new_generation(struct governor *g, unsigned int size)
                       g->name, size / 1024);
         } else {
             VLOG_INFO("%s: processed %u packets in %.2f s, "
-                      "%s hash table to %u kB",
+                      "%s hash table to %u kB "
+                      "(%u hashes, %u setups, %u shortcuts)",
                       g->name, g->n_packets,
                       (time_msec() - g->start) / 1000.0,
                       size > g->size ? "enlarging" : "shrinking",
-                      size / 1024);
+                      size / 1024,
+                      g->n_flows, g->n_setups, g->n_shortcuts);
         }
 
         free(g->table);
         g->table = xmalloc(size * sizeof *g->table);
         g->size = size;
     } else {
-        VLOG_DBG("%s: processed %u packets in %.2f s with %u kB hash table",
+        VLOG_DBG("%s: processed %u packets in %.2f s with %u kB hash table "
+                 "(%u hashes, %u setups, %u shortcuts)",
                  g->name, g->n_packets, (time_msec() - g->start) / 1000.0,
-                 size / 1024);
+                 size / 1024, g->n_flows, g->n_setups, g->n_shortcuts);
     }
 
     /* Clear data for next generation. */
     memset(g->table, 0, size * sizeof *g->table);
     g->start = time_msec();
     g->n_packets = 0;
+    g->n_flows /= 2;
+    g->n_setups /= 2;
+    g->n_shortcuts = 0;
 }
diff --git a/ofproto/ofproto-dpif-governor.h b/ofproto/ofproto-dpif-governor.h
index 31cf4a2..6dbd0d5 100644
--- a/ofproto/ofproto-dpif-governor.h
+++ b/ofproto/ofproto-dpif-governor.h
@@ -38,6 +38,11 @@ struct governor {
     unsigned int size;          /* Table size in bytes. */
     long long int start;        /* Time when the table was last cleared. */
     unsigned int n_packets;     /* Number of packets processed. */
+
+    /* Statistics for skipping counters when most flows get set up. */
+    unsigned int n_flows;       /* Number of unique flows seen. */
+    unsigned int n_setups;      /* Number of flows set up based on counters. */
+    unsigned int n_shortcuts;   /* Number of flows set up based on history. */
 };
 
 struct governor *governor_create(const char *name);
-- 
1.7.2.5




More information about the dev mailing list