[ovs-dev] [PATCH v3] dpif-netdev: Conditional EMC insert

Ciara Loftus ciara.loftus at intel.com
Tue Jan 31 17:55:50 UTC 2017


Unconditional insertion of EMC entries results in EMC thrashing at high
numbers of parallel flows. When this occurs, the performance of the EMC
often falls below that of the dpcls classifier, rendering the EMC
practically useless.

Instead of unconditionally inserting entries into the EMC when a miss
occurs, use a 1% probability of insertion. This ensures that the most
frequent flows have the highest chance of creating an entry in the EMC,
and the probability of thrashing the EMC is also greatly reduced.

The probability of insertion is configurable, via the
other_config:emc-insert-prob option. For example the following command
increases the insertion probability to 1/10 ie. 10%.

ovs-vsctl set Open_vSwitch . other_config:emc-insert-prob=10

Signed-off-by: Ciara Loftus <ciara.loftus at intel.com>
Signed-off-by: Georg Schmuecking <georg.schmuecking at ericsson.com>
Co-authored-by: Georg Schmuecking <georg.schmuecking at ericsson.com>
---
v3:
- Use new dpif other_config infrastructure to tidy up how the
  emc-insert-prob value is passed to dpif-netdev.
v2:
- Enable probability configurability via other_config:emc-insert-prob
  option.

 Documentation/howto/dpdk.rst | 20 +++++++++++++++++
 NEWS                         |  2 ++
 lib/dpif-netdev.c            | 53 ++++++++++++++++++++++++++++++++++++++++++--
 vswitchd/vswitch.xml         | 16 +++++++++++++
 4 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d1e6e89..f2e888b 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -354,6 +354,26 @@ the `DPDK documentation
 
 Note: Not all DPDK virtual PMD drivers have been tested and verified to work.
 
+EMC Insertion Probability
+-------------------------
+By default 1 in every 100 flows are inserted into the Exact Match Cache (EMC).
+It is possible to change this insertion probability by setting the
+``emc-insert-prob`` option::
+
+    $ ovs-vsctl --no-wait set Open_vSwitch . other_config:emc-insert-prob=N
+
+where:
+
+``N``
+  is a positive integer between 0 and 4294967295 (maximum unsigned 32bit int).
+
+If ``N`` is set to 1, an insertion will be performed for every flow. The lower
+the value of ``emc-insert-prob`` the higher the probability of insertion,
+except for the value 0 which will result in no insertions being performed and
+thus essentially disabling the EMC.
+
+For more information on the EMC refer to :doc:`/intro/install/dpdk` .
+
 .. _dpdk-ovs-in-guest:
 
 OVS with DPDK Inside VMs
diff --git a/NEWS b/NEWS
index 6838649..5a21580 100644
--- a/NEWS
+++ b/NEWS
@@ -65,6 +65,8 @@ Post-v2.6.0
        device will not be available for use until a valid dpdk-devargs is
        specified.
      * Virtual DPDK Poll Mode Driver (vdev PMD) support.
+     * EMC insertion probability is reduced to 1/100 and is configurable via
+       the new 'other_config:emc-insert-prob' option.
    - Fedora packaging:
      * A package upgrade does not automatically restart OVS service.
    - ovs-vswitchd/ovs-vsctl:
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 0be5db5..e514ddb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -144,6 +144,10 @@ struct netdev_flow_key {
 #define EM_FLOW_HASH_MASK (EM_FLOW_HASH_ENTRIES - 1)
 #define EM_FLOW_HASH_SEGS 2
 
+/* Default EMC insert probability is 1 / DEFAULT_EM_FLOW_INSERT_PROB */
+#define DEFAULT_EM_FLOW_INSERT_PROB 100
+#define DEFAULT_EM_FLOW_INSERT_MIN (UINT32_MAX / DEFAULT_EM_FLOW_INSERT_PROB)
+
 struct emc_entry {
     struct dp_netdev_flow *flow;
     struct netdev_flow_key key;   /* key.hash used for emc hash value. */
@@ -254,6 +258,9 @@ struct dp_netdev {
     uint64_t last_tnl_conf_seq;
 
     struct conntrack conntrack;
+
+    /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
+    atomic_uint32_t emc_insert_min;
 };
 
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
@@ -1066,6 +1073,8 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
 
     conntrack_init(&dp->conntrack);
 
+    atomic_init(&dp->emc_insert_min, DEFAULT_EM_FLOW_INSERT_MIN);
+
     cmap_init(&dp->poll_threads);
     ovs_mutex_init_recursive(&dp->non_pmd_mutex);
     ovsthread_key_create(&dp->per_pmd_key, NULL);
@@ -1943,6 +1952,22 @@ emc_insert(struct emc_cache *cache, const struct netdev_flow_key *key,
     emc_change_entry(to_be_replaced, flow, key);
 }
 
+static inline void
+emc_probabilistic_insert(struct emc_cache *cache,
+                         const struct netdev_flow_key *key,
+                         struct dp_netdev_flow *flow,
+                         uint32_t random_val, uint32_t min)
+{
+    /* Insert an entry into the EMC based on probability value 'min'. By
+     * default the value is UINT32_MAX / 100 which yields an insertion
+     * probability of 1/100. This value may be different for the DPDK datapath
+     * depending on whether or not the user has configured the
+     * 'emc-insert-prob' other_config option. */
+    if ((key->hash ^ random_val) < min) {
+        emc_insert(cache, key, flow);
+    }
+}
+
 static inline struct dp_netdev_flow *
 emc_lookup(struct emc_cache *cache, const struct netdev_flow_key *key)
 {
@@ -2731,6 +2756,7 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
 {
     struct dp_netdev *dp = get_dp_netdev(dpif);
     const char *cmask = smap_get(other_config, "pmd-cpu-mask");
+    int insert_prob = smap_get_int(other_config, "emc-insert-prob", -1);
 
     if (!nullable_string_is_equal(dp->pmd_cmask, cmask)) {
         free(dp->pmd_cmask);
@@ -2738,6 +2764,13 @@ dpif_netdev_set_config(struct dpif *dpif, const struct smap *other_config)
         dp_netdev_request_reconfigure(dp);
     }
 
+    if (insert_prob >= 0 && insert_prob <= UINT32_MAX) {
+        uint32_t insert_min = insert_prob == 0 ? 0 : UINT32_MAX / insert_prob;
+        if (insert_min != dp->emc_insert_min) {
+            atomic_store_relaxed(&dp->emc_insert_min, insert_min);
+        }
+    }
+
     return 0;
 }
 
@@ -4194,7 +4227,15 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet,
         }
         ovs_mutex_unlock(&pmd->flow_mutex);
 
-        emc_insert(&pmd->flow_cache, key, netdev_flow);
+#ifdef DPDK_NETDEV
+        uint32_t min;
+        atomic_read_relaxed(&pmd->dp->emc_insert_min, &min);
+        emc_probabilistic_insert(&pmd->flow_cache, key, netdev_flow,
+                                 pmd->last_cycles, min);
+#else
+        emc_probabilistic_insert(&pmd->flow_cache, key, netdev_flow,
+                                 random_uint32(), DEFAULT_EM_FLOW_INSERT_MIN);
+#endif
     }
 }
 
@@ -4288,7 +4329,15 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 
         flow = dp_netdev_flow_cast(rules[i]);
 
-        emc_insert(flow_cache, &keys[i], flow);
+#ifdef DPDK_NETDEV
+        uint32_t min;
+        atomic_read_relaxed(&pmd->dp->emc_insert_min, &min);
+        emc_probabilistic_insert(flow_cache, &keys[i], flow,
+                                 pmd->last_cycles, min);
+#else
+        emc_probabilistic_insert(flow_cache, &keys[i], flow,
+                                 random_uint32(), DEFAULT_EM_FLOW_INSERT_MIN);
+#endif
         dp_netdev_queue_batches(packet, flow, &keys[i].mf, batches, n_batches);
     }
 
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 146a816..5b789c7 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -335,6 +335,22 @@
           datapaths.
         </p>
       </column>
+
+      <column name="other_config" key="emc-insert-prob"
+              type='{"type": "integer", "minInteger": 0, "maxInteger": 4294967295}'>
+        <p>
+          Specifies the probability (1/emc-insert-prob) of a flow being
+          inserted into the Exact Match Cache (EMC). Higher values of
+          emc-insert-prob will result in less insertions, and lower
+          values will result in more insertions. A value of 1 will result in
+          an insertion for every flow whereas a value of zero will result in
+          no insertions and essentially disable the EMC.
+        </p>
+        <p>
+          Defaults to 100 ie. there is 1/100 chance of EMC insertion. Only
+          applies to <code>netdev</code> (userspace) bridges.
+        </p>
+      </column>
     </group>
 
     <group title="Status">
-- 
2.4.11



More information about the dev mailing list